diff options
author | Bruce Momjian | 1997-09-07 05:04:48 +0000 |
---|---|---|
committer | Bruce Momjian | 1997-09-07 05:04:48 +0000 |
commit | 1ccd423235a48739d6f7a4d7889705b5f9ecc69b (patch) | |
tree | 8001c4e839dfad8f29ceda7f8c5f5dbb8759b564 | |
parent | 8fecd4febf8357f3cc20383ed29ced484877d5ac (diff) |
Massive commit to run PGINDENT on all *.c and *.h files.
687 files changed, 152157 insertions, 138270 deletions
diff --git a/contrib/array/array_iterator.c b/contrib/array/array_iterator.c index 95ab119f85..c0c61f0b6a 100644 --- a/contrib/array/array_iterator.c +++ b/contrib/array/array_iterator.c @@ -8,8 +8,8 @@ * For example array_int4eq returns true if some of the elements * of an array of int4 is equal to the given value: * - * array_int4eq({1,2,3}, 1) --> true - * array_int4eq({1,2,3}, 4) --> false + * array_int4eq({1,2,3}, 1) --> true + * array_int4eq({1,2,3}, 4) --> false * * If we have defined T array types and O scalar operators * we can define T x O array operators, each of them has a name @@ -19,10 +19,10 @@ * the array_int4_like because there is no like operator for int4. * It is now possible to write queries which look inside the arrays: * - * create table t(id int4[], txt text[]); - * select * from t where t.id *= 123; - * select * from t where t.txt *~ '[a-z]'; - * select * from t where t.txt[1:3] **~ '[a-z]'; + * create table t(id int4[], txt text[]); + * select * from t where t.id *= 123; + * select * from t where t.txt *~ '[a-z]'; + * select * from t where t.txt[1:3] **~ '[a-z]'; * * Copyright (c) 1996, Massimo Dal Zotto <[email protected]> */ @@ -40,93 +40,116 @@ #include "utils/builtins.h" #include "utils/elog.h" -static int32 -array_iterator(Oid elemtype, Oid proc, int and, ArrayType *array, Datum value) +static int32 +array_iterator(Oid elemtype, Oid proc, int and, ArrayType * array, Datum value) { - HeapTuple typ_tuple; - TypeTupleForm typ_struct; - bool typbyval; - int typlen; - func_ptr proc_fn; - int pronargs; - int nitems, i, result; - int ndim, *dim; - char *p; - - /* Sanity checks */ - if ((array == (ArrayType *) NULL) - || (ARR_IS_LO(array) == true)) { - /* elog(NOTICE, "array_iterator: array is null"); */ - return (0); - } - ndim = ARR_NDIM(array); - dim = ARR_DIMS(array); - nitems = getNitems(ndim, dim); - if (nitems == 0) { - /* elog(NOTICE, "array_iterator: nitems = 0"); */ - return (0); - } - - /* Lookup element type information */ - 
typ_tuple = SearchSysCacheTuple(TYPOID, ObjectIdGetDatum(elemtype),0,0,0); - if (!HeapTupleIsValid(typ_tuple)) { - elog(WARN,"array_iterator: cache lookup failed for type %d", elemtype); - return 0; - } - typ_struct = (TypeTupleForm) GETSTRUCT(typ_tuple); - typlen = typ_struct->typlen; - typbyval = typ_struct->typbyval; - - /* Lookup the function entry point */ - proc_fn == (func_ptr) NULL; - fmgr_info(proc, &proc_fn, &pronargs); - if ((proc_fn == NULL) || (pronargs != 2)) { - elog(WARN, "array_iterator: fmgr_info lookup failed for oid %d", proc); - return (0); - } - - /* Scan the array and apply the operator to each element */ - result = 0; - p = ARR_DATA_PTR(array); - for (i = 0; i < nitems; i++) { - if (typbyval) { - switch(typlen) { - case 1: - result = (int) (*proc_fn)(*p, value); - break; - case 2: - result = (int) (*proc_fn)(* (int16 *) p, value); - break; - case 3: - case 4: - result = (int) (*proc_fn)(* (int32 *) p, value); - break; - } - p += typlen; - } else { - result = (int) (*proc_fn)(p, value); - if (typlen > 0) { - p += typlen; - } else { - p += INTALIGN(* (int32 *) p); - } - } - if (result) { - if (!and) { - return (1); - } - } else { - if (and) { + HeapTuple typ_tuple; + TypeTupleForm typ_struct; + bool typbyval; + int typlen; + func_ptr proc_fn; + int pronargs; + int nitems, + i, + result; + int ndim, + *dim; + char *p; + + /* Sanity checks */ + if ((array == (ArrayType *) NULL) + || (ARR_IS_LO(array) == true)) + { + /* elog(NOTICE, "array_iterator: array is null"); */ + return (0); + } + ndim = ARR_NDIM(array); + dim = ARR_DIMS(array); + nitems = getNitems(ndim, dim); + if (nitems == 0) + { + /* elog(NOTICE, "array_iterator: nitems = 0"); */ return (0); - } } - } - if (and && result) { - return (1); - } else { - return (0); - } + /* Lookup element type information */ + typ_tuple = SearchSysCacheTuple(TYPOID, ObjectIdGetDatum(elemtype), 0, 0, 0); + if (!HeapTupleIsValid(typ_tuple)) + { + elog(WARN, "array_iterator: cache lookup failed for type 
%d", elemtype); + return 0; + } + typ_struct = (TypeTupleForm) GETSTRUCT(typ_tuple); + typlen = typ_struct->typlen; + typbyval = typ_struct->typbyval; + + /* Lookup the function entry point */ + proc_fn == (func_ptr) NULL; + fmgr_info(proc, &proc_fn, &pronargs); + if ((proc_fn == NULL) || (pronargs != 2)) + { + elog(WARN, "array_iterator: fmgr_info lookup failed for oid %d", proc); + return (0); + } + + /* Scan the array and apply the operator to each element */ + result = 0; + p = ARR_DATA_PTR(array); + for (i = 0; i < nitems; i++) + { + if (typbyval) + { + switch (typlen) + { + case 1: + result = (int) (*proc_fn) (*p, value); + break; + case 2: + result = (int) (*proc_fn) (*(int16 *) p, value); + break; + case 3: + case 4: + result = (int) (*proc_fn) (*(int32 *) p, value); + break; + } + p += typlen; + } + else + { + result = (int) (*proc_fn) (p, value); + if (typlen > 0) + { + p += typlen; + } + else + { + p += INTALIGN(*(int32 *) p); + } + } + if (result) + { + if (!and) + { + return (1); + } + } + else + { + if (and) + { + return (0); + } + } + } + + if (and && result) + { + return (1); + } + else + { + return (0); + } } /* @@ -134,39 +157,39 @@ array_iterator(Oid elemtype, Oid proc, int and, ArrayType *array, Datum value) */ int32 -array_texteq(ArrayType *array, char* value) +array_texteq(ArrayType * array, char *value) { - return array_iterator((Oid) 25, /* text */ - (Oid) 67, /* texteq */ - 0, /* logical or */ - array, (Datum)value); + return array_iterator((Oid) 25, /* text */ + (Oid) 67, /* texteq */ + 0, /* logical or */ + array, (Datum) value); } int32 -array_all_texteq(ArrayType *array, char* value) +array_all_texteq(ArrayType * array, char *value) { - return array_iterator((Oid) 25, /* text */ - (Oid) 67, /* texteq */ - 1, /* logical and */ - array, (Datum)value); + return array_iterator((Oid) 25, /* text */ + (Oid) 67, /* texteq */ + 1, /* logical and */ + array, (Datum) value); } int32 -array_textregexeq(ArrayType *array, char* value) 
+array_textregexeq(ArrayType * array, char *value) { - return array_iterator((Oid) 25, /* text */ - (Oid) 81, /* textregexeq */ - 0, /* logical or */ - array, (Datum)value); + return array_iterator((Oid) 25, /* text */ + (Oid) 81, /* textregexeq */ + 0, /* logical or */ + array, (Datum) value); } int32 -array_all_textregexeq(ArrayType *array, char* value) +array_all_textregexeq(ArrayType * array, char *value) { - return array_iterator((Oid) 25, /* text */ - (Oid) 81, /* textregexeq */ - 1, /* logical and */ - array, (Datum)value); + return array_iterator((Oid) 25, /* text */ + (Oid) 81, /* textregexeq */ + 1, /* logical and */ + array, (Datum) value); } /* @@ -175,39 +198,39 @@ array_all_textregexeq(ArrayType *array, char* value) */ int32 -array_char16eq(ArrayType *array, char* value) +array_char16eq(ArrayType * array, char *value) { - return array_iterator((Oid) 20, /* char16 */ - (Oid) 490, /* char16eq */ - 0, /* logical or */ - array, (Datum)value); + return array_iterator((Oid) 20, /* char16 */ + (Oid) 490, /* char16eq */ + 0, /* logical or */ + array, (Datum) value); } int32 -array_all_char16eq(ArrayType *array, char* value) +array_all_char16eq(ArrayType * array, char *value) { - return array_iterator((Oid) 20, /* char16 */ - (Oid) 490, /* char16eq */ - 1, /* logical and */ - array, (Datum)value); + return array_iterator((Oid) 20, /* char16 */ + (Oid) 490, /* char16eq */ + 1, /* logical and */ + array, (Datum) value); } int32 -array_char16regexeq(ArrayType *array, char* value) +array_char16regexeq(ArrayType * array, char *value) { - return array_iterator((Oid) 20, /* char16 */ - (Oid) 700, /* char16regexeq */ - 0, /* logical or */ - array, (Datum)value); + return array_iterator((Oid) 20, /* char16 */ + (Oid) 700, /* char16regexeq */ + 0, /* logical or */ + array, (Datum) value); } int32 -array_all_char16regexeq(ArrayType *array, char* value) +array_all_char16regexeq(ArrayType * array, char *value) { - return array_iterator((Oid) 20, /* char16 */ - (Oid) 700, 
/* char16regexeq */ - 1, /* logical and */ - array, (Datum)value); + return array_iterator((Oid) 20, /* char16 */ + (Oid) 700, /* char16regexeq */ + 1, /* logical and */ + array, (Datum) value); } /* @@ -215,37 +238,37 @@ array_all_char16regexeq(ArrayType *array, char* value) */ int32 -array_int4eq(ArrayType *array, int4 value) +array_int4eq(ArrayType * array, int4 value) { - return array_iterator((Oid) 23, /* int4 */ - (Oid) 65, /* int4eq */ - 0, /* logical or */ - array, (Datum)value); + return array_iterator((Oid) 23, /* int4 */ + (Oid) 65, /* int4eq */ + 0, /* logical or */ + array, (Datum) value); } int32 -array_all_int4eq(ArrayType *array, int4 value) +array_all_int4eq(ArrayType * array, int4 value) { - return array_iterator((Oid) 23, /* int4 */ - (Oid) 65, /* int4eq */ - 1, /* logical and */ - array, (Datum)value); + return array_iterator((Oid) 23, /* int4 */ + (Oid) 65, /* int4eq */ + 1, /* logical and */ + array, (Datum) value); } int32 -array_int4gt(ArrayType *array, int4 value) +array_int4gt(ArrayType * array, int4 value) { - return array_iterator((Oid) 23, /* int4 */ - (Oid) 147, /* int4gt */ - 0, /* logical or */ - array, (Datum)value); + return array_iterator((Oid) 23, /* int4 */ + (Oid) 147, /* int4gt */ + 0, /* logical or */ + array, (Datum) value); } int32 -array_all_int4gt(ArrayType *array, int4 value) +array_all_int4gt(ArrayType * array, int4 value) { - return array_iterator((Oid) 23, /* int4 */ - (Oid) 147, /* int4gt */ - 1, /* logical and */ - array, (Datum)value); + return array_iterator((Oid) 23, /* int4 */ + (Oid) 147, /* int4gt */ + 1, /* logical and */ + array, (Datum) value); } diff --git a/contrib/datetime/datetime_functions.c b/contrib/datetime/datetime_functions.c index e684ce5826..08e7250700 100644 --- a/contrib/datetime/datetime_functions.c +++ b/contrib/datetime/datetime_functions.c @@ -13,86 +13,99 @@ #include "utils/datetime.h" -TimeADT *time_difference(TimeADT * time1, TimeADT * time2) +TimeADT * +time_difference(TimeADT * time1, 
TimeADT * time2) { - TimeADT *result = (TimeADT *) palloc(sizeof(TimeADT)); - *result = *time1 - *time2; - return (result); + TimeADT *result = (TimeADT *) palloc(sizeof(TimeADT)); + + *result = *time1 - *time2; + return (result); } -TimeADT *currenttime() +TimeADT * +currenttime() { - time_t current_time; - struct tm *tm; - TimeADT *result = (TimeADT *) palloc(sizeof(TimeADT)); - - current_time = time(NULL); - tm = localtime(&current_time); - *result = ((((tm->tm_hour*60)+tm->tm_min)*60)+tm->tm_sec); - return (result); + time_t current_time; + struct tm *tm; + TimeADT *result = (TimeADT *) palloc(sizeof(TimeADT)); + + current_time = time(NULL); + tm = localtime(&current_time); + *result = ((((tm->tm_hour * 60) + tm->tm_min) * 60) + tm->tm_sec); + return (result); } -DateADT currentdate() +DateADT +currentdate() { - time_t current_time; - struct tm *tm; - DateADT result; - current_time = time(NULL); - tm = localtime(&current_time); - - result = date2j(tm->tm_year,tm->tm_mon + 1,tm->tm_mday) - - date2j(100,1,1); - return (result); + time_t current_time; + struct tm *tm; + DateADT result; + + current_time = time(NULL); + tm = localtime(&current_time); + + result = date2j(tm->tm_year, tm->tm_mon + 1, tm->tm_mday) - + date2j(100, 1, 1); + return (result); } -int4 hours(TimeADT * time) +int4 +hours(TimeADT * time) { - return(*time / (60*60)); + return (*time / (60 * 60)); } -int4 minutes(TimeADT * time) +int4 +minutes(TimeADT * time) { - return(((int) (*time / 60)) % 60); + return (((int) (*time / 60)) % 60); } -int4 seconds(TimeADT * time) +int4 +seconds(TimeADT * time) { - return(((int) *time) % 60); + return (((int) *time) % 60); } -int4 day(DateADT *date) +int4 +day(DateADT * date) { - struct tm tm; + struct tm tm; - j2date( (*date + date2j(2000,1,1)), - &tm.tm_year, &tm.tm_mon, &tm.tm_mday); + j2date((*date + date2j(2000, 1, 1)), + &tm.tm_year, &tm.tm_mon, &tm.tm_mday); - return (tm.tm_mday); + return (tm.tm_mday); } -int4 month(DateADT *date) +int4 +month(DateADT * date) { - struct tm 
tm; + struct tm tm; - j2date( (*date + date2j(2000,1,1)), - &tm.tm_year, &tm.tm_mon, &tm.tm_mday); + j2date((*date + date2j(2000, 1, 1)), + &tm.tm_year, &tm.tm_mon, &tm.tm_mday); - return (tm.tm_mon); + return (tm.tm_mon); } -int4 year(DateADT *date) +int4 +year(DateADT * date) { - struct tm tm; + struct tm tm; - j2date( (*date + date2j(2000,1,1)), - &tm.tm_year, &tm.tm_mon, &tm.tm_mday); + j2date((*date + date2j(2000, 1, 1)), + &tm.tm_year, &tm.tm_mon, &tm.tm_mday); - return (tm.tm_year); + return (tm.tm_year); } -int4 asminutes(TimeADT * time) +int4 +asminutes(TimeADT * time) { - int seconds = (int) *time; + int seconds = (int) *time; - return (seconds / 60); + return (seconds / 60); } -int4 asseconds(TimeADT * time) +int4 +asseconds(TimeADT * time) { - int seconds = (int) *time; + int seconds = (int) *time; - return (seconds); + return (seconds); } diff --git a/contrib/int8/int8.c b/contrib/int8/int8.c index 93822cb480..a73986beeb 100644 --- a/contrib/int8/int8.c +++ b/contrib/int8/int8.c @@ -1,12 +1,12 @@ /*------------------------------------------------------------------------- * * int8.c-- - * Internal 64-bit integer operations + * Internal 64-bit integer operations * *------------------------------------------------------------------------- */ -#include <stdio.h> /* for sprintf proto, etc. */ -#include <stdlib.h> /* for strtod, etc. */ +#include <stdio.h> /* for sprintf proto, etc. */ +#include <stdlib.h> /* for strtod, etc. 
*/ #include <string.h> #include <ctype.h> #include <time.h> @@ -17,7 +17,7 @@ #include "postgres.h" #include "utils/palloc.h" -#define MAXINT8LEN 25 +#define MAXINT8LEN 25 #define USE_LOCAL_CODE 1 @@ -26,53 +26,58 @@ #endif #ifndef HAVE_64BIT_INTS -typedef char[8] int64; +typedef char [8] int64; #elif defined(__alpha) typedef long int int64; + #define INT64_FORMAT "%ld" #elif defined(__GNUC__) typedef long long int int64; + #define INT64_FORMAT "%Ld" #else typedef long int int64; + #define INT64_FORMAT "%ld" #endif -int64 *int8in(char *str); -char *int8out(int64 *val); - -bool int8eq(int64 *val1, int64 *val2); -bool int8ne(int64 *val1, int64 *val2); -bool int8lt(int64 *val1, int64 *val2); -bool int8gt(int64 *val1, int64 *val2); -bool int8le(int64 *val1, int64 *val2); -bool int8ge(int64 *val1, int64 *val2); - -bool int84eq(int64 *val1, int32 val2); -bool int84ne(int64 *val1, int32 val2); -bool int84lt(int64 *val1, int32 val2); -bool int84gt(int64 *val1, int32 val2); -bool int84le(int64 *val1, int32 val2); -bool int84ge(int64 *val1, int32 val2); - -int64 *int8um(int64 *val); -int64 *int8pl(int64 *val1, int64 *val2); -int64 *int8mi(int64 *val1, int64 *val2); -int64 *int8mul(int64 *val1, int64 *val2); -int64 *int8div(int64 *val1, int64 *val2); - -int64 *int48(int32 val); -int32 int84(int64 *val); +int64 *int8in(char *str); +char *int8out(int64 * val); + +bool int8eq(int64 * val1, int64 * val2); +bool int8ne(int64 * val1, int64 * val2); +bool int8lt(int64 * val1, int64 * val2); +bool int8gt(int64 * val1, int64 * val2); +bool int8le(int64 * val1, int64 * val2); +bool int8ge(int64 * val1, int64 * val2); + +bool int84eq(int64 * val1, int32 val2); +bool int84ne(int64 * val1, int32 val2); +bool int84lt(int64 * val1, int32 val2); +bool int84gt(int64 * val1, int32 val2); +bool int84le(int64 * val1, int32 val2); +bool int84ge(int64 * val1, int32 val2); + +int64 *int8um(int64 * val); +int64 *int8pl(int64 * val1, int64 * val2); +int64 *int8mi(int64 * val1, int64 * val2); +int64 
*int8mul(int64 * val1, int64 * val2); +int64 *int8div(int64 * val1, int64 * val2); + +int64 *int48(int32 val); +int32 int84(int64 * val); + #if FALSE -int64 *int28(int16 val); -int16 int82(int64 *val); +int64 *int28(int16 val); +int16 int82(int64 * val); + #endif -float64 i8tod(int64 *val); -int64 *dtoi8(float64 val); +float64 i8tod(int64 * val); +int64 *dtoi8(float64 val); #if USE_LOCAL_CODE @@ -88,7 +93,7 @@ int64 *dtoi8(float64 val); /*********************************************************************** ** - ** Routines for 64-bit integers. + ** Routines for 64-bit integers. ** ***********************************************************************/ @@ -98,264 +103,289 @@ int64 *dtoi8(float64 val); /* int8in() */ -int64 *int8in(char *str) +int64 * +int8in(char *str) { - int64 *result = PALLOCTYPE(int64); + int64 *result = PALLOCTYPE(int64); #if HAVE_64BIT_INTS - if (!PointerIsValid(str)) - elog (WARN,"Bad (null) int8 external representation",NULL); + if (!PointerIsValid(str)) + elog(WARN, "Bad (null) int8 external representation", NULL); - if (sscanf(str, INT64_FORMAT, result) != 1) - elog(WARN,"Bad int8 external representation '%s'",str); + if (sscanf(str, INT64_FORMAT, result) != 1) + elog(WARN, "Bad int8 external representation '%s'", str); #else - elog(WARN,"64-bit integers are not supported",NULL); - result = NULL; + elog(WARN, "64-bit integers are not supported", NULL); + result = NULL; #endif - return(result); -} /* int8in() */ + return (result); +} /* int8in() */ /* int8out() */ -char *int8out(int64 *val) +char * +int8out(int64 * val) { - char *result; + char *result; - int len; - char buf[MAXINT8LEN+1]; + int len; + char buf[MAXINT8LEN + 1]; #if HAVE_64BIT_INTS - if (!PointerIsValid(val)) - return(NULL); + if (!PointerIsValid(val)) + return (NULL); - if ((len = snprintf( buf, MAXINT8LEN, INT64_FORMAT, *val)) < 0) - elog (WARN,"Unable to format int8",NULL); + if ((len = snprintf(buf, MAXINT8LEN, INT64_FORMAT, *val)) < 0) + elog(WARN, "Unable to format 
int8", NULL); - result = PALLOC(len+1); + result = PALLOC(len + 1); - strcpy(result, buf); + strcpy(result, buf); #else - elog(WARN,"64-bit integers are not supported",NULL); - result = NULL; + elog(WARN, "64-bit integers are not supported", NULL); + result = NULL; #endif - return( result); -} /* int8out() */ + return (result); +} /* int8out() */ /*---------------------------------------------------------- - * Relational operators for int8s. + * Relational operators for int8s. *---------------------------------------------------------*/ /* int8relop() * Is val1 relop val2? */ -bool int8eq(int64 *val1, int64 *val2) +bool +int8eq(int64 * val1, int64 * val2) { - return(*val1 == *val2); -} /* int8eq() */ + return (*val1 == *val2); +} /* int8eq() */ -bool int8ne(int64 *val1, int64 *val2) +bool +int8ne(int64 * val1, int64 * val2) { - return(*val1 != *val2); -} /* int8ne() */ + return (*val1 != *val2); +} /* int8ne() */ -bool int8lt(int64 *val1, int64 *val2) +bool +int8lt(int64 * val1, int64 * val2) { - return(*val1 < *val2); -} /* int8lt() */ + return (*val1 < *val2); +} /* int8lt() */ -bool int8gt(int64 *val1, int64 *val2) +bool +int8gt(int64 * val1, int64 * val2) { - return(*val1 > *val2); -} /* int8gt() */ + return (*val1 > *val2); +} /* int8gt() */ -bool int8le(int64 *val1, int64 *val2) +bool +int8le(int64 * val1, int64 * val2) { - return(*val1 <= *val2); -} /* int8le() */ + return (*val1 <= *val2); +} /* int8le() */ -bool int8ge(int64 *val1, int64 *val2) +bool +int8ge(int64 * val1, int64 * val2) { - return(*val1 >= *val2); -} /* int8ge() */ + return (*val1 >= *val2); +} /* int8ge() */ /* int84relop() * Is 64-bit val1 relop 32-bit val2? 
*/ -bool int84eq(int64 *val1, int32 val2) +bool +int84eq(int64 * val1, int32 val2) { - return(*val1 == val2); -} /* int84eq() */ + return (*val1 == val2); +} /* int84eq() */ -bool int84ne(int64 *val1, int32 val2) +bool +int84ne(int64 * val1, int32 val2) { - return(*val1 != val2); -} /* int84ne() */ + return (*val1 != val2); +} /* int84ne() */ -bool int84lt(int64 *val1, int32 val2) +bool +int84lt(int64 * val1, int32 val2) { - return(*val1 < val2); -} /* int84lt() */ + return (*val1 < val2); +} /* int84lt() */ -bool int84gt(int64 *val1, int32 val2) +bool +int84gt(int64 * val1, int32 val2) { - return(*val1 > val2); -} /* int84gt() */ + return (*val1 > val2); +} /* int84gt() */ -bool int84le(int64 *val1, int32 val2) +bool +int84le(int64 * val1, int32 val2) { - return(*val1 <= val2); -} /* int84le() */ + return (*val1 <= val2); +} /* int84le() */ -bool int84ge(int64 *val1, int32 val2) +bool +int84ge(int64 * val1, int32 val2) { - return(*val1 >= val2); -} /* int84ge() */ + return (*val1 >= val2); +} /* int84ge() */ /*---------------------------------------------------------- - * Arithmetic operators on 64-bit integers. + * Arithmetic operators on 64-bit integers. 
*---------------------------------------------------------*/ -int64 *int8um(int64 *val) +int64 * +int8um(int64 * val) { - int64 *result = PALLOCTYPE(int64); + int64 *result = PALLOCTYPE(int64); - if (!PointerIsValid(val)) - return NULL; + if (!PointerIsValid(val)) + return NULL; - *result = (- *val); + *result = (-*val); - return(result); -} /* int8um() */ + return (result); +} /* int8um() */ -int64 *int8pl(int64 *val1, int64 *val2) +int64 * +int8pl(int64 * val1, int64 * val2) { - int64 *result = PALLOCTYPE(int64); + int64 *result = PALLOCTYPE(int64); - if ((!PointerIsValid(val1)) || (!PointerIsValid(val2))) - return NULL; + if ((!PointerIsValid(val1)) || (!PointerIsValid(val2))) + return NULL; - *result = *val1 + *val2; + *result = *val1 + *val2; - return(result); -} /* int8pl() */ + return (result); +} /* int8pl() */ -int64 *int8mi(int64 *val1, int64 *val2) +int64 * +int8mi(int64 * val1, int64 * val2) { - int64 *result = PALLOCTYPE(int64); + int64 *result = PALLOCTYPE(int64); - if ((!PointerIsValid(val1)) || (!PointerIsValid(val2))) - return NULL; + if ((!PointerIsValid(val1)) || (!PointerIsValid(val2))) + return NULL; - *result = *val1 - *val2; + *result = *val1 - *val2; - return(result); -} /* int8mi() */ + return (result); +} /* int8mi() */ -int64 *int8mul(int64 *val1, int64 *val2) +int64 * +int8mul(int64 * val1, int64 * val2) { - int64 *result = PALLOCTYPE(int64); + int64 *result = PALLOCTYPE(int64); - if ((!PointerIsValid(val1)) || (!PointerIsValid(val2))) - return NULL; + if ((!PointerIsValid(val1)) || (!PointerIsValid(val2))) + return NULL; - *result = *val1 * *val2; + *result = *val1 * *val2; - return(result); -} /* int8mul() */ + return (result); +} /* int8mul() */ -int64 *int8div(int64 *val1, int64 *val2) +int64 * +int8div(int64 * val1, int64 * val2) { - int64 *result = PALLOCTYPE(int64); + int64 *result = PALLOCTYPE(int64); - if ((!PointerIsValid(val1)) || (!PointerIsValid(val2))) - return NULL; + if ((!PointerIsValid(val1)) || (!PointerIsValid(val2))) 
+ return NULL; - *result = *val1 / *val2; + *result = *val1 / *val2; - return(result); -} /* int8div() */ + return (result); +} /* int8div() */ /*---------------------------------------------------------- - * Conversion operators. + * Conversion operators. *---------------------------------------------------------*/ -int64 *int48(int32 val) +int64 * +int48(int32 val) { - int64 *result = PALLOCTYPE(int64); + int64 *result = PALLOCTYPE(int64); - *result = val; + *result = val; - return(result); -} /* int48() */ + return (result); +} /* int48() */ -int32 int84(int64 *val) +int32 +int84(int64 * val) { - int32 result; + int32 result; - if (!PointerIsValid(val)) - elog(WARN,"Invalid (null) int64, can't convert int8 to int4",NULL); + if (!PointerIsValid(val)) + elog(WARN, "Invalid (null) int64, can't convert int8 to int4", NULL); - if ((*val < INT_MIN) || (*val > INT_MAX)) - elog(WARN,"int8 conversion to int4 is out of range",NULL); + if ((*val < INT_MIN) || (*val > INT_MAX)) + elog(WARN, "int8 conversion to int4 is out of range", NULL); - result = *val; + result = *val; - return(result); -} /* int84() */ + return (result); +} /* int84() */ #if FALSE -int64 *int28(int16 val) +int64 * +int28(int16 val) { - int64 *result; + int64 *result; - if (!PointerIsValid(result = PALLOCTYPE(int64))) - elog(WARN,"Memory allocation failed, can't convert int8 to int2",NULL); + if (!PointerIsValid(result = PALLOCTYPE(int64))) + elog(WARN, "Memory allocation failed, can't convert int8 to int2", NULL); - *result = val; + *result = val; - return(result); -} /* int28() */ + return (result); +} /* int28() */ -int16 int82(int64 *val) +int16 +int82(int64 * val) { - int16 result; + int16 result; - if (!PointerIsValid(val)) - elog(WARN,"Invalid (null) int8, can't convert to int2",NULL); + if (!PointerIsValid(val)) + elog(WARN, "Invalid (null) int8, can't convert to int2", NULL); - result = *val; + result = *val; + + return (result); +} /* int82() */ - return(result); -} /* int82() */ #endif 
-float64 i8tod(int64 *val) +float64 +i8tod(int64 * val) { - float64 result = PALLOCTYPE(float64data); + float64 result = PALLOCTYPE(float64data); - *result = *val; + *result = *val; - return(result); -} /* i8tod() */ + return (result); +} /* i8tod() */ -int64 *dtoi8(float64 val) +int64 * +dtoi8(float64 val) { - int64 *result = PALLOCTYPE(int64); - - if ((*val < (-pow(2,64)+1)) || (*val > (pow(2,64)-1))) - elog(WARN,"Floating point conversion to int64 is out of range",NULL); + int64 *result = PALLOCTYPE(int64); - *result = *val; + if ((*val < (-pow(2, 64) + 1)) || (*val > (pow(2, 64) - 1))) + elog(WARN, "Floating point conversion to int64 is out of range", NULL); - return(result); -} /* dtoi8() */ + *result = *val; + return (result); +} /* dtoi8() */ diff --git a/contrib/pginterface/halt.c b/contrib/pginterface/halt.c index 58ca11a587..000e4a9c40 100644 --- a/contrib/pginterface/halt.c +++ b/contrib/pginterface/halt.c @@ -1,8 +1,8 @@ /* ** -** halt.c +** halt.c ** -** This is used to print out error messages and exit +** This is used to print out error messages and exit */ #include <varargs.h> @@ -15,44 +15,46 @@ /*------------------------------------------------------------------------- ** -** halt - print error message, and call clean up routine or exit +** halt - print error message, and call clean up routine or exit ** **------------------------------------------------------------------------*/ /*VARARGS*/ -void halt(va_alist) +void +halt(va_alist) va_dcl { - va_list arg_ptr; - char *format, *pstr; - void (*sig_func)(); + va_list arg_ptr; + char *format, + *pstr; + void (*sig_func) (); va_start(arg_ptr); - format = va_arg(arg_ptr,char *); - if (strncmp(format,"PERROR", 6) != 0) - vfprintf(stderr,format,arg_ptr); + format = va_arg(arg_ptr, char *); + if (strncmp(format, "PERROR", 6) != 0) + vfprintf(stderr, format, arg_ptr); else { - for (pstr=format+6; *pstr == ' ' || *pstr == ':'; pstr++) + for (pstr = format + 6; *pstr == ' ' || *pstr == ':'; pstr++) ; - 
vfprintf(stderr,pstr,arg_ptr); + vfprintf(stderr, pstr, arg_ptr); perror(""); - } + } va_end(arg_ptr); fflush(stderr); - /* call one clean up function if defined */ - if ( (sig_func = signal(SIGTERM, SIG_DFL)) != SIG_DFL && - sig_func != SIG_IGN) - (*sig_func)(0); - else if ( (sig_func = signal(SIGHUP, SIG_DFL)) != SIG_DFL && - sig_func != SIG_IGN) - (*sig_func)(0); - else if ( (sig_func = signal(SIGINT, SIG_DFL)) != SIG_DFL && - sig_func != SIG_IGN) - (*sig_func)(0); - else if ( (sig_func = signal(SIGQUIT, SIG_DFL)) != SIG_DFL && - sig_func != SIG_IGN) - (*sig_func)(0); + /* call one clean up function if defined */ + if ((sig_func = signal(SIGTERM, SIG_DFL)) != SIG_DFL && + sig_func != SIG_IGN) + (*sig_func) (0); + else if ((sig_func = signal(SIGHUP, SIG_DFL)) != SIG_DFL && + sig_func != SIG_IGN) + (*sig_func) (0); + else if ((sig_func = signal(SIGINT, SIG_DFL)) != SIG_DFL && + sig_func != SIG_IGN) + (*sig_func) (0); + else if ((sig_func = signal(SIGQUIT, SIG_DFL)) != SIG_DFL && + sig_func != SIG_IGN) + (*sig_func) (0); exit(1); } diff --git a/contrib/pginterface/halt.h b/contrib/pginterface/halt.h index cb4ea545b9..8e1af05e4c 100644 --- a/contrib/pginterface/halt.h +++ b/contrib/pginterface/halt.h @@ -3,5 +3,4 @@ ** */ -void halt(); - +void halt(); diff --git a/contrib/pginterface/pginsert.c b/contrib/pginterface/pginsert.c index 92c869b383..9946b258ff 100644 --- a/contrib/pginterface/pginsert.c +++ b/contrib/pginterface/pginsert.c @@ -10,20 +10,25 @@ #include "halt.h" #include "pginterface.h" -int main(int argc, char **argv) +int +main(int argc, char **argv) { - char query[4000]; - int row =1; - int aint; - float afloat; - double adouble; - char achar[11], achar16[17], abpchar[11], avarchar[51], atext[51]; - time_t aabstime; - + char query[4000]; + int row = 1; + int aint; + float afloat; + double adouble; + char achar[11], + achar16[17], + abpchar[11], + avarchar[51], + atext[51]; + time_t aabstime; + if (argc != 2) - halt("Usage: %s database\n",argv[0]); + 
halt("Usage: %s database\n", argv[0]); - connectdb(argv[1],NULL,NULL,NULL,NULL); + connectdb(argv[1], NULL, NULL, NULL, NULL); on_error_continue(); doquery("DROP TABLE testfetch"); @@ -42,9 +47,9 @@ int main(int argc, char **argv) aabstime abstime) \ "); - while(1) + while (1) { - sprintf(query,"INSERT INTO testfetch VALUES ( \ + sprintf(query, "INSERT INTO testfetch VALUES ( \ %d, \ 2322.12, \ '923121.0323'::float8, \ @@ -55,44 +60,43 @@ int main(int argc, char **argv) 'Ernie', \ 'now' )", row); doquery(query); - + doquery("BEGIN WORK"); - doquery("DECLARE c_testfetch BINARY CURSOR FOR \ + doquery("DECLARE c_testfetch BINARY CURSOR FOR \ SELECT * FROM testfetch"); doquery("FETCH ALL IN c_testfetch"); while (fetch( - &aint, - &afloat, - &adouble, - achar, - achar16, - abpchar, - avarchar, - atext, - &aabstime) != END_OF_TUPLES) - printf("int %d\nfloat %f\ndouble %f\nchar %s\nchar16 %s\n\ + &aint, + &afloat, + &adouble, + achar, + achar16, + abpchar, + avarchar, + atext, + &aabstime) != END_OF_TUPLES) + printf("int %d\nfloat %f\ndouble %f\nchar %s\nchar16 %s\n\ bpchar %s\nvarchar %s\ntext %s\nabstime %s", - aint, - afloat, - adouble, - achar, - achar16, - abpchar, - avarchar, - atext, - ctime(&aabstime)); + aint, + afloat, + adouble, + achar, + achar16, + abpchar, + avarchar, + atext, + ctime(&aabstime)); + - doquery("CLOSE c_testfetch"); doquery("COMMIT WORK"); - printf("--- %-d rows inserted so far\n",row); - + printf("--- %-d rows inserted so far\n", row); + row++; } disconnectdb(); return 0; } - diff --git a/contrib/pginterface/pginterface.c b/contrib/pginterface/pginterface.c index 7593cd9f97..f1ade9dcd4 100644 --- a/contrib/pginterface/pginterface.c +++ b/contrib/pginterface/pginterface.c @@ -12,77 +12,82 @@ #include "halt.h" #include "pginterface.h" -static void sig_disconnect(); -static void set_signals(); +static void sig_disconnect(); +static void set_signals(); #define NUL '\0' /* GLOBAL VARIABLES */ -static PGconn* conn; -static PGresult* res = NULL; 
+static PGconn *conn; +static PGresult *res = NULL; #define ON_ERROR_STOP 0 -#define ON_ERROR_CONTINUE 1 +#define ON_ERROR_CONTINUE 1 -static int on_error_state = ON_ERROR_STOP; +static int on_error_state = ON_ERROR_STOP; /* LOCAL VARIABLES */ -static sigset_t block_sigs, unblock_sigs; -static int tuple; +static sigset_t block_sigs, + unblock_sigs; +static int tuple; /* ** -** connectdb - returns PGconn structure +** connectdb - returns PGconn structure ** */ -PGconn *connectdb( char *dbName, - char *pghost, - char *pgport, - char *pgoptions, - char *pgtty) +PGconn * +connectdb(char *dbName, + char *pghost, + char *pgport, + char *pgoptions, + char *pgtty) { /* make a connection to the database */ conn = PQsetdb(pghost, pgport, pgoptions, pgtty, dbName); if (PQstatus(conn) == CONNECTION_BAD) halt("Connection to database '%s' failed.\n%s\n", dbName, - PQerrorMessage(conn)); + PQerrorMessage(conn)); set_signals(); return conn; } /* ** -** disconnectdb +** disconnectdb ** */ -void disconnectdb() +void +disconnectdb() { PQfinish(conn); } /* ** -** doquery - returns PGresult structure +** doquery - returns PGresult structure ** */ -PGresult *doquery(char *query) +PGresult * +doquery(char *query) { if (res != NULL) PQclear(res); - sigprocmask(SIG_SETMASK,&block_sigs,NULL); + sigprocmask(SIG_SETMASK, &block_sigs, NULL); res = PQexec(conn, query); - sigprocmask(SIG_SETMASK,&unblock_sigs,NULL); + sigprocmask(SIG_SETMASK, &unblock_sigs, NULL); if (on_error_state == ON_ERROR_STOP && (res == NULL || - PQresultStatus(res) == PGRES_BAD_RESPONSE || - PQresultStatus(res) == PGRES_NONFATAL_ERROR || - PQresultStatus(res) == PGRES_FATAL_ERROR)) + PQresultStatus(res) == PGRES_BAD_RESPONSE || + PQresultStatus(res) == PGRES_NONFATAL_ERROR || + PQresultStatus(res) == PGRES_FATAL_ERROR)) { if (res != NULL) - fprintf(stderr,"query error: %s\n",PQcmdStatus(res)); - else fprintf(stderr,"connection error: %s\n",PQerrorMessage(conn)); + fprintf(stderr, "query error: %s\n", PQcmdStatus(res)); + 
else + fprintf(stderr, "connection error: %s\n", PQerrorMessage(conn)); PQfinish(conn); halt("failed request: %s\n", query); } @@ -92,14 +97,16 @@ PGresult *doquery(char *query) /* ** -** fetch - returns tuple number (starts at 0), or the value END_OF_TUPLES -** NULL pointers are skipped +** fetch - returns tuple number (starts at 0), or the value END_OF_TUPLES +** NULL pointers are skipped ** */ -int fetch(void *param, ...) +int +fetch(void *param,...) { - va_list ap; - int arg, num_fields; + va_list ap; + int arg, + num_fields; num_fields = PQnfields(res); @@ -113,11 +120,11 @@ int fetch(void *param, ...) { if (PQfsize(res, arg) == -1) { - memcpy(param,PQgetvalue(res,tuple,arg),PQgetlength(res,tuple,arg)); - ((char *)param)[PQgetlength(res,tuple,arg)] = NUL; + memcpy(param, PQgetvalue(res, tuple, arg), PQgetlength(res, tuple, arg)); + ((char *) param)[PQgetlength(res, tuple, arg)] = NUL; } else - memcpy(param,PQgetvalue(res,tuple,arg),PQfsize(res,arg)); + memcpy(param, PQgetvalue(res, tuple, arg), PQfsize(res, arg)); } param = va_arg(ap, char *); } @@ -127,15 +134,17 @@ int fetch(void *param, ...) /* ** -** fetchwithnulls - returns tuple number (starts at 0), -** or the value END_OF_TUPLES -** Returns true or false into null indicator variables -** NULL pointers are skipped +** fetchwithnulls - returns tuple number (starts at 0), +** or the value END_OF_TUPLES +** Returns true or false into null indicator variables +** NULL pointers are skipped */ -int fetchwithnulls(void *param, ...) +int +fetchwithnulls(void *param,...) { - va_list ap; - int arg, num_fields; + va_list ap; + int arg, + num_fields; num_fields = PQnfields(res); @@ -149,17 +158,17 @@ int fetchwithnulls(void *param, ...) 
{ if (PQfsize(res, arg) == -1) { - memcpy(param,PQgetvalue(res,tuple,arg),PQgetlength(res,tuple,arg)); - ((char *)param)[PQgetlength(res,tuple,arg)] = NUL; + memcpy(param, PQgetvalue(res, tuple, arg), PQgetlength(res, tuple, arg)); + ((char *) param)[PQgetlength(res, tuple, arg)] = NUL; } else - memcpy(param,PQgetvalue(res,tuple,arg),PQfsize(res,arg)); + memcpy(param, PQgetvalue(res, tuple, arg), PQfsize(res, arg)); } param = va_arg(ap, char *); - if (PQgetisnull(res,tuple,arg) != 0) - *(int *)param = 1; + if (PQgetisnull(res, tuple, arg) != 0) + *(int *) param = 1; else - *(int *)param = 0; + *(int *) param = 0; param = va_arg(ap, char *); } va_end(ap); @@ -168,52 +177,56 @@ int fetchwithnulls(void *param, ...) /* ** -** on_error_stop +** on_error_stop ** */ -void on_error_stop() +void +on_error_stop() { on_error_state = ON_ERROR_STOP; } - + /* ** -** on_error_continue +** on_error_continue ** */ -void on_error_continue() +void +on_error_continue() { on_error_state = ON_ERROR_CONTINUE; } - + /* ** -** sig_disconnect +** sig_disconnect ** */ -static void sig_disconnect() +static void +sig_disconnect() { - fprintf(stderr,"exiting...\n"); + fprintf(stderr, "exiting...\n"); PQfinish(conn); exit(1); } /* ** -** set_signals +** set_signals ** */ -static void set_signals() +static void +set_signals() { sigemptyset(&block_sigs); sigemptyset(&unblock_sigs); - sigaddset(&block_sigs,SIGTERM); - sigaddset(&block_sigs,SIGHUP); - sigaddset(&block_sigs,SIGINT); -/* sigaddset(&block_sigs,SIGQUIT); no block */ - sigprocmask(SIG_SETMASK,&unblock_sigs,NULL); - signal(SIGTERM,sig_disconnect); - signal(SIGHUP,sig_disconnect); - signal(SIGINT,sig_disconnect); - signal(SIGQUIT,sig_disconnect); + sigaddset(&block_sigs, SIGTERM); + sigaddset(&block_sigs, SIGHUP); + sigaddset(&block_sigs, SIGINT); +/* sigaddset(&block_sigs,SIGQUIT); no block */ + sigprocmask(SIG_SETMASK, &unblock_sigs, NULL); + signal(SIGTERM, sig_disconnect); + signal(SIGHUP, sig_disconnect); + signal(SIGINT, 
sig_disconnect); + signal(SIGQUIT, sig_disconnect); } diff --git a/contrib/pginterface/pginterface.h b/contrib/pginterface/pginterface.h index 7e9cbdb5eb..65894eae34 100644 --- a/contrib/pginterface/pginterface.h +++ b/contrib/pginterface/pginterface.h @@ -3,12 +3,12 @@ * */ -PGresult *doquery(char *query); -PGconn *connectdb(); -void disconnectdb(); -int fetch(void *param, ...); -int fetchwithnulls(void *param, ...); -void on_error_continue(); -void on_error_stop(); +PGresult *doquery(char *query); +PGconn *connectdb(); +void disconnectdb(); +int fetch(void *param,...); +int fetchwithnulls(void *param,...); +void on_error_continue(); +void on_error_stop(); #define END_OF_TUPLES (-1) diff --git a/contrib/pginterface/pgnulltest.c b/contrib/pginterface/pgnulltest.c index 283106f726..2622edcaf6 100644 --- a/contrib/pginterface/pgnulltest.c +++ b/contrib/pginterface/pgnulltest.c @@ -12,29 +12,34 @@ #include <libpq-fe.h> #include <pginterface.h> -int main(int argc, char **argv) +int +main(int argc, char **argv) { - char query[4000]; - int row =1; - int aint; - float afloat; - double adouble; - char achar[11], achar16[17], abpchar[11], avarchar[51], atext[51]; - time_t aabstime; - int aint_null, - afloat_null, - adouble_null, - achar_null, - achar16_null, - abpchar_null, - avarchar_null, - atext_null, - aabstime_null; + char query[4000]; + int row = 1; + int aint; + float afloat; + double adouble; + char achar[11], + achar16[17], + abpchar[11], + avarchar[51], + atext[51]; + time_t aabstime; + int aint_null, + afloat_null, + adouble_null, + achar_null, + achar16_null, + abpchar_null, + avarchar_null, + atext_null, + aabstime_null; if (argc != 2) - halt("Usage: %s database\n",argv[0]); + halt("Usage: %s database\n", argv[0]); - connectdb(argv[1],NULL,NULL,NULL,NULL); + connectdb(argv[1], NULL, NULL, NULL, NULL); on_error_continue(); doquery("DROP TABLE testfetch"); @@ -54,7 +59,7 @@ int main(int argc, char **argv) "); #ifdef TEST_NON_NULLS - sprintf(query,"INSERT INTO 
testfetch VALUES ( \ + sprintf(query, "INSERT INTO testfetch VALUES ( \ 0, \ 0, \ 0, \ @@ -65,7 +70,7 @@ int main(int argc, char **argv) '', \ '');"); #else - sprintf(query,"INSERT INTO testfetch VALUES ( \ + sprintf(query, "INSERT INTO testfetch VALUES ( \ NULL, \ NULL, \ NULL, \ @@ -85,55 +90,54 @@ int main(int argc, char **argv) doquery("FETCH ALL IN c_testfetch"); if (fetchwithnulls( - &aint, - &aint_null, - &afloat, - &afloat_null, - &adouble, - &adouble_null, - achar, - &achar_null, - achar16, - &achar16_null, - abpchar, - &abpchar_null, - avarchar, - &avarchar_null, - atext, - &atext_null, - &aabstime, - &aabstime_null) != END_OF_TUPLES) - printf("int %d\nfloat %f\ndouble %f\nchar %s\nchar16 %s\n\ + &aint, + &aint_null, + &afloat, + &afloat_null, + &adouble, + &adouble_null, + achar, + &achar_null, + achar16, + &achar16_null, + abpchar, + &abpchar_null, + avarchar, + &avarchar_null, + atext, + &atext_null, + &aabstime, + &aabstime_null) != END_OF_TUPLES) + printf("int %d\nfloat %f\ndouble %f\nchar %s\nchar16 %s\n\ bpchar %s\nvarchar %s\ntext %s\nabstime %s\n", - aint, - afloat, - adouble, - achar, - achar16, - abpchar, - avarchar, - atext, - ctime(&aabstime)); - printf("NULL:\nint %d\nfloat %d\ndouble %d\nchar %d\nchar16 %d\n\ + aint, + afloat, + adouble, + achar, + achar16, + abpchar, + avarchar, + atext, + ctime(&aabstime)); + printf("NULL:\nint %d\nfloat %d\ndouble %d\nchar %d\nchar16 %d\n\ bpchar %d\nvarchar %d\ntext %d\nabstime %d\n", - aint_null, - afloat_null, - adouble_null, - achar_null, - achar16_null, - abpchar_null, - avarchar_null, - atext_null, - aabstime_null); + aint_null, + afloat_null, + adouble_null, + achar_null, + achar16_null, + abpchar_null, + avarchar_null, + atext_null, + aabstime_null); doquery("CLOSE c_testfetch"); doquery("COMMIT WORK"); - printf("--- %-d rows inserted so far\n",row); + printf("--- %-d rows inserted so far\n", row); row++; disconnectdb(); return 0; } - diff --git a/contrib/pginterface/pgwordcount.c 
b/contrib/pginterface/pgwordcount.c index f2d2d488fc..c447cdcaa0 100644 --- a/contrib/pginterface/pgwordcount.c +++ b/contrib/pginterface/pgwordcount.c @@ -10,17 +10,18 @@ #include <libpq-fe.h> #include "pginterface.h" -int main(int argc, char **argv) +int +main(int argc, char **argv) { - char query[4000]; - int row = 0; - int count; - char line[4000]; - + char query[4000]; + int row = 0; + int count; + char line[4000]; + if (argc != 2) - halt("Usage: %s database\n",argv[0]); + halt("Usage: %s database\n", argv[0]); - connectdb(argv[1],NULL,NULL,NULL,NULL); + connectdb(argv[1], NULL, NULL, NULL, NULL); on_error_continue(); doquery("DROP TABLE words"); on_error_stop(); @@ -35,33 +36,33 @@ int main(int argc, char **argv) word text_ops )\ "); - while(1) + while (1) { - if (scanf("%s",line) != 1) + if (scanf("%s", line) != 1) break; doquery("BEGIN WORK"); - sprintf(query,"\ + sprintf(query, "\ DECLARE c_words BINARY CURSOR FOR \ SELECT count(*) \ FROM words \ WHERE word = '%s'", line); doquery(query); doquery("FETCH ALL IN c_words"); - + while (fetch(&count) == END_OF_TUPLES) count = 0; doquery("CLOSE c_words"); doquery("COMMIT WORK"); if (count == 0) - sprintf(query,"\ + sprintf(query, "\ INSERT INTO words \ - VALUES (1, '%s')", line); + VALUES (1, '%s')", line); else - sprintf(query,"\ + sprintf(query, "\ UPDATE words \ SET matches = matches + 1 \ - WHERE word = '%s'", line); + WHERE word = '%s'", line); doquery(query); row++; } @@ -69,4 +70,3 @@ int main(int argc, char **argv) disconnectdb(); return 0; } - diff --git a/contrib/soundex/soundex.c b/contrib/soundex/soundex.c index 2ce6ef510f..44ad25808d 100644 --- a/contrib/soundex/soundex.c +++ b/contrib/soundex/soundex.c @@ -4,80 +4,86 @@ #include <string.h> #include <stdio.h> -#include "postgres.h" /* for char16, etc. */ -#include "utils/palloc.h" /* for palloc */ -#include "libpq-fe.h" /* for TUPLE */ +#include "postgres.h" /* for char16, etc. 
*/ +#include "utils/palloc.h" /* for palloc */ +#include "libpq-fe.h" /* for TUPLE */ #include <stdio.h> #include <ctype.h> /* prototype for soundex function */ -char *soundex(char *instr, char *outstr); +char *soundex(char *instr, char *outstr); -text *text_soundex(text *t) +text * +text_soundex(text * t) { - /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ - char *table = "01230120022455012623010202"; - int count = 0; - text *new_t; - - char outstr[6+1]; /* max length of soundex is 6 */ - char *instr; - - /* make a null-terminated string */ - instr=palloc(VARSIZE(t)+1); - memcpy(instr,VARDATA(t),VARSIZE(t)-VARHDRSZ); - instr[VARSIZE(t)-VARHDRSZ] = (char)0; - - /* load soundex into outstr */ - soundex(instr, outstr); - - /* Now the outstr contains the soundex of instr */ - /* copy outstr to new_t */ - new_t = (text *) palloc(strlen(outstr)+VARHDRSZ); - memset(new_t, 0, strlen(outstr)+1); - VARSIZE(new_t) = strlen(outstr)+VARHDRSZ; - memcpy((void *) VARDATA(new_t), - (void *) outstr, - strlen(outstr)); - - /* free instr */ - pfree(instr); - - return(new_t); + /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ + char *table = "01230120022455012623010202"; + int count = 0; + text *new_t; + + char outstr[6 + 1]; /* max length of soundex is 6 */ + char *instr; + + /* make a null-terminated string */ + instr = palloc(VARSIZE(t) + 1); + memcpy(instr, VARDATA(t), VARSIZE(t) - VARHDRSZ); + instr[VARSIZE(t) - VARHDRSZ] = (char) 0; + + /* load soundex into outstr */ + soundex(instr, outstr); + + /* Now the outstr contains the soundex of instr */ + /* copy outstr to new_t */ + new_t = (text *) palloc(strlen(outstr) + VARHDRSZ); + memset(new_t, 0, strlen(outstr) + 1); + VARSIZE(new_t) = strlen(outstr) + VARHDRSZ; + memcpy((void *) VARDATA(new_t), + (void *) outstr, + strlen(outstr)); + + /* free instr */ + pfree(instr); + + return (new_t); } -char *soundex(char *instr, char *outstr) -{ /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ - char *table = "01230120022455012623010202"; - int count = 0; - - while(!isalpha(instr[0]) 
&& instr[0]) - ++instr; - - if(!instr[0]) { /* Hey! Where'd the string go? */ - outstr[0]=(char)0; - return outstr; - } - - if(toupper(instr[0]) == 'P' && toupper(instr[1]) == 'H') { - instr[0] = 'F'; - instr[1] = 'A'; - } - - *outstr++ = (char)toupper(*instr++); - - while(*instr && count < 5) { - if(isalpha(*instr) && *instr != *(instr-1)) { - *outstr = table[toupper(instr[0]) - 'A']; - if(*outstr != '0') { - ++outstr; - ++count; - } - } - ++instr; - } - - *outstr = '\0'; - return(outstr); +char * +soundex(char *instr, char *outstr) +{ /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ + char *table = "01230120022455012623010202"; + int count = 0; + + while (!isalpha(instr[0]) && instr[0]) + ++instr; + + if (!instr[0]) + { /* Hey! Where'd the string go? */ + outstr[0] = (char) 0; + return outstr; + } + + if (toupper(instr[0]) == 'P' && toupper(instr[1]) == 'H') + { + instr[0] = 'F'; + instr[1] = 'A'; + } + + *outstr++ = (char) toupper(*instr++); + + while (*instr && count < 5) + { + if (isalpha(*instr) && *instr != *(instr - 1)) + { + *outstr = table[toupper(instr[0]) - 'A']; + if (*outstr != '0') + { + ++outstr; + ++count; + } + } + ++instr; + } + + *outstr = '\0'; + return (outstr); } - diff --git a/contrib/string/string_io.c b/contrib/string/string_io.c index b6158c74e4..e2cb2017c6 100644 --- a/contrib/string/string_io.c +++ b/contrib/string/string_io.c @@ -17,14 +17,14 @@ /* define this if you want to see iso-8859 characters */ #define ISO8859 -#define MIN(x, y) ((x) < (y) ? (x) : (y)) -#define VALUE(char) ((char) - '0') -#define DIGIT(val) ((val) + '0') -#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7')) +#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) +#define VALUE(char) ((char) - '0') +#define DIGIT(val) ((val) + '0') +#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7')) #ifndef ISO8859 -#define NOTPRINTABLE(c) (!isprint(c)) +#define NOTPRINTABLE(c) (!isprint(c)) #else -#define NOTPRINTABLE(c) (!isprint(c) && ((c) < 0xa0)) +#define NOTPRINTABLE(c) (!isprint(c) && ((c) < 0xa0)) #endif /* @@ -36,115 +36,129 @@ * The function is used by output methods of various string types. * * Arguments: - * data - input data (can be NULL) - * size - optional size of data. A negative value indicates - * that data is a null terminated string. + * data - input data (can be NULL) + * size - optional size of data. A negative value indicates + * that data is a null terminated string. * * Returns: - * a pointer to a new string containing the printable - * representation of data. + * a pointer to a new string containing the printable + * representation of data. */ -char * +char * string_output(char *data, int size) { - register unsigned char c, *p, *r, *result; - register int l, len; + register unsigned char c, + *p, + *r, + *result; + register int l, + len; + + if (data == NULL) + { + result = (char *) palloc(2); + result[0] = '-'; + result[1] = '\0'; + return (result); + } - if (data == NULL) { - result = (char *) palloc(2); - result[0] = '-'; - result[1] = '\0'; - return (result); - } - - if (size < 0) { - size = strlen(data); - } - - /* adjust string length for escapes */ - len = size; - for (p=data,l=size; l>0; p++,l--) { - switch (*p) { - case '\\': - case '"' : - case '{': - case '}': - case '\b': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': - len++; - break; - default: - if (NOTPRINTABLE(*p)) { - len += 3; - } + if (size < 0) + { + size = strlen(data); + } + + /* adjust string length for escapes */ + len = size; + for (p = data, l = size; l > 0; p++, l--) + { + switch (*p) + { + case '\\': + case '"': + case '{': + case '}': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + case 
'\v': + len++; + break; + default: + if (NOTPRINTABLE(*p)) + { + len += 3; + } + } } - } - len++; - - result = (char *) palloc(len); - - for (p=data,r=result,l=size; (l > 0) && (c = *p); p++,l--) { - switch (c) { - case '\\': - case '"' : - case '{': - case '}': - *r++ = '\\'; - *r++ = c; - break; - case '\b': - *r++ = '\\'; - *r++ = 'b'; - break; - case '\f': - *r++ = '\\'; - *r++ = 'f'; - break; - case '\n': - *r++ = '\\'; - *r++ = 'n'; - break; - case '\r': - *r++ = '\\'; - *r++ = 'r'; - break; - case '\t': - *r++ = '\\'; - *r++ = 't'; - break; - case '\v': - *r++ = '\\'; - *r++ = 'v'; - break; - default: - if (NOTPRINTABLE(c)) { - *r = '\\'; - r += 3; - *r-- = DIGIT(c & 07); - c >>= 3; - *r-- = DIGIT(c & 07); - c >>= 3; - *r = DIGIT(c & 03); - r += 3; - } else { - *r++ = c; - } + len++; + + result = (char *) palloc(len); + + for (p = data, r = result, l = size; (l > 0) && (c = *p); p++, l--) + { + switch (c) + { + case '\\': + case '"': + case '{': + case '}': + *r++ = '\\'; + *r++ = c; + break; + case '\b': + *r++ = '\\'; + *r++ = 'b'; + break; + case '\f': + *r++ = '\\'; + *r++ = 'f'; + break; + case '\n': + *r++ = '\\'; + *r++ = 'n'; + break; + case '\r': + *r++ = '\\'; + *r++ = 'r'; + break; + case '\t': + *r++ = '\\'; + *r++ = 't'; + break; + case '\v': + *r++ = '\\'; + *r++ = 'v'; + break; + default: + if (NOTPRINTABLE(c)) + { + *r = '\\'; + r += 3; + *r-- = DIGIT(c & 07); + c >>= 3; + *r-- = DIGIT(c & 07); + c >>= 3; + *r = DIGIT(c & 03); + r += 3; + } + else + { + *r++ = c; + } + } } - } - *r = '\0'; + *r = '\0'; - return((char *) result); + return ((char *) result); } /* * string_input() -- * - * This function accepts a C string in input and copies it into a new + * This function accepts a C string in input and copies it into a new * object allocated with palloc() translating all escape sequences. * An optional header can be allocatd before the string, for example * to hold the length of a varlena object. 
@@ -153,209 +167,231 @@ string_output(char *data, int size) * receive strings in internal form. * * Arguments: - * str - input string possibly with escapes - * size - the required size of new data. A value of 0 - * indicates a variable size string, while a - * negative value indicates a variable size string - * of size not greater than this absolute value. - * hdrsize - size of an optional header to be allocated before - * the data. It must then be filled by the caller. - * rtn_size - an optional pointer to an int variable where the - * size of the new string is stored back. + * str - input string possibly with escapes + * size - the required size of new data. A value of 0 + * indicates a variable size string, while a + * negative value indicates a variable size string + * of size not greater than this absolute value. + * hdrsize - size of an optional header to be allocated before + * the data. It must then be filled by the caller. + * rtn_size - an optional pointer to an int variable where the + * size of the new string is stored back. * * Returns: - * a pointer to the new string or the header. + * a pointer to the new string or the header. 
*/ -char * +char * string_input(char *str, int size, int hdrsize, int *rtn_size) { - register unsigned char *p, *r; - unsigned char *result; - int len; - - if ((str == NULL) || (hdrsize < 0)) { - return (char *) NULL; - } - - /* Compute result size */ - len = strlen(str); - for (p=str; *p; ) { - if (*p++ == '\\') { - if (ISOCTAL(*p)) { - if (ISOCTAL(*(p+1))) { - p++; - len--; - } - if (ISOCTAL(*(p+1))) { - p++; - len--; + register unsigned char *p, + *r; + unsigned char *result; + int len; + + if ((str == NULL) || (hdrsize < 0)) + { + return (char *) NULL; + } + + /* Compute result size */ + len = strlen(str); + for (p = str; *p;) + { + if (*p++ == '\\') + { + if (ISOCTAL(*p)) + { + if (ISOCTAL(*(p + 1))) + { + p++; + len--; + } + if (ISOCTAL(*(p + 1))) + { + p++; + len--; + } + } + if (*p) + p++; + len--; } - } - if (*p) p++; - len--; } - } - - /* result has variable length */ - if (size == 0) { - size = len+1; - } else - - /* result has variable length with maximum size */ - if (size < 0) { - size = MIN(len, - size)+1; - } - - result = (char *) palloc(hdrsize+size); - memset(result, 0, hdrsize+size); - if (rtn_size) { - *rtn_size = size; - } - - r = result + hdrsize; - for (p=str; *p; ) { - register unsigned char c; - if ((c = *p++) == '\\') { - switch (c = *p++) { - case '\0': - p--; - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - c = VALUE(c); - if (isdigit(*p)) { - c = (c<<3) + VALUE(*p++); + + /* result has variable length */ + if (size == 0) + { + size = len + 1; + } + else + /* result has variable length with maximum size */ + if (size < 0) + { + size = MIN(len, -size) + 1; + } + + result = (char *) palloc(hdrsize + size); + memset(result, 0, hdrsize + size); + if (rtn_size) + { + *rtn_size = size; + } + + r = result + hdrsize; + for (p = str; *p;) + { + register unsigned char c; + + if ((c = *p++) == '\\') + { + switch (c = *p++) + { + case '\0': + p--; + break; + case '0': + case '1': + case '2': 
+ case '3': + case '4': + case '5': + case '6': + case '7': + c = VALUE(c); + if (isdigit(*p)) + { + c = (c << 3) + VALUE(*p++); + } + if (isdigit(*p)) + { + c = (c << 3) + VALUE(*p++); + } + *r++ = c; + break; + case 'b': + *r++ = '\b'; + break; + case 'f': + *r++ = '\f'; + break; + case 'n': + *r++ = '\n'; + break; + case 'r': + *r++ = '\r'; + break; + case 't': + *r++ = '\t'; + break; + case 'v': + *r++ = '\v'; + break; + default: + *r++ = c; + } } - if (isdigit(*p)) { - c = (c<<3) + VALUE(*p++); + else + { + *r++ = c; } - *r++ = c; - break; - case 'b': - *r++ = '\b'; - break; - case 'f': - *r++ = '\f'; - break; - case 'n': - *r++ = '\n'; - break; - case 'r': - *r++ = '\r'; - break; - case 't': - *r++ = '\t'; - break; - case 'v': - *r++ = '\v'; - break; - default: - *r++ = c; - } - } else { - *r++ = c; } - } - return((char *) result); + return ((char *) result); } -char * +char * c_charout(int32 c) { - char str[2]; + char str[2]; - str[0] = (char) c; - str[1] = '\0'; + str[0] = (char) c; + str[1] = '\0'; - return (string_output(str, 1)); + return (string_output(str, 1)); } -char * +char * c_char2out(uint16 s) { - return (string_output((char *) &s, 2)); + return (string_output((char *) &s, 2)); } -char * +char * c_char4out(uint32 s) { - return (string_output((char *) &s, 4)); + return (string_output((char *) &s, 4)); } -char * +char * c_char8out(char *s) { - return (string_output(s, 8)); + return (string_output(s, 8)); } -char * +char * c_char16out(char *s) { - return (string_output(s, 16)); + return (string_output(s, 16)); } /* * This can be used for text, bytea, SET and unknown data types */ -char * -c_textout(struct varlena *vlena) +char * +c_textout(struct varlena * vlena) { - int len = 0; - char *s = NULL; - - if (vlena) { - len = VARSIZE(vlena) - VARHDRSZ; - s = VARDATA(vlena); - } - return (string_output(s, len)); + int len = 0; + char *s = NULL; + + if (vlena) + { + len = VARSIZE(vlena) - VARHDRSZ; + s = VARDATA(vlena); + } + return (string_output(s, 
len)); } /* * This can be used for varchar and bpchar strings */ -char * +char * c_varcharout(char *s) { - int len; + int len; - if (s) { - len = *(int32*)s - 4; - s += 4; - } - return (string_output(s, len)); + if (s) + { + len = *(int32 *) s - 4; + s += 4; + } + return (string_output(s, len)); } #ifdef 0 struct varlena * c_textin(char *str) { - struct varlena *result; - int len; + struct varlena *result; + int len; - if (str == NULL) { - return ((struct varlena *) NULL); - } + if (str == NULL) + { + return ((struct varlena *) NULL); + } - result = (struct varlena *) string_input(str, 0, VARHDRSZ, &len); - VARSIZE(result) = len; + result = (struct varlena *) string_input(str, 0, VARHDRSZ, &len); + VARSIZE(result) = len; - return (result); + return (result); } -char * +char * c_char16in(char *str) { - return (string_input(str, 16, 0, NULL)); + return (string_input(str, 16, 0, NULL)); } -#endif +#endif diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 60ec3e4d3a..1725769030 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -1,18 +1,18 @@ /*------------------------------------------------------------------------- * * heaptuple.c-- - * This file contains heap tuple accessor and mutator routines, as well - * as a few various tuple utilities. + * This file contains heap tuple accessor and mutator routines, as well + * as a few various tuple utilities. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/heaptuple.c,v 1.21 1997/08/26 23:31:20 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/heaptuple.c,v 1.22 1997/09/07 04:37:30 momjian Exp $ * * NOTES - * The old interface functions have been converted to macros - * and moved to heapam.h + * The old interface functions have been converted to macros + * and moved to heapam.h * *------------------------------------------------------------------------- */ @@ -27,9 +27,9 @@ #include <utils/memutils.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif @@ -37,902 +37,991 @@ #if !defined(NO_ASSERT_CHECKING) && defined(sparc) && defined(sunos4) #define register -#endif /* !NO_ASSERT_CHECKING && sparc && sunos4 */ +#endif /* !NO_ASSERT_CHECKING && sparc && sunos4 */ /* ---------------------------------------------------------------- - * misc support routines + * misc support routines * ---------------------------------------------------------------- */ /* ---------------- - * ComputeDataSize + * ComputeDataSize * ---------------- */ Size ComputeDataSize(TupleDesc tupleDesc, - Datum value[], - char nulls[]) + Datum value[], + char nulls[]) { - uint32 data_length; - int i; - int numberOfAttributes = tupleDesc->natts; - AttributeTupleForm *att = tupleDesc->attrs; - - for (data_length = 0, i = 0; i < numberOfAttributes; i++) { - if (nulls[i] != ' ') continue; - - switch (att[i]->attlen) { - case -1: - /* - * This is the size of the disk representation and so - * must include the additional sizeof long. 
- */ - if (att[i]->attalign == 'd') { - data_length = DOUBLEALIGN(data_length) - + VARSIZE(DatumGetPointer(value[i])); - } else { - data_length = INTALIGN(data_length) - + VARSIZE(DatumGetPointer(value[i])); - } - break; - case sizeof(char): - data_length++; - break; - case sizeof(short): - data_length = SHORTALIGN(data_length + sizeof(short)); - break; - case sizeof(int32): - data_length = INTALIGN(data_length + sizeof(int32)); - break; - default: - if (att[i]->attlen < sizeof(int32)) - elog(WARN, "ComputeDataSize: attribute %d has len %d", - i, att[i]->attlen); - if (att[i]->attalign == 'd') - data_length = DOUBLEALIGN(data_length) + att[i]->attlen; - else - data_length = LONGALIGN(data_length) + att[i]->attlen; - break; + uint32 data_length; + int i; + int numberOfAttributes = tupleDesc->natts; + AttributeTupleForm *att = tupleDesc->attrs; + + for (data_length = 0, i = 0; i < numberOfAttributes; i++) + { + if (nulls[i] != ' ') + continue; + + switch (att[i]->attlen) + { + case -1: + + /* + * This is the size of the disk representation and so must + * include the additional sizeof long. 
+ */ + if (att[i]->attalign == 'd') + { + data_length = DOUBLEALIGN(data_length) + + VARSIZE(DatumGetPointer(value[i])); + } + else + { + data_length = INTALIGN(data_length) + + VARSIZE(DatumGetPointer(value[i])); + } + break; + case sizeof(char): + data_length++; + break; + case sizeof(short): + data_length = SHORTALIGN(data_length + sizeof(short)); + break; + case sizeof(int32): + data_length = INTALIGN(data_length + sizeof(int32)); + break; + default: + if (att[i]->attlen < sizeof(int32)) + elog(WARN, "ComputeDataSize: attribute %d has len %d", + i, att[i]->attlen); + if (att[i]->attalign == 'd') + data_length = DOUBLEALIGN(data_length) + att[i]->attlen; + else + data_length = LONGALIGN(data_length) + att[i]->attlen; + break; + } } - } - - return data_length; + + return data_length; } /* ---------------- - * DataFill + * DataFill * ---------------- */ void DataFill(char *data, - TupleDesc tupleDesc, - Datum value[], - char nulls[], - char *infomask, - bits8 *bit) + TupleDesc tupleDesc, + Datum value[], + char nulls[], + char *infomask, + bits8 * bit) { - bits8 *bitP = 0; - int bitmask = 0; - uint32 data_length; - int i; - int numberOfAttributes = tupleDesc->natts; - AttributeTupleForm *att = tupleDesc->attrs; - - if (bit != NULL) { - bitP = &bit[-1]; - bitmask = CSIGNBIT; - } - - *infomask = 0; - - for (i = 0; i < numberOfAttributes; i++) { - if (bit != NULL) { - if (bitmask != CSIGNBIT) { - bitmask <<= 1; - } else { - bitP += 1; - *bitP = 0x0; - bitmask = 1; - } - - if (nulls[i] == 'n') { - *infomask |= HEAP_HASNULL; - continue; - } - - *bitP |= bitmask; + bits8 *bitP = 0; + int bitmask = 0; + uint32 data_length; + int i; + int numberOfAttributes = tupleDesc->natts; + AttributeTupleForm *att = tupleDesc->attrs; + + if (bit != NULL) + { + bitP = &bit[-1]; + bitmask = CSIGNBIT; } - - switch (att[i]->attlen) { - case -1: - *infomask |= HEAP_HASVARLENA; - if (att[i]->attalign=='d') { - data = (char *) DOUBLEALIGN(data); - } else { - data = (char *) INTALIGN(data); 
- } - data_length = VARSIZE(DatumGetPointer(value[i])); - memmove(data, DatumGetPointer(value[i]),data_length); - data += data_length; - break; - case sizeof(char): - *data = att[i]->attbyval ? - DatumGetChar(value[i]) : *((char *) value[i]); - data += sizeof(char); - break; - case sizeof(int16): - data = (char *) SHORTALIGN(data); - * (short *) data = (att[i]->attbyval ? - DatumGetInt16(value[i]) : - *((short *) value[i])); - data += sizeof(short); - break; - case sizeof(int32): - data = (char *) INTALIGN(data); - * (int32 *) data = (att[i]->attbyval ? - DatumGetInt32(value[i]) : - *((int32 *) value[i])); - data += sizeof(int32); - break; - default: - if (att[i]->attlen < sizeof(int32)) - elog(WARN, "DataFill: attribute %d has len %d", - i, att[i]->attlen); - if (att[i]->attalign == 'd') { - data = (char *) DOUBLEALIGN(data); - memmove(data, DatumGetPointer(value[i]), - att[i]->attlen); - data += att[i]->attlen; - } else { - data = (char *) LONGALIGN(data); - memmove(data, DatumGetPointer(value[i]), - att[i]->attlen); - data += att[i]->attlen; - } - break; + + *infomask = 0; + + for (i = 0; i < numberOfAttributes; i++) + { + if (bit != NULL) + { + if (bitmask != CSIGNBIT) + { + bitmask <<= 1; + } + else + { + bitP += 1; + *bitP = 0x0; + bitmask = 1; + } + + if (nulls[i] == 'n') + { + *infomask |= HEAP_HASNULL; + continue; + } + + *bitP |= bitmask; + } + + switch (att[i]->attlen) + { + case -1: + *infomask |= HEAP_HASVARLENA; + if (att[i]->attalign == 'd') + { + data = (char *) DOUBLEALIGN(data); + } + else + { + data = (char *) INTALIGN(data); + } + data_length = VARSIZE(DatumGetPointer(value[i])); + memmove(data, DatumGetPointer(value[i]), data_length); + data += data_length; + break; + case sizeof(char): + *data = att[i]->attbyval ? + DatumGetChar(value[i]) : *((char *) value[i]); + data += sizeof(char); + break; + case sizeof(int16): + data = (char *) SHORTALIGN(data); + *(short *) data = (att[i]->attbyval ? 
+ DatumGetInt16(value[i]) : + *((short *) value[i])); + data += sizeof(short); + break; + case sizeof(int32): + data = (char *) INTALIGN(data); + *(int32 *) data = (att[i]->attbyval ? + DatumGetInt32(value[i]) : + *((int32 *) value[i])); + data += sizeof(int32); + break; + default: + if (att[i]->attlen < sizeof(int32)) + elog(WARN, "DataFill: attribute %d has len %d", + i, att[i]->attlen); + if (att[i]->attalign == 'd') + { + data = (char *) DOUBLEALIGN(data); + memmove(data, DatumGetPointer(value[i]), + att[i]->attlen); + data += att[i]->attlen; + } + else + { + data = (char *) LONGALIGN(data); + memmove(data, DatumGetPointer(value[i]), + att[i]->attlen); + data += att[i]->attlen; + } + break; + } } - } } /* ---------------------------------------------------------------- - * heap tuple interface + * heap tuple interface * ---------------------------------------------------------------- */ /* ---------------- - * heap_attisnull - returns 1 iff tuple attribute is not present + * heap_attisnull - returns 1 iff tuple attribute is not present * ---------------- */ int heap_attisnull(HeapTuple tup, int attnum) { - if (attnum > (int)tup->t_natts) - return (1); - - if (HeapTupleNoNulls(tup)) return(0); - - if (attnum > 0) { - return(att_isnull(attnum - 1, tup->t_bits)); - } else - switch (attnum) { - case SelfItemPointerAttributeNumber: - case ObjectIdAttributeNumber: - case MinTransactionIdAttributeNumber: - case MinCommandIdAttributeNumber: - case MaxTransactionIdAttributeNumber: - case MaxCommandIdAttributeNumber: - case ChainItemPointerAttributeNumber: - case AnchorItemPointerAttributeNumber: - case MinAbsoluteTimeAttributeNumber: - case MaxAbsoluteTimeAttributeNumber: - case VersionTypeAttributeNumber: - break; - - case 0: - elog(WARN, "heap_attisnull: zero attnum disallowed"); - - default: - elog(WARN, "heap_attisnull: undefined negative attnum"); + if (attnum > (int) tup->t_natts) + return (1); + + if (HeapTupleNoNulls(tup)) + return (0); + + if (attnum > 0) + { + 
return (att_isnull(attnum - 1, tup->t_bits)); } - - return (0); + else + switch (attnum) + { + case SelfItemPointerAttributeNumber: + case ObjectIdAttributeNumber: + case MinTransactionIdAttributeNumber: + case MinCommandIdAttributeNumber: + case MaxTransactionIdAttributeNumber: + case MaxCommandIdAttributeNumber: + case ChainItemPointerAttributeNumber: + case AnchorItemPointerAttributeNumber: + case MinAbsoluteTimeAttributeNumber: + case MaxAbsoluteTimeAttributeNumber: + case VersionTypeAttributeNumber: + break; + + case 0: + elog(WARN, "heap_attisnull: zero attnum disallowed"); + + default: + elog(WARN, "heap_attisnull: undefined negative attnum"); + } + + return (0); } /* ---------------------------------------------------------------- - * system attribute heap tuple support + * system attribute heap tuple support * ---------------------------------------------------------------- */ /* ---------------- - * heap_sysattrlen + * heap_sysattrlen * - * This routine returns the length of a system attribute. + * This routine returns the length of a system attribute. 
* ---------------- */ int heap_sysattrlen(AttrNumber attno) { - HeapTupleData *f = NULL; - - switch (attno) { - case SelfItemPointerAttributeNumber: return sizeof f->t_ctid; - case ObjectIdAttributeNumber: return sizeof f->t_oid; - case MinTransactionIdAttributeNumber: return sizeof f->t_xmin; - case MinCommandIdAttributeNumber: return sizeof f->t_cmin; - case MaxTransactionIdAttributeNumber: return sizeof f->t_xmax; - case MaxCommandIdAttributeNumber: return sizeof f->t_cmax; - case ChainItemPointerAttributeNumber: return sizeof f->t_chain; - case MinAbsoluteTimeAttributeNumber: return sizeof f->t_tmin; - case MaxAbsoluteTimeAttributeNumber: return sizeof f->t_tmax; - case VersionTypeAttributeNumber: return sizeof f->t_vtype; - - case AnchorItemPointerAttributeNumber: - elog(WARN, "heap_sysattrlen: field t_anchor does not exist!"); - return 0; - - default: - elog(WARN, "sysattrlen: System attribute number %d unknown.", attno); - return 0; - } + HeapTupleData *f = NULL; + + switch (attno) + { + case SelfItemPointerAttributeNumber: + return sizeof f->t_ctid; + case ObjectIdAttributeNumber: + return sizeof f->t_oid; + case MinTransactionIdAttributeNumber: + return sizeof f->t_xmin; + case MinCommandIdAttributeNumber: + return sizeof f->t_cmin; + case MaxTransactionIdAttributeNumber: + return sizeof f->t_xmax; + case MaxCommandIdAttributeNumber: + return sizeof f->t_cmax; + case ChainItemPointerAttributeNumber: + return sizeof f->t_chain; + case MinAbsoluteTimeAttributeNumber: + return sizeof f->t_tmin; + case MaxAbsoluteTimeAttributeNumber: + return sizeof f->t_tmax; + case VersionTypeAttributeNumber: + return sizeof f->t_vtype; + + case AnchorItemPointerAttributeNumber: + elog(WARN, "heap_sysattrlen: field t_anchor does not exist!"); + return 0; + + default: + elog(WARN, "sysattrlen: System attribute number %d unknown.", attno); + return 0; + } } /* ---------------- - * heap_sysattrbyval + * heap_sysattrbyval * - * This routine returns the "by-value" property of a 
system attribute. + * This routine returns the "by-value" property of a system attribute. * ---------------- */ bool heap_sysattrbyval(AttrNumber attno) { - bool byval; - - switch (attno) { - case SelfItemPointerAttributeNumber: - byval = false; - break; - case ObjectIdAttributeNumber: - byval = true; - break; - case MinTransactionIdAttributeNumber: - byval = true; - break; - case MinCommandIdAttributeNumber: - byval = true; - break; - case MaxTransactionIdAttributeNumber: - byval = true; - break; - case MaxCommandIdAttributeNumber: - byval = true; - break; - case ChainItemPointerAttributeNumber: - byval = false; - break; - case AnchorItemPointerAttributeNumber: - byval = false; - break; - case MinAbsoluteTimeAttributeNumber: - byval = true; - break; - case MaxAbsoluteTimeAttributeNumber: - byval = true; - break; - case VersionTypeAttributeNumber: - byval = true; - break; - default: - byval = true; - elog(WARN, "sysattrbyval: System attribute number %d unknown.", - attno); - break; - } - - return byval; + bool byval; + + switch (attno) + { + case SelfItemPointerAttributeNumber: + byval = false; + break; + case ObjectIdAttributeNumber: + byval = true; + break; + case MinTransactionIdAttributeNumber: + byval = true; + break; + case MinCommandIdAttributeNumber: + byval = true; + break; + case MaxTransactionIdAttributeNumber: + byval = true; + break; + case MaxCommandIdAttributeNumber: + byval = true; + break; + case ChainItemPointerAttributeNumber: + byval = false; + break; + case AnchorItemPointerAttributeNumber: + byval = false; + break; + case MinAbsoluteTimeAttributeNumber: + byval = true; + break; + case MaxAbsoluteTimeAttributeNumber: + byval = true; + break; + case VersionTypeAttributeNumber: + byval = true; + break; + default: + byval = true; + elog(WARN, "sysattrbyval: System attribute number %d unknown.", + attno); + break; + } + + return byval; } /* ---------------- - * heap_getsysattr + * heap_getsysattr * ---------------- */ -char * +char * 
heap_getsysattr(HeapTuple tup, Buffer b, int attnum) { - switch (attnum) { - case SelfItemPointerAttributeNumber: - return ((char *)&tup->t_ctid); - case ObjectIdAttributeNumber: - return ((char *) (long) tup->t_oid); - case MinTransactionIdAttributeNumber: - return ((char *) (long) tup->t_xmin); - case MinCommandIdAttributeNumber: - return ((char *) (long) tup->t_cmin); - case MaxTransactionIdAttributeNumber: - return ((char *) (long) tup->t_xmax); - case MaxCommandIdAttributeNumber: - return ((char *) (long) tup->t_cmax); - case ChainItemPointerAttributeNumber: - return ((char *) &tup->t_chain); - case AnchorItemPointerAttributeNumber: - elog(WARN, "heap_getsysattr: t_anchor does not exist!"); - break; - - /* - * For tmin and tmax, we need to do some extra work. These don't - * get filled in until the vacuum cleaner runs (or we manage to flush - * a page after setting the value correctly below). If the vacuum - * cleaner hasn't run yet, then the times stored in the tuple are - * wrong, and we need to look up the commit time of the transaction. - * We cache this value in the tuple to avoid doing the work more than - * once. 
- */ - - case MinAbsoluteTimeAttributeNumber: - if (!AbsoluteTimeIsBackwardCompatiblyValid(tup->t_tmin) && - TransactionIdDidCommit(tup->t_xmin)) - tup->t_tmin = TransactionIdGetCommitTime(tup->t_xmin); - return ((char *) (long) tup->t_tmin); - case MaxAbsoluteTimeAttributeNumber: - if (!AbsoluteTimeIsBackwardCompatiblyReal(tup->t_tmax)) { - if (TransactionIdDidCommit(tup->t_xmax)) - tup->t_tmax = TransactionIdGetCommitTime(tup->t_xmax); - else - tup->t_tmax = CURRENT_ABSTIME; + switch (attnum) + { + case SelfItemPointerAttributeNumber: + return ((char *) &tup->t_ctid); + case ObjectIdAttributeNumber: + return ((char *) (long) tup->t_oid); + case MinTransactionIdAttributeNumber: + return ((char *) (long) tup->t_xmin); + case MinCommandIdAttributeNumber: + return ((char *) (long) tup->t_cmin); + case MaxTransactionIdAttributeNumber: + return ((char *) (long) tup->t_xmax); + case MaxCommandIdAttributeNumber: + return ((char *) (long) tup->t_cmax); + case ChainItemPointerAttributeNumber: + return ((char *) &tup->t_chain); + case AnchorItemPointerAttributeNumber: + elog(WARN, "heap_getsysattr: t_anchor does not exist!"); + break; + + /* + * For tmin and tmax, we need to do some extra work. These don't + * get filled in until the vacuum cleaner runs (or we manage to + * flush a page after setting the value correctly below). If the + * vacuum cleaner hasn't run yet, then the times stored in the + * tuple are wrong, and we need to look up the commit time of the + * transaction. We cache this value in the tuple to avoid doing + * the work more than once. 
+ */ + + case MinAbsoluteTimeAttributeNumber: + if (!AbsoluteTimeIsBackwardCompatiblyValid(tup->t_tmin) && + TransactionIdDidCommit(tup->t_xmin)) + tup->t_tmin = TransactionIdGetCommitTime(tup->t_xmin); + return ((char *) (long) tup->t_tmin); + case MaxAbsoluteTimeAttributeNumber: + if (!AbsoluteTimeIsBackwardCompatiblyReal(tup->t_tmax)) + { + if (TransactionIdDidCommit(tup->t_xmax)) + tup->t_tmax = TransactionIdGetCommitTime(tup->t_xmax); + else + tup->t_tmax = CURRENT_ABSTIME; + } + return ((char *) (long) tup->t_tmax); + case VersionTypeAttributeNumber: + return ((char *) (long) tup->t_vtype); + default: + elog(WARN, "heap_getsysattr: undefined attnum %d", attnum); } - return ((char *) (long) tup->t_tmax); - case VersionTypeAttributeNumber: - return ((char *) (long) tup->t_vtype); - default: - elog(WARN, "heap_getsysattr: undefined attnum %d", attnum); - } - return(NULL); + return (NULL); } /* ---------------- - * fastgetattr + * fastgetattr * - * This is a newer version of fastgetattr which attempts to be - * faster by caching attribute offsets in the attribute descriptor. + * This is a newer version of fastgetattr which attempts to be + * faster by caching attribute offsets in the attribute descriptor. * - * an alternate way to speed things up would be to cache offsets - * with the tuple, but that seems more difficult unless you take - * the storage hit of actually putting those offsets into the - * tuple you send to disk. Yuck. + * an alternate way to speed things up would be to cache offsets + * with the tuple, but that seems more difficult unless you take + * the storage hit of actually putting those offsets into the + * tuple you send to disk. Yuck. * - * This scheme will be slightly slower than that, but should - * preform well for queries which hit large #'s of tuples. After - * you cache the offsets once, examining all the other tuples using - * the same attribute descriptor will go much quicker. 
-cim 5/4/91 + * This scheme will be slightly slower than that, but should + * preform well for queries which hit large #'s of tuples. After + * you cache the offsets once, examining all the other tuples using + * the same attribute descriptor will go much quicker. -cim 5/4/91 * ---------------- */ -char * +char * fastgetattr(HeapTuple tup, - int attnum, - TupleDesc tupleDesc, - bool *isnull) + int attnum, + TupleDesc tupleDesc, + bool * isnull) { - char *tp; /* ptr to att in tuple */ - bits8 *bp = NULL; /* ptr to att in tuple */ - int slow; /* do we have to walk nulls? */ - AttributeTupleForm *att = tupleDesc->attrs; - - /* ---------------- - * sanity checks - * ---------------- - */ - - Assert(attnum > 0); - - /* ---------------- - * Three cases: - * - * 1: No nulls and no variable length attributes. - * 2: Has a null or a varlena AFTER att. - * 3: Has nulls or varlenas BEFORE att. - * ---------------- - */ - - if (isnull) - *isnull = false; - - if (HeapTupleNoNulls(tup)) { - attnum--; - if (att[attnum]->attcacheoff > 0) { - return (char *) - fetchatt( &(att[attnum]), - (char *)tup + tup->t_hoff + att[attnum]->attcacheoff); - } else if (attnum == 0) { - /* - * first attribute is always at position zero - */ - return((char *) fetchatt(&(att[0]), (char *) tup + tup->t_hoff)); - } - - tp = (char *) tup + tup->t_hoff; - - slow = 0; - } else { - /* - * there's a null somewhere in the tuple - */ + char *tp; /* ptr to att in tuple */ + bits8 *bp = NULL; /* ptr to att in tuple */ + int slow; /* do we have to walk nulls? */ + AttributeTupleForm *att = tupleDesc->attrs; - bp = tup->t_bits; - tp = (char *) tup + tup->t_hoff; - slow = 0; - attnum--; - /* ---------------- - * check to see if desired att is null + * sanity checks * ---------------- */ - - if (att_isnull(attnum, bp)) { - if (isnull) - *isnull = true; - return NULL; - } + + Assert(attnum > 0); /* ---------------- - * Now check to see if any preceeding bits are null... 
+ * Three cases: + * + * 1: No nulls and no variable length attributes. + * 2: Has a null or a varlena AFTER att. + * 3: Has nulls or varlenas BEFORE att. * ---------------- */ - + + if (isnull) + *isnull = false; + + if (HeapTupleNoNulls(tup)) { - register int i = 0; /* current offset in bp */ - - for (i = 0; i < attnum && !slow; i++) { - if (att_isnull(i, bp)) slow = 1; - } + attnum--; + if (att[attnum]->attcacheoff > 0) + { + return (char *) + fetchatt(&(att[attnum]), + (char *) tup + tup->t_hoff + att[attnum]->attcacheoff); + } + else if (attnum == 0) + { + + /* + * first attribute is always at position zero + */ + return ((char *) fetchatt(&(att[0]), (char *) tup + tup->t_hoff)); + } + + tp = (char *) tup + tup->t_hoff; + + slow = 0; } - } - - /* - * now check for any non-fixed length attrs before our attribute - */ - if (!slow) { - if (att[attnum]->attcacheoff > 0) { - return (char *) - fetchatt(&(att[attnum]), - tp + att[attnum]->attcacheoff); - } else if (attnum == 0) { - return (char *) - fetchatt(&(att[0]), (char *) tup + tup->t_hoff); - } else if (!HeapTupleAllFixed(tup)) { - register int j = 0; - - for (j = 0; j < attnum && !slow; j++) - if (att[j]->attlen < 1) slow = 1; + else + { + + /* + * there's a null somewhere in the tuple + */ + + bp = tup->t_bits; + tp = (char *) tup + tup->t_hoff; + slow = 0; + attnum--; + + /* ---------------- + * check to see if desired att is null + * ---------------- + */ + + if (att_isnull(attnum, bp)) + { + if (isnull) + *isnull = true; + return NULL; + } + + /* ---------------- + * Now check to see if any preceeding bits are null... + * ---------------- + */ + + { + register int i = 0; /* current offset in bp */ + + for (i = 0; i < attnum && !slow; i++) + { + if (att_isnull(i, bp)) + slow = 1; + } + } } - } - - /* - * if slow is zero, and we got here, we know that we have a tuple with - * no nulls. We also have to initialize the remainder of - * the attribute cached offset values. 
- */ - if (!slow) { - register int j = 1; - register long off; - + /* - * need to set cache for some atts + * now check for any non-fixed length attrs before our attribute */ - - att[0]->attcacheoff = 0; - - while (att[j]->attcacheoff > 0) j++; - - off = att[j-1]->attcacheoff + att[j-1]->attlen; - - for (; j < attnum + 1; j++) { - switch(att[j]->attlen) { - case -1: - off = (att[j]->attalign=='d') ? - DOUBLEALIGN(off) : INTALIGN(off); - break; - case sizeof(char): - break; - case sizeof(short): - off = SHORTALIGN(off); - break; - case sizeof(int32): - off = INTALIGN(off); - break; - default: - if (att[j]->attlen < sizeof(int32)) { - elog(WARN, - "fastgetattr: attribute %d has len %d", - j, att[j]->attlen); + if (!slow) + { + if (att[attnum]->attcacheoff > 0) + { + return (char *) + fetchatt(&(att[attnum]), + tp + att[attnum]->attcacheoff); + } + else if (attnum == 0) + { + return (char *) + fetchatt(&(att[0]), (char *) tup + tup->t_hoff); + } + else if (!HeapTupleAllFixed(tup)) + { + register int j = 0; + + for (j = 0; j < attnum && !slow; j++) + if (att[j]->attlen < 1) + slow = 1; } - if (att[j]->attalign == 'd') - off = DOUBLEALIGN(off); - else - off = LONGALIGN(off); - break; - } - - att[j]->attcacheoff = off; - off += att[j]->attlen; } - - return - (char *)fetchatt(&(att[attnum]), tp + att[attnum]->attcacheoff); - } else { - register bool usecache = true; - register int off = 0; - register int i; - + /* - * Now we know that we have to walk the tuple CAREFULLY. - * - * Note - This loop is a little tricky. On iteration i we - * first set the offset for attribute i and figure out how much - * the offset should be incremented. Finally, we need to align the - * offset based on the size of attribute i+1 (for which the offset - * has been computed). -mer 12 Dec 1991 + * if slow is zero, and we got here, we know that we have a tuple with + * no nulls. We also have to initialize the remainder of the + * attribute cached offset values. 
*/ - - for (i = 0; i < attnum; i++) { - if (!HeapTupleNoNulls(tup)) { - if (att_isnull(i, bp)) { - usecache = false; - continue; - } - } - switch (att[i]->attlen) { - case -1: - off = (att[i]->attalign=='d') ? - DOUBLEALIGN(off) : INTALIGN(off); - break; - case sizeof(char): - break; - case sizeof(short): - off = SHORTALIGN(off); - break; - case sizeof(int32): - off = INTALIGN(off); - break; - default: - if (att[i]->attlen < sizeof(int32)) - elog(WARN, - "fastgetattr2: attribute %d has len %d", - i, att[i]->attlen); - if (att[i]->attalign == 'd') - off = DOUBLEALIGN(off); - else - off = LONGALIGN(off); - break; - } - if (usecache && att[i]->attcacheoff > 0) { - off = att[i]->attcacheoff; - if (att[i]->attlen == -1) { - usecache = false; + if (!slow) + { + register int j = 1; + register long off; + + /* + * need to set cache for some atts + */ + + att[0]->attcacheoff = 0; + + while (att[j]->attcacheoff > 0) + j++; + + off = att[j - 1]->attcacheoff + att[j - 1]->attlen; + + for (; j < attnum + 1; j++) + { + switch (att[j]->attlen) + { + case -1: + off = (att[j]->attalign == 'd') ? 
+ DOUBLEALIGN(off) : INTALIGN(off); + break; + case sizeof(char): + break; + case sizeof(short): + off = SHORTALIGN(off); + break; + case sizeof(int32): + off = INTALIGN(off); + break; + default: + if (att[j]->attlen < sizeof(int32)) + { + elog(WARN, + "fastgetattr: attribute %d has len %d", + j, att[j]->attlen); + } + if (att[j]->attalign == 'd') + off = DOUBLEALIGN(off); + else + off = LONGALIGN(off); + break; + } + + att[j]->attcacheoff = off; + off += att[j]->attlen; } - } else { - if (usecache) att[i]->attcacheoff = off; - } - - switch(att[i]->attlen) { - case sizeof(char): - off++; - break; - case sizeof(int16): - off += sizeof(int16); - break; - case sizeof(int32): - off += sizeof(int32); - break; - case -1: - usecache = false; - off += VARSIZE(tp + off); - break; - default: - off += att[i]->attlen; - break; - } + + return + (char *) fetchatt(&(att[attnum]), tp + att[attnum]->attcacheoff); } - switch (att[attnum]->attlen) { - case -1: - off = (att[attnum]->attalign=='d')? - DOUBLEALIGN(off) : INTALIGN(off); - break; - case sizeof(char): - break; - case sizeof(short): - off = SHORTALIGN(off); - break; - case sizeof(int32): - off = INTALIGN(off); - break; - default: - if (att[attnum]->attlen < sizeof(int32)) - elog(WARN, "fastgetattr3: attribute %d has len %d", - attnum, att[attnum]->attlen); - if (att[attnum]->attalign == 'd') - off = DOUBLEALIGN(off); - else - off = LONGALIGN(off); - break; + else + { + register bool usecache = true; + register int off = 0; + register int i; + + /* + * Now we know that we have to walk the tuple CAREFULLY. + * + * Note - This loop is a little tricky. On iteration i we first set + * the offset for attribute i and figure out how much the offset + * should be incremented. Finally, we need to align the offset + * based on the size of attribute i+1 (for which the offset has + * been computed). 
-mer 12 Dec 1991 + */ + + for (i = 0; i < attnum; i++) + { + if (!HeapTupleNoNulls(tup)) + { + if (att_isnull(i, bp)) + { + usecache = false; + continue; + } + } + switch (att[i]->attlen) + { + case -1: + off = (att[i]->attalign == 'd') ? + DOUBLEALIGN(off) : INTALIGN(off); + break; + case sizeof(char): + break; + case sizeof(short): + off = SHORTALIGN(off); + break; + case sizeof(int32): + off = INTALIGN(off); + break; + default: + if (att[i]->attlen < sizeof(int32)) + elog(WARN, + "fastgetattr2: attribute %d has len %d", + i, att[i]->attlen); + if (att[i]->attalign == 'd') + off = DOUBLEALIGN(off); + else + off = LONGALIGN(off); + break; + } + if (usecache && att[i]->attcacheoff > 0) + { + off = att[i]->attcacheoff; + if (att[i]->attlen == -1) + { + usecache = false; + } + } + else + { + if (usecache) + att[i]->attcacheoff = off; + } + + switch (att[i]->attlen) + { + case sizeof(char): + off++; + break; + case sizeof(int16): + off += sizeof(int16); + break; + case sizeof(int32): + off += sizeof(int32); + break; + case -1: + usecache = false; + off += VARSIZE(tp + off); + break; + default: + off += att[i]->attlen; + break; + } + } + switch (att[attnum]->attlen) + { + case -1: + off = (att[attnum]->attalign == 'd') ? 
+ DOUBLEALIGN(off) : INTALIGN(off); + break; + case sizeof(char): + break; + case sizeof(short): + off = SHORTALIGN(off); + break; + case sizeof(int32): + off = INTALIGN(off); + break; + default: + if (att[attnum]->attlen < sizeof(int32)) + elog(WARN, "fastgetattr3: attribute %d has len %d", + attnum, att[attnum]->attlen); + if (att[attnum]->attalign == 'd') + off = DOUBLEALIGN(off); + else + off = LONGALIGN(off); + break; + } + return ((char *) fetchatt(&(att[attnum]), tp + off)); } - return((char *) fetchatt(&(att[attnum]), tp + off)); - } } /* ---------------- - * heap_copytuple + * heap_copytuple * - * returns a copy of an entire tuple + * returns a copy of an entire tuple * ---------------- */ HeapTuple heap_copytuple(HeapTuple tuple) { - HeapTuple newTuple; + HeapTuple newTuple; - if (! HeapTupleIsValid(tuple)) - return (NULL); - - /* XXX For now, just prevent an undetectable executor related error */ - if (tuple->t_len > MAXTUPLEN) { - elog(WARN, "palloctup: cannot handle length %d tuples", - tuple->t_len); - } - - newTuple = (HeapTuple) palloc(tuple->t_len); - memmove((char *) newTuple, (char *) tuple, (int) tuple->t_len); - return(newTuple); + if (!HeapTupleIsValid(tuple)) + return (NULL); + + /* XXX For now, just prevent an undetectable executor related error */ + if (tuple->t_len > MAXTUPLEN) + { + elog(WARN, "palloctup: cannot handle length %d tuples", + tuple->t_len); + } + + newTuple = (HeapTuple) palloc(tuple->t_len); + memmove((char *) newTuple, (char *) tuple, (int) tuple->t_len); + return (newTuple); } #ifdef NOT_USED /* ---------------- - * heap_deformtuple + * heap_deformtuple * - * the inverse of heap_formtuple (see below) + * the inverse of heap_formtuple (see below) * ---------------- */ void heap_deformtuple(HeapTuple tuple, - TupleDesc tdesc, - Datum values[], - char nulls[]) + TupleDesc tdesc, + Datum values[], + char nulls[]) { - int i; - int natts; - - Assert(HeapTupleIsValid(tuple)); - - natts = tuple->t_natts; - for (i = 0; i<natts; 
i++) { - bool isnull; - - values[i] = (Datum)heap_getattr(tuple, - InvalidBuffer, - i+1, - tdesc, - &isnull); - if (isnull) - nulls[i] = 'n'; - else - nulls[i] = ' '; - } + int i; + int natts; + + Assert(HeapTupleIsValid(tuple)); + + natts = tuple->t_natts; + for (i = 0; i < natts; i++) + { + bool isnull; + + values[i] = (Datum) heap_getattr(tuple, + InvalidBuffer, + i + 1, + tdesc, + &isnull); + if (isnull) + nulls[i] = 'n'; + else + nulls[i] = ' '; + } } + #endif /* ---------------- - * heap_formtuple + * heap_formtuple * - * constructs a tuple from the given value[] and null[] arrays + * constructs a tuple from the given value[] and null[] arrays * * old comments - * Handles alignment by aligning 2 byte attributes on short boundries - * and 3 or 4 byte attributes on long word boundries on a vax; and - * aligning non-byte attributes on short boundries on a sun. Does - * not properly align fixed length arrays of 1 or 2 byte types (yet). + * Handles alignment by aligning 2 byte attributes on short boundries + * and 3 or 4 byte attributes on long word boundries on a vax; and + * aligning non-byte attributes on short boundries on a sun. Does + * not properly align fixed length arrays of 1 or 2 byte types (yet). * - * Null attributes are indicated by a 'n' in the appropriate byte - * of the null[]. Non-null attributes are indicated by a ' ' (space). + * Null attributes are indicated by a 'n' in the appropriate byte + * of the null[]. Non-null attributes are indicated by a ' ' (space). * - * Fix me. (Figure that must keep context if debug--allow give oid.) - * Assumes in order. + * Fix me. (Figure that must keep context if debug--allow give oid.) + * Assumes in order. 
* ---------------- */ HeapTuple heap_formtuple(TupleDesc tupleDescriptor, - Datum value[], - char nulls[]) + Datum value[], + char nulls[]) { - char *tp; /* tuple pointer */ - HeapTuple tuple; /* return tuple */ - int bitmaplen; - long len; - int hoff; - bool hasnull = false; - int i; - int numberOfAttributes = tupleDescriptor->natts; - - len = sizeof *tuple - sizeof tuple->t_bits; - - for (i = 0; i < numberOfAttributes && !hasnull; i++) { - if (nulls[i] != ' ') hasnull = true; - } - - if (numberOfAttributes > MaxHeapAttributeNumber) - elog(WARN, "heap_formtuple: numberOfAttributes of %d > %d", - numberOfAttributes, MaxHeapAttributeNumber); - - if (hasnull) { - bitmaplen = BITMAPLEN(numberOfAttributes); - len += bitmaplen; - } - - hoff = len = DOUBLEALIGN(len); /* be conservative here */ - - len += ComputeDataSize(tupleDescriptor, value, nulls); - - tp = (char *) palloc(len); - tuple = (HeapTuple) tp; - - memset(tp, 0, (int)len); - - tuple->t_len = len; - tuple->t_natts = numberOfAttributes; - tuple->t_hoff = hoff; - tuple->t_tmin = INVALID_ABSTIME; - tuple->t_tmax = CURRENT_ABSTIME; - - DataFill((char *)tuple + tuple->t_hoff, - tupleDescriptor, - value, - nulls, - &tuple->t_infomask, - (hasnull ? 
tuple->t_bits : NULL)); - - return (tuple); + char *tp; /* tuple pointer */ + HeapTuple tuple; /* return tuple */ + int bitmaplen; + long len; + int hoff; + bool hasnull = false; + int i; + int numberOfAttributes = tupleDescriptor->natts; + + len = sizeof *tuple - sizeof tuple->t_bits; + + for (i = 0; i < numberOfAttributes && !hasnull; i++) + { + if (nulls[i] != ' ') + hasnull = true; + } + + if (numberOfAttributes > MaxHeapAttributeNumber) + elog(WARN, "heap_formtuple: numberOfAttributes of %d > %d", + numberOfAttributes, MaxHeapAttributeNumber); + + if (hasnull) + { + bitmaplen = BITMAPLEN(numberOfAttributes); + len += bitmaplen; + } + + hoff = len = DOUBLEALIGN(len); /* be conservative here */ + + len += ComputeDataSize(tupleDescriptor, value, nulls); + + tp = (char *) palloc(len); + tuple = (HeapTuple) tp; + + memset(tp, 0, (int) len); + + tuple->t_len = len; + tuple->t_natts = numberOfAttributes; + tuple->t_hoff = hoff; + tuple->t_tmin = INVALID_ABSTIME; + tuple->t_tmax = CURRENT_ABSTIME; + + DataFill((char *) tuple + tuple->t_hoff, + tupleDescriptor, + value, + nulls, + &tuple->t_infomask, + (hasnull ? tuple->t_bits : NULL)); + + return (tuple); } /* ---------------- - * heap_modifytuple + * heap_modifytuple * - * forms a new tuple from an old tuple and a set of replacement values. + * forms a new tuple from an old tuple and a set of replacement values. 
* ---------------- */ HeapTuple heap_modifytuple(HeapTuple tuple, - Buffer buffer, - Relation relation, - Datum replValue[], - char replNull[], - char repl[]) + Buffer buffer, + Relation relation, + Datum replValue[], + char replNull[], + char repl[]) { - int attoff; - int numberOfAttributes; - Datum *value; - char *nulls; - bool isNull; - HeapTuple newTuple; - int madecopy; - uint8 infomask; - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(HeapTupleIsValid(tuple)); - Assert(BufferIsValid(buffer) || RelationIsValid(relation)); - Assert(HeapTupleIsValid(tuple)); - Assert(PointerIsValid(replValue)); - Assert(PointerIsValid(replNull)); - Assert(PointerIsValid(repl)); - - /* ---------------- - * if we're pointing to a disk page, then first - * make a copy of our tuple so that all the attributes - * are available. XXX this is inefficient -cim - * ---------------- - */ - madecopy = 0; - if (BufferIsValid(buffer) == true) { - relation = (Relation) BufferGetRelation(buffer); - tuple = heap_copytuple(tuple); - madecopy = 1; - } - - numberOfAttributes = RelationGetRelationTupleForm(relation)->relnatts; - - /* ---------------- - * allocate and fill value[] and nulls[] arrays from either - * the tuple or the repl information, as appropriate. - * ---------------- - */ - value = (Datum *) palloc(numberOfAttributes * sizeof *value); - nulls = (char *) palloc(numberOfAttributes * sizeof *nulls); - - for (attoff = 0; - attoff < numberOfAttributes; - attoff += 1) { - - if (repl[attoff] == ' ') { - char *attr; - - attr = - heap_getattr(tuple, - InvalidBuffer, - AttrOffsetGetAttrNumber(attoff), - RelationGetTupleDescriptor(relation), - &isNull) ; - value[attoff] = PointerGetDatum(attr); - nulls[attoff] = (isNull) ? 
'n' : ' '; - - } else if (repl[attoff] != 'r') { - elog(WARN, "heap_modifytuple: repl is \\%3d", repl[attoff]); - - } else { /* == 'r' */ - value[attoff] = replValue[attoff]; - nulls[attoff] = replNull[attoff]; + int attoff; + int numberOfAttributes; + Datum *value; + char *nulls; + bool isNull; + HeapTuple newTuple; + int madecopy; + uint8 infomask; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(HeapTupleIsValid(tuple)); + Assert(BufferIsValid(buffer) || RelationIsValid(relation)); + Assert(HeapTupleIsValid(tuple)); + Assert(PointerIsValid(replValue)); + Assert(PointerIsValid(replNull)); + Assert(PointerIsValid(repl)); + + /* ---------------- + * if we're pointing to a disk page, then first + * make a copy of our tuple so that all the attributes + * are available. XXX this is inefficient -cim + * ---------------- + */ + madecopy = 0; + if (BufferIsValid(buffer) == true) + { + relation = (Relation) BufferGetRelation(buffer); + tuple = heap_copytuple(tuple); + madecopy = 1; + } + + numberOfAttributes = RelationGetRelationTupleForm(relation)->relnatts; + + /* ---------------- + * allocate and fill value[] and nulls[] arrays from either + * the tuple or the repl information, as appropriate. + * ---------------- + */ + value = (Datum *) palloc(numberOfAttributes * sizeof *value); + nulls = (char *) palloc(numberOfAttributes * sizeof *nulls); + + for (attoff = 0; + attoff < numberOfAttributes; + attoff += 1) + { + + if (repl[attoff] == ' ') + { + char *attr; + + attr = + heap_getattr(tuple, + InvalidBuffer, + AttrOffsetGetAttrNumber(attoff), + RelationGetTupleDescriptor(relation), + &isNull); + value[attoff] = PointerGetDatum(attr); + nulls[attoff] = (isNull) ? 
'n' : ' '; + + } + else if (repl[attoff] != 'r') + { + elog(WARN, "heap_modifytuple: repl is \\%3d", repl[attoff]); + + } + else + { /* == 'r' */ + value[attoff] = replValue[attoff]; + nulls[attoff] = replNull[attoff]; + } } - } - - /* ---------------- - * create a new tuple from the values[] and nulls[] arrays - * ---------------- - */ - newTuple = heap_formtuple(RelationGetTupleDescriptor(relation), - value, - nulls); - - /* ---------------- - * copy the header except for t_len, t_natts, t_hoff, t_bits, t_infomask - * ---------------- - */ - infomask = newTuple->t_infomask; - memmove((char *) &newTuple->t_ctid, /*XXX*/ - (char *) &tuple->t_ctid, - ((char *) &tuple->t_hoff - (char *) &tuple->t_ctid)); /*XXX*/ - newTuple->t_infomask = infomask; - newTuple->t_natts = numberOfAttributes; /* fix t_natts just in case */ - - /* ---------------- - * if we made a copy of the tuple, then free it. - * ---------------- - */ - if (madecopy) - pfree(tuple); - - return - newTuple; + + /* ---------------- + * create a new tuple from the values[] and nulls[] arrays + * ---------------- + */ + newTuple = heap_formtuple(RelationGetTupleDescriptor(relation), + value, + nulls); + + /* ---------------- + * copy the header except for t_len, t_natts, t_hoff, t_bits, t_infomask + * ---------------- + */ + infomask = newTuple->t_infomask; + memmove((char *) &newTuple->t_ctid, /* XXX */ + (char *) &tuple->t_ctid, + ((char *) &tuple->t_hoff - (char *) &tuple->t_ctid)); /* XXX */ + newTuple->t_infomask = infomask; + newTuple->t_natts = numberOfAttributes; /* fix t_natts just in + * case */ + + /* ---------------- + * if we made a copy of the tuple, then free it. 
+ * ---------------- + */ + if (madecopy) + pfree(tuple); + + return + newTuple; } /* ---------------------------------------------------------------- - * other misc functions + * other misc functions * ---------------------------------------------------------------- */ HeapTuple heap_addheader(uint32 natts, /* max domain index */ - int structlen, /* its length */ - char *structure) /* pointer to the struct */ + int structlen, /* its length */ + char *structure) /* pointer to the struct */ { - register char *tp; /* tuple data pointer */ - HeapTuple tup; - long len; - int hoff; - - AssertArg(natts > 0); - - len = sizeof (HeapTupleData) - sizeof (tup->t_bits); - - hoff = len = DOUBLEALIGN(len); /* be conservative */ - len += structlen; - tp = (char *) palloc(len); - tup = (HeapTuple) tp; - memset((char*)tup, 0, len); - - tup->t_len = (short) len; /* XXX */ - tp += tup->t_hoff = hoff; - tup->t_natts = natts; - tup->t_infomask = 0; - - memmove(tp, structure, structlen); - - return (tup); + register char *tp; /* tuple data pointer */ + HeapTuple tup; + long len; + int hoff; + + AssertArg(natts > 0); + + len = sizeof(HeapTupleData) - sizeof(tup->t_bits); + + hoff = len = DOUBLEALIGN(len); /* be conservative */ + len += structlen; + tp = (char *) palloc(len); + tup = (HeapTuple) tp; + memset((char *) tup, 0, len); + + tup->t_len = (short) len; /* XXX */ + tp += tup->t_hoff = hoff; + tup->t_natts = natts; + tup->t_infomask = 0; + + memmove(tp, structure, structlen); + + return (tup); } diff --git a/src/backend/access/common/heapvalid.c b/src/backend/access/common/heapvalid.c index 186ee654b3..0caeb54e17 100644 --- a/src/backend/access/common/heapvalid.c +++ b/src/backend/access/common/heapvalid.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * heapvalid.c-- - * heap tuple qualification validity checking code + * heap tuple qualification validity checking code * * Copyright (c) 1994, Regents of the University of California * 
* * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/heapvalid.c,v 1.16 1997/08/29 09:12:20 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/heapvalid.c,v 1.17 1997/09/07 04:37:36 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -25,128 +25,138 @@ #include <utils/builtins.h> /* ---------------- - * heap_keytest + * heap_keytest * - * Test a heap tuple with respect to a scan key. + * Test a heap tuple with respect to a scan key. * ---------------- */ bool heap_keytest(HeapTuple t, - TupleDesc tupdesc, - int nkeys, - ScanKey keys) + TupleDesc tupdesc, + int nkeys, + ScanKey keys) { - bool isnull; - Datum atp; - int test; - - for (; nkeys--; keys++) { - atp = (Datum)heap_getattr(t, InvalidBuffer, - keys->sk_attno, - tupdesc, - &isnull); - - if (isnull) - /* XXX eventually should check if SK_ISNULL */ - return false; - - if (keys->sk_flags & SK_ISNULL) { - return (false); + bool isnull; + Datum atp; + int test; + + for (; nkeys--; keys++) + { + atp = (Datum) heap_getattr(t, InvalidBuffer, + keys->sk_attno, + tupdesc, + &isnull); + + if (isnull) + /* XXX eventually should check if SK_ISNULL */ + return false; + + if (keys->sk_flags & SK_ISNULL) + { + return (false); + } + + if (keys->sk_func == (func_ptr) oideq) /* optimization */ + test = (keys->sk_argument == atp); + else if (keys->sk_flags & SK_COMMUTE) + test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure, + keys->sk_argument, atp); + else + test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure, + atp, keys->sk_argument); + + if (!test == !(keys->sk_flags & SK_NEGATE)) + return false; } - if (keys->sk_func == (func_ptr)oideq) /* optimization */ - test = (keys->sk_argument == atp); - else if (keys->sk_flags & SK_COMMUTE) - test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure, - keys->sk_argument, atp); - else - test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure, - atp, keys->sk_argument); - - if 
(!test == !(keys->sk_flags & SK_NEGATE)) - return false; - } - - return true; + return true; } /* ---------------- - * heap_tuple_satisfies + * heap_tuple_satisfies * - * Returns a valid HeapTuple if it satisfies the timequal and keytest. - * Returns NULL otherwise. Used to be heap_satisifies (sic) which - * returned a boolean. It now returns a tuple so that we can avoid doing two - * PageGetItem's per tuple. + * Returns a valid HeapTuple if it satisfies the timequal and keytest. + * Returns NULL otherwise. Used to be heap_satisifies (sic) which + * returned a boolean. It now returns a tuple so that we can avoid doing two + * PageGetItem's per tuple. * - * Complete check of validity including LP_CTUP and keytest. - * This should perhaps be combined with valid somehow in the - * future. (Also, additional rule tests/time range tests.) + * Complete check of validity including LP_CTUP and keytest. + * This should perhaps be combined with valid somehow in the + * future. (Also, additional rule tests/time range tests.) * - * on 8/21/92 mao says: i rearranged the tests here to do keytest before - * SatisfiesTimeQual. profiling indicated that even for vacuumed relations, - * time qual checking was more expensive than key testing. time qual is - * least likely to fail, too. we should really add the time qual test to - * the restriction and optimize it in the normal way. this has interactions - * with joey's expensive function work. + * on 8/21/92 mao says: i rearranged the tests here to do keytest before + * SatisfiesTimeQual. profiling indicated that even for vacuumed relations, + * time qual checking was more expensive than key testing. time qual is + * least likely to fail, too. we should really add the time qual test to + * the restriction and optimize it in the normal way. this has interactions + * with joey's expensive function work. 
* ---------------- */ HeapTuple heap_tuple_satisfies(ItemId itemId, - Relation relation, - Buffer buffer, - PageHeader disk_page, - TimeQual qual, - int nKeys, - ScanKey key) + Relation relation, + Buffer buffer, + PageHeader disk_page, + TimeQual qual, + int nKeys, + ScanKey key) { - HeapTuple tuple, result; - bool res; - TransactionId old_tmin, old_tmax; - - if (! ItemIdIsUsed(itemId)) - return NULL; - - tuple = (HeapTuple) PageGetItem((Page) disk_page, itemId); - - if (key != NULL) - res = heap_keytest(tuple, RelationGetTupleDescriptor(relation), - nKeys, key); - else - res = TRUE; - - result = (HeapTuple)NULL; - if (res) { - if(relation->rd_rel->relkind == RELKIND_UNCATALOGED) { - result = tuple; - } else { - old_tmin = tuple->t_tmin; - old_tmax = tuple->t_tmax; - res = HeapTupleSatisfiesTimeQual(tuple,qual); - if(tuple->t_tmin != old_tmin || - tuple->t_tmax != old_tmax) { - SetBufferCommitInfoNeedsSave(buffer); - } - if(res) { - result = tuple; - } + HeapTuple tuple, + result; + bool res; + TransactionId old_tmin, + old_tmax; + + if (!ItemIdIsUsed(itemId)) + return NULL; + + tuple = (HeapTuple) PageGetItem((Page) disk_page, itemId); + + if (key != NULL) + res = heap_keytest(tuple, RelationGetTupleDescriptor(relation), + nKeys, key); + else + res = TRUE; + + result = (HeapTuple) NULL; + if (res) + { + if (relation->rd_rel->relkind == RELKIND_UNCATALOGED) + { + result = tuple; + } + else + { + old_tmin = tuple->t_tmin; + old_tmax = tuple->t_tmax; + res = HeapTupleSatisfiesTimeQual(tuple, qual); + if (tuple->t_tmin != old_tmin || + tuple->t_tmax != old_tmax) + { + SetBufferCommitInfoNeedsSave(buffer); + } + if (res) + { + result = tuple; + } + } } - } - return result; + return result; } /* - * TupleUpdatedByCurXactAndCmd() -- Returns true if this tuple has - * already been updated once by the current transaction/command - * pair. 
+ * TupleUpdatedByCurXactAndCmd() -- Returns true if this tuple has + * already been updated once by the current transaction/command + * pair. */ bool TupleUpdatedByCurXactAndCmd(HeapTuple t) { - if (TransactionIdEquals(t->t_xmax, - GetCurrentTransactionId()) && - CommandIdGEScanCommandId (t->t_cmax)) - return true; - - return false; + if (TransactionIdEquals(t->t_xmax, + GetCurrentTransactionId()) && + CommandIdGEScanCommandId(t->t_cmax)) + return true; + + return false; } diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c index a71fc46dc9..c133693801 100644 --- a/src/backend/access/common/indextuple.c +++ b/src/backend/access/common/indextuple.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * indextuple.c-- - * This file contains index tuple accessor and mutator routines, - * as well as a few various tuple utilities. + * This file contains index tuple accessor and mutator routines, + * as well as a few various tuple utilities. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.15 1997/08/19 21:28:50 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.16 1997/09/07 04:37:37 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -21,402 +21,438 @@ #include <access/tupmacs.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static Size IndexInfoFindDataOffset(unsigned short t_info); -static char *fastgetiattr(IndexTuple tup, int attnum, - TupleDesc att, bool *isnull); +static Size IndexInfoFindDataOffset(unsigned short t_info); +static char * +fastgetiattr(IndexTuple tup, int attnum, + TupleDesc att, bool * isnull); /* ---------------------------------------------------------------- - * index_ tuple interface routines + * index_ tuple interface routines * ---------------------------------------------------------------- */ /* ---------------- - * index_formtuple + * index_formtuple * ---------------- */ IndexTuple index_formtuple(TupleDesc tupleDescriptor, - Datum value[], - char null[]) + Datum value[], + char null[]) { - register char *tp; /* tuple pointer */ - IndexTuple tuple; /* return tuple */ - Size size, hoff; - int i; - unsigned short infomask = 0; - bool hasnull = false; - char tupmask = 0; - int numberOfAttributes = tupleDescriptor->natts; - - if (numberOfAttributes > MaxIndexAttributeNumber) - elog(WARN, "index_formtuple: numberOfAttributes of %d > %d", - numberOfAttributes, MaxIndexAttributeNumber); - - - for (i = 0; i < numberOfAttributes && !hasnull; i++) { - if (null[i] != ' ') hasnull = true; - } - - if (hasnull) infomask |= INDEX_NULL_MASK; - - hoff = IndexInfoFindDataOffset(infomask); - size = hoff - + ComputeDataSize(tupleDescriptor, - value, null); - size = DOUBLEALIGN(size); /* be conservative */ - - tp = (char 
*) palloc(size); - tuple = (IndexTuple) tp; - memset(tp,0,(int)size); - - DataFill((char *)tp + hoff, - tupleDescriptor, - value, - null, - &tupmask, - (hasnull ? (bits8*)tp + sizeof(*tuple) : NULL)); - - /* - * We do this because DataFill wants to initialize a "tupmask" which - * is used for HeapTuples, but we want an indextuple infomask. The only - * "relevent" info is the "has variable attributes" field, which is in - * mask position 0x02. We have already set the null mask above. - */ - - if (tupmask & 0x02) infomask |= INDEX_VAR_MASK; - - /* - * Here we make sure that we can actually hold the size. We also want - * to make sure that size is not aligned oddly. This actually is a - * rather odd way to make sure the size is not too large overall. - */ - - if (size & 0xE000) - elog(WARN, "index_formtuple: data takes %d bytes: too big", size); - - - infomask |= size; - - /* ---------------- - * initialize metadata - * ---------------- - */ - tuple->t_info = infomask; - return (tuple); + register char *tp; /* tuple pointer */ + IndexTuple tuple; /* return tuple */ + Size size, + hoff; + int i; + unsigned short infomask = 0; + bool hasnull = false; + char tupmask = 0; + int numberOfAttributes = tupleDescriptor->natts; + + if (numberOfAttributes > MaxIndexAttributeNumber) + elog(WARN, "index_formtuple: numberOfAttributes of %d > %d", + numberOfAttributes, MaxIndexAttributeNumber); + + + for (i = 0; i < numberOfAttributes && !hasnull; i++) + { + if (null[i] != ' ') + hasnull = true; + } + + if (hasnull) + infomask |= INDEX_NULL_MASK; + + hoff = IndexInfoFindDataOffset(infomask); + size = hoff + + ComputeDataSize(tupleDescriptor, + value, null); + size = DOUBLEALIGN(size); /* be conservative */ + + tp = (char *) palloc(size); + tuple = (IndexTuple) tp; + memset(tp, 0, (int) size); + + DataFill((char *) tp + hoff, + tupleDescriptor, + value, + null, + &tupmask, + (hasnull ? 
(bits8 *) tp + sizeof(*tuple) : NULL)); + + /* + * We do this because DataFill wants to initialize a "tupmask" which + * is used for HeapTuples, but we want an indextuple infomask. The + * only "relevent" info is the "has variable attributes" field, which + * is in mask position 0x02. We have already set the null mask above. + */ + + if (tupmask & 0x02) + infomask |= INDEX_VAR_MASK; + + /* + * Here we make sure that we can actually hold the size. We also want + * to make sure that size is not aligned oddly. This actually is a + * rather odd way to make sure the size is not too large overall. + */ + + if (size & 0xE000) + elog(WARN, "index_formtuple: data takes %d bytes: too big", size); + + + infomask |= size; + + /* ---------------- + * initialize metadata + * ---------------- + */ + tuple->t_info = infomask; + return (tuple); } /* ---------------- - * fastgetiattr + * fastgetiattr * - * This is a newer version of fastgetiattr which attempts to be - * faster by caching attribute offsets in the attribute descriptor. + * This is a newer version of fastgetiattr which attempts to be + * faster by caching attribute offsets in the attribute descriptor. * - * an alternate way to speed things up would be to cache offsets - * with the tuple, but that seems more difficult unless you take - * the storage hit of actually putting those offsets into the - * tuple you send to disk. Yuck. + * an alternate way to speed things up would be to cache offsets + * with the tuple, but that seems more difficult unless you take + * the storage hit of actually putting those offsets into the + * tuple you send to disk. Yuck. * - * This scheme will be slightly slower than that, but should - * preform well for queries which hit large #'s of tuples. After - * you cache the offsets once, examining all the other tuples using - * the same attribute descriptor will go much quicker. 
-cim 5/4/91 + * This scheme will be slightly slower than that, but should + * preform well for queries which hit large #'s of tuples. After + * you cache the offsets once, examining all the other tuples using + * the same attribute descriptor will go much quicker. -cim 5/4/91 * ---------------- */ -static char * +static char * fastgetiattr(IndexTuple tup, - int attnum, - TupleDesc tupleDesc, - bool *isnull) + int attnum, + TupleDesc tupleDesc, + bool * isnull) { - register char *tp; /* ptr to att in tuple */ - register char *bp = NULL; /* ptr to att in tuple */ - int slow; /* do we have to walk nulls? */ - register int data_off; /* tuple data offset */ - AttributeTupleForm *att = tupleDesc->attrs; - - /* ---------------- - * sanity checks - * ---------------- - */ - - Assert(PointerIsValid(isnull)); - Assert(attnum > 0); - - /* ---------------- - * Three cases: - * - * 1: No nulls and no variable length attributes. - * 2: Has a null or a varlena AFTER att. - * 3: Has nulls or varlenas BEFORE att. - * ---------------- - */ - - *isnull = false; - data_off = IndexTupleHasMinHeader(tup) ? sizeof *tup : - IndexInfoFindDataOffset(tup->t_info); - - if (IndexTupleNoNulls(tup)) { - - /* first attribute is always at position zero */ - - if (attnum == 1) { - return(fetchatt(&(att[0]), (char *) tup + data_off)); - } - attnum--; - - if (att[attnum]->attcacheoff > 0) { - return(fetchatt(&(att[attnum]), - (char *) tup + data_off + - att[attnum]->attcacheoff)); - } - - tp = (char *) tup + data_off; - - slow = 0; - }else { /* there's a null somewhere in the tuple */ - - bp = (char *) tup + sizeof(*tup); /* "knows" t_bits are here! */ - slow = 0; + register char *tp; /* ptr to att in tuple */ + register char *bp = NULL; /* ptr to att in tuple */ + int slow; /* do we have to walk nulls? 
*/ + register int data_off; /* tuple data offset */ + AttributeTupleForm *att = tupleDesc->attrs; + /* ---------------- - * check to see if desired att is null + * sanity checks * ---------------- */ - - attnum--; - { - if (att_isnull(attnum, bp)) { - *isnull = true; - return NULL; - } - } + + Assert(PointerIsValid(isnull)); + Assert(attnum > 0); + /* ---------------- - * Now check to see if any preceeding bits are null... + * Three cases: + * + * 1: No nulls and no variable length attributes. + * 2: Has a null or a varlena AFTER att. + * 3: Has nulls or varlenas BEFORE att. * ---------------- */ + + *isnull = false; + data_off = IndexTupleHasMinHeader(tup) ? sizeof *tup : + IndexInfoFindDataOffset(tup->t_info); + + if (IndexTupleNoNulls(tup)) { - register int i = 0; /* current offset in bp */ - register int mask; /* bit in byte we're looking at */ - register char n; /* current byte in bp */ - register int byte, finalbit; - - byte = attnum >> 3; - finalbit = attnum & 0x07; - - for (; i <= byte; i++) { - n = bp[i]; - if (i < byte) { - /* check for nulls in any "earlier" bytes */ - if ((~n) != 0) { - slow++; - break; - } - } else { - /* check for nulls "before" final bit of last byte*/ - mask = (finalbit << 1) - 1; - if ((~n) & mask) - slow++; + + /* first attribute is always at position zero */ + + if (attnum == 1) + { + return (fetchatt(&(att[0]), (char *) tup + data_off)); + } + attnum--; + + if (att[attnum]->attcacheoff > 0) + { + return (fetchatt(&(att[attnum]), + (char *) tup + data_off + + att[attnum]->attcacheoff)); } - } + + tp = (char *) tup + data_off; + + slow = 0; } - tp = (char *) tup + data_off; - } - - /* now check for any non-fixed length attrs before our attribute */ - - if (!slow) { - if (att[attnum]->attcacheoff > 0) { - return(fetchatt(&(att[attnum]), - tp + att[attnum]->attcacheoff)); - }else if (!IndexTupleAllFixed(tup)) { - register int j = 0; - - for (j = 0; j < attnum && !slow; j++) - if (att[j]->attlen < 1) slow = 1; + else + { /* there's a 
null somewhere in the tuple */ + + bp = (char *) tup + sizeof(*tup); /* "knows" t_bits are + * here! */ + slow = 0; + /* ---------------- + * check to see if desired att is null + * ---------------- + */ + + attnum--; + { + if (att_isnull(attnum, bp)) + { + *isnull = true; + return NULL; + } + } + /* ---------------- + * Now check to see if any preceeding bits are null... + * ---------------- + */ + { + register int i = 0; /* current offset in bp */ + register int mask; /* bit in byte we're looking at */ + register char n; /* current byte in bp */ + register int byte, + finalbit; + + byte = attnum >> 3; + finalbit = attnum & 0x07; + + for (; i <= byte; i++) + { + n = bp[i]; + if (i < byte) + { + /* check for nulls in any "earlier" bytes */ + if ((~n) != 0) + { + slow++; + break; + } + } + else + { + /* check for nulls "before" final bit of last byte */ + mask = (finalbit << 1) - 1; + if ((~n) & mask) + slow++; + } + } + } + tp = (char *) tup + data_off; } - } - - /* - * if slow is zero, and we got here, we know that we have a tuple with - * no nulls. We also know that we have to initialize the remainder of - * the attribute cached offset values. - */ - - if (!slow) { - register int j = 1; - register long off; - - /* - * need to set cache for some atts - */ - - att[0]->attcacheoff = 0; - - while (att[j]->attcacheoff > 0) j++; - - off = att[j-1]->attcacheoff + - att[j-1]->attlen; - - for (; j < attnum + 1; j++) { - /* - * Fix me when going to a machine with more than a four-byte - * word! - */ - - switch(att[j]->attlen) + + /* now check for any non-fixed length attrs before our attribute */ + + if (!slow) + { + if (att[attnum]->attcacheoff > 0) { - case -1: - off = (att[j]->attalign=='d')? - DOUBLEALIGN(off):INTALIGN(off); - break; - case sizeof(char): - break; - case sizeof(short): - off = SHORTALIGN(off); - break; - case sizeof(int32): - off = INTALIGN(off); - break; - default: - if (att[j]->attlen > sizeof(int32)) - off = (att[j]->attalign=='d')? 
- DOUBLEALIGN(off) : LONGALIGN(off); - else - elog(WARN, "fastgetiattr: attribute %d has len %d", - j, att[j]->attlen); - break; - + return (fetchatt(&(att[attnum]), + tp + att[attnum]->attcacheoff)); + } + else if (!IndexTupleAllFixed(tup)) + { + register int j = 0; + + for (j = 0; j < attnum && !slow; j++) + if (att[j]->attlen < 1) + slow = 1; } - - att[j]->attcacheoff = off; - off += att[j]->attlen; } - - return(fetchatt( &(att[attnum]), - tp + att[attnum]->attcacheoff)); - }else { - register bool usecache = true; - register int off = 0; - register int i; - + /* - * Now we know that we have to walk the tuple CAREFULLY. + * if slow is zero, and we got here, we know that we have a tuple with + * no nulls. We also know that we have to initialize the remainder of + * the attribute cached offset values. */ - - for (i = 0; i < attnum; i++) { - if (!IndexTupleNoNulls(tup)) { - if (att_isnull(i, bp)) { - usecache = false; - continue; + + if (!slow) + { + register int j = 1; + register long off; + + /* + * need to set cache for some atts + */ + + att[0]->attcacheoff = 0; + + while (att[j]->attcacheoff > 0) + j++; + + off = att[j - 1]->attcacheoff + + att[j - 1]->attlen; + + for (; j < attnum + 1; j++) + { + + /* + * Fix me when going to a machine with more than a four-byte + * word! + */ + + switch (att[j]->attlen) + { + case -1: + off = (att[j]->attalign == 'd') ? + DOUBLEALIGN(off) : INTALIGN(off); + break; + case sizeof(char): + break; + case sizeof(short): + off = SHORTALIGN(off); + break; + case sizeof(int32): + off = INTALIGN(off); + break; + default: + if (att[j]->attlen > sizeof(int32)) + off = (att[j]->attalign == 'd') ? 
+ DOUBLEALIGN(off) : LONGALIGN(off); + else + elog(WARN, "fastgetiattr: attribute %d has len %d", + j, att[j]->attlen); + break; + + } + + att[j]->attcacheoff = off; + off += att[j]->attlen; } - } - - if (usecache && att[i]->attcacheoff > 0) { - off = att[i]->attcacheoff; - if (att[i]->attlen == -1) - usecache = false; - else - continue; - } - - if (usecache) att[i]->attcacheoff = off; - switch(att[i]->attlen) + + return (fetchatt(&(att[attnum]), + tp + att[attnum]->attcacheoff)); + } + else + { + register bool usecache = true; + register int off = 0; + register int i; + + /* + * Now we know that we have to walk the tuple CAREFULLY. + */ + + for (i = 0; i < attnum; i++) { + if (!IndexTupleNoNulls(tup)) + { + if (att_isnull(i, bp)) + { + usecache = false; + continue; + } + } + + if (usecache && att[i]->attcacheoff > 0) + { + off = att[i]->attcacheoff; + if (att[i]->attlen == -1) + usecache = false; + else + continue; + } + + if (usecache) + att[i]->attcacheoff = off; + switch (att[i]->attlen) + { + case sizeof(char): + off++; + break; + case sizeof(short): + off = SHORTALIGN(off) +sizeof(short); + break; + case sizeof(int32): + off = INTALIGN(off) + sizeof(int32); + break; + case -1: + usecache = false; + off = (att[i]->attalign == 'd') ? + DOUBLEALIGN(off) : INTALIGN(off); + off += VARSIZE(tp + off); + break; + default: + if (att[i]->attlen > sizeof(int32)) + off = (att[i]->attalign == 'd') ? + DOUBLEALIGN(off) + att[i]->attlen : + LONGALIGN(off) + att[i]->attlen; + else + elog(WARN, "fastgetiattr2: attribute %d has len %d", + i, att[i]->attlen); + + break; + } + } + + /* + * I don't know why this code was missed here! I've got it from + * heaptuple.c:fastgetattr(). - vadim 06/12/97 + */ + switch (att[attnum]->attlen) + { + case -1: + off = (att[attnum]->attalign == 'd') ? 
+ DOUBLEALIGN(off) : INTALIGN(off); + break; case sizeof(char): - off++; - break; + break; case sizeof(short): - off = SHORTALIGN(off) + sizeof(short); - break; + off = SHORTALIGN(off); + break; case sizeof(int32): - off = INTALIGN(off) + sizeof(int32); - break; - case -1: - usecache = false; - off = (att[i]->attalign=='d')? - DOUBLEALIGN(off):INTALIGN(off); - off += VARSIZE(tp + off); - break; + off = INTALIGN(off); + break; default: - if (att[i]->attlen > sizeof(int32)) - off = (att[i]->attalign=='d') ? - DOUBLEALIGN(off) + att[i]->attlen : - LONGALIGN(off) + att[i]->attlen; - else - elog(WARN, "fastgetiattr2: attribute %d has len %d", - i, att[i]->attlen); - - break; + if (att[attnum]->attlen < sizeof(int32)) + elog(WARN, "fastgetattr3: attribute %d has len %d", + attnum, att[attnum]->attlen); + if (att[attnum]->attalign == 'd') + off = DOUBLEALIGN(off); + else + off = LONGALIGN(off); + break; } + + return (fetchatt(&att[attnum], tp + off)); } - /* - * I don't know why this code was missed here! - * I've got it from heaptuple.c:fastgetattr(). - * - vadim 06/12/97 - */ - switch (att[attnum]->attlen) { - case -1: - off = (att[attnum]->attalign=='d')? 
- DOUBLEALIGN(off) : INTALIGN(off); - break; - case sizeof(char): - break; - case sizeof(short): - off = SHORTALIGN(off); - break; - case sizeof(int32): - off = INTALIGN(off); - break; - default: - if (att[attnum]->attlen < sizeof(int32)) - elog(WARN, "fastgetattr3: attribute %d has len %d", - attnum, att[attnum]->attlen); - if (att[attnum]->attalign == 'd') - off = DOUBLEALIGN(off); - else - off = LONGALIGN(off); - break; - } - - return(fetchatt(&att[attnum], tp + off)); - } } /* ---------------- - * index_getattr + * index_getattr * ---------------- */ Datum index_getattr(IndexTuple tuple, - AttrNumber attNum, - TupleDesc tupDesc, - bool *isNullOutP) + AttrNumber attNum, + TupleDesc tupDesc, + bool * isNullOutP) { - Assert (attNum > 0); + Assert(attNum > 0); - return (Datum) - fastgetiattr(tuple, attNum, tupDesc, isNullOutP); + return (Datum) + fastgetiattr(tuple, attNum, tupDesc, isNullOutP); } RetrieveIndexResult FormRetrieveIndexResult(ItemPointer indexItemPointer, - ItemPointer heapItemPointer) + ItemPointer heapItemPointer) { - RetrieveIndexResult result; - - Assert(ItemPointerIsValid(indexItemPointer)); - Assert(ItemPointerIsValid(heapItemPointer)); - - result = (RetrieveIndexResult) palloc(sizeof *result); - - result->index_iptr = *indexItemPointer; - result->heap_iptr = *heapItemPointer; - - return (result); + RetrieveIndexResult result; + + Assert(ItemPointerIsValid(indexItemPointer)); + Assert(ItemPointerIsValid(heapItemPointer)); + + result = (RetrieveIndexResult) palloc(sizeof *result); + + result->index_iptr = *indexItemPointer; + result->heap_iptr = *heapItemPointer; + + return (result); } /* @@ -425,19 +461,21 @@ FormRetrieveIndexResult(ItemPointer indexItemPointer, * * Change me if adding an attribute to IndexTuples!!!!!!!!!!! 
*/ -static Size +static Size IndexInfoFindDataOffset(unsigned short t_info) { - if (!(t_info & INDEX_NULL_MASK)) - return((Size) sizeof(IndexTupleData)); - else { - Size size = sizeof(IndexTupleData); - - if (t_info & INDEX_NULL_MASK) { - size += sizeof(IndexAttributeBitMapData); + if (!(t_info & INDEX_NULL_MASK)) + return ((Size) sizeof(IndexTupleData)); + else + { + Size size = sizeof(IndexTupleData); + + if (t_info & INDEX_NULL_MASK) + { + size += sizeof(IndexAttributeBitMapData); + } + return DOUBLEALIGN(size); /* be conservative */ } - return DOUBLEALIGN(size); /* be conservative */ - } } /* @@ -445,17 +483,17 @@ IndexInfoFindDataOffset(unsigned short t_info) * we assume we have space that is already palloc'ed. */ void -CopyIndexTuple(IndexTuple source, IndexTuple *target) +CopyIndexTuple(IndexTuple source, IndexTuple * target) { - Size size; - IndexTuple ret; - - size = IndexTupleSize(source); - if (*target == NULL) { - *target = (IndexTuple) palloc(size); - } - - ret = *target; - memmove((char*)ret, (char*)source, size); -} + Size size; + IndexTuple ret; + + size = IndexTupleSize(source); + if (*target == NULL) + { + *target = (IndexTuple) palloc(size); + } + ret = *target; + memmove((char *) ret, (char *) source, size); +} diff --git a/src/backend/access/common/indexvalid.c b/src/backend/access/common/indexvalid.c index aff9af42f8..9f8501beb2 100644 --- a/src/backend/access/common/indexvalid.c +++ b/src/backend/access/common/indexvalid.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * indexvalid.c-- - * index tuple qualification validity checking code + * index tuple qualification validity checking code * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.14 1997/03/18 18:38:19 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.15 1997/09/07 04:37:38 momjian Exp 
$ * *------------------------------------------------------------------------- */ @@ -21,64 +21,70 @@ #include <executor/execdebug.h> /* ---------------------------------------------------------------- - * index scan key qualification code + * index scan key qualification code * ---------------------------------------------------------------- */ -int NIndexTupleProcessed; +int NIndexTupleProcessed; /* ---------------- - * index_keytest + * index_keytest * * old comments - * May eventually combine with other tests (like timeranges)? - * Should have Buffer buffer; as an argument and pass it to amgetattr. + * May eventually combine with other tests (like timeranges)? + * Should have Buffer buffer; as an argument and pass it to amgetattr. * ---------------- */ bool index_keytest(IndexTuple tuple, - TupleDesc tupdesc, - int scanKeySize, - ScanKey key) + TupleDesc tupdesc, + int scanKeySize, + ScanKey key) { - bool isNull; - Datum datum; - int test; - - IncrIndexProcessed(); - - while (scanKeySize > 0) { - datum = index_getattr(tuple, - key[0].sk_attno, - tupdesc, - &isNull); - - if (isNull) { - /* XXX eventually should check if SK_ISNULL */ - return (false); - } - - if (key[0].sk_flags & SK_ISNULL) { - return (false); - } + bool isNull; + Datum datum; + int test; - if (key[0].sk_flags & SK_COMMUTE) { - test = (*(key[0].sk_func)) - (DatumGetPointer(key[0].sk_argument), - datum) ? 1 : 0; - } else { - test = (*(key[0].sk_func)) - (datum, - DatumGetPointer(key[0].sk_argument)) ? 1 : 0; - } - - if (!test == !(key[0].sk_flags & SK_NEGATE)) { - return (false); + IncrIndexProcessed(); + + while (scanKeySize > 0) + { + datum = index_getattr(tuple, + key[0].sk_attno, + tupdesc, + &isNull); + + if (isNull) + { + /* XXX eventually should check if SK_ISNULL */ + return (false); + } + + if (key[0].sk_flags & SK_ISNULL) + { + return (false); + } + + if (key[0].sk_flags & SK_COMMUTE) + { + test = (*(key[0].sk_func)) + (DatumGetPointer(key[0].sk_argument), + datum) ? 
1 : 0; + } + else + { + test = (*(key[0].sk_func)) + (datum, + DatumGetPointer(key[0].sk_argument)) ? 1 : 0; + } + + if (!test == !(key[0].sk_flags & SK_NEGATE)) + { + return (false); + } + + scanKeySize -= 1; + key++; } - - scanKeySize -= 1; - key++; - } - - return (true); -} + return (true); +} diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index 98fbddc639..599ac59a45 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * printtup.c-- - * Routines to print out tuples to the destination (binary or non-binary - * portals, frontend/interactive backend, etc.). + * Routines to print out tuples to the destination (binary or non-binary + * portals, frontend/interactive backend, etc.). * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/printtup.c,v 1.15 1997/08/26 23:31:23 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/printtup.c,v 1.16 1997/09/07 04:37:39 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -16,279 +16,304 @@ #include <string.h> #include <postgres.h> -#include <fmgr.h> -#include <access/heapam.h> -#include <access/printtup.h> +#include <fmgr.h> +#include <access/heapam.h> +#include <access/printtup.h> #include <catalog/pg_type.h> #include <libpq/libpq.h> #include <utils/syscache.h> /* ---------------------------------------------------------------- - * printtup / debugtup support + * printtup / debugtup support * ---------------------------------------------------------------- */ /* ---------------- - * typtoout - used by printtup and debugtup + * typtoout - used by printtup and debugtup * ---------------- */ Oid typtoout(Oid type) { - HeapTuple typeTuple; - - typeTuple = SearchSysCacheTuple(TYPOID, - ObjectIdGetDatum(type), - 
0, 0, 0); - - if (HeapTupleIsValid(typeTuple)) - return((Oid) - ((TypeTupleForm) GETSTRUCT(typeTuple))->typoutput); - - elog(WARN, "typtoout: Cache lookup of type %d failed", type); - return(InvalidOid); + HeapTuple typeTuple; + + typeTuple = SearchSysCacheTuple(TYPOID, + ObjectIdGetDatum(type), + 0, 0, 0); + + if (HeapTupleIsValid(typeTuple)) + return ((Oid) + ((TypeTupleForm) GETSTRUCT(typeTuple))->typoutput); + + elog(WARN, "typtoout: Cache lookup of type %d failed", type); + return (InvalidOid); } Oid gettypelem(Oid type) { - HeapTuple typeTuple; - - typeTuple = SearchSysCacheTuple(TYPOID, - ObjectIdGetDatum(type), - 0,0,0); - - if (HeapTupleIsValid(typeTuple)) - return((Oid) - ((TypeTupleForm) GETSTRUCT(typeTuple))->typelem); - - elog(WARN, "typtoout: Cache lookup of type %d failed", type); - return(InvalidOid); + HeapTuple typeTuple; + + typeTuple = SearchSysCacheTuple(TYPOID, + ObjectIdGetDatum(type), + 0, 0, 0); + + if (HeapTupleIsValid(typeTuple)) + return ((Oid) + ((TypeTupleForm) GETSTRUCT(typeTuple))->typelem); + + elog(WARN, "typtoout: Cache lookup of type %d failed", type); + return (InvalidOid); } /* ---------------- - * printtup + * printtup * ---------------- */ void printtup(HeapTuple tuple, TupleDesc typeinfo) { - int i, j, k; - char *outputstr, *attr; - bool isnull; - Oid typoutput; - - /* ---------------- - * tell the frontend to expect new tuple data - * ---------------- - */ - pq_putnchar("D", 1); - - /* ---------------- - * send a bitmap of which attributes are null - * ---------------- - */ - j = 0; - k = 1 << 7; - for (i = 0; i < tuple->t_natts; ) { - i++; /* heap_getattr is a macro, so no increment */ - attr = heap_getattr(tuple, InvalidBuffer, i, typeinfo, &isnull); - if (!isnull) - j |= k; - k >>= 1; - if (!(i & 7)) { - pq_putint(j, 1); - j = 0; - k = 1 << 7; + int i, + j, + k; + char *outputstr, + *attr; + bool isnull; + Oid typoutput; + + /* ---------------- + * tell the frontend to expect new tuple data + * ---------------- + */ + 
pq_putnchar("D", 1); + + /* ---------------- + * send a bitmap of which attributes are null + * ---------------- + */ + j = 0; + k = 1 << 7; + for (i = 0; i < tuple->t_natts;) + { + i++; /* heap_getattr is a macro, so no + * increment */ + attr = heap_getattr(tuple, InvalidBuffer, i, typeinfo, &isnull); + if (!isnull) + j |= k; + k >>= 1; + if (!(i & 7)) + { + pq_putint(j, 1); + j = 0; + k = 1 << 7; + } } - } - if (i & 7) - pq_putint(j, 1); - - /* ---------------- - * send the attributes of this tuple - * ---------------- - */ - for (i = 0; i < tuple->t_natts; ++i) { - attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull); - typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid); - - if (!isnull && OidIsValid(typoutput)) { - outputstr = fmgr(typoutput, attr, - gettypelem(typeinfo->attrs[i]->atttypid)); - pq_putint(strlen(outputstr)+4, 4); - pq_putnchar(outputstr, strlen(outputstr)); - pfree(outputstr); + if (i & 7) + pq_putint(j, 1); + + /* ---------------- + * send the attributes of this tuple + * ---------------- + */ + for (i = 0; i < tuple->t_natts; ++i) + { + attr = heap_getattr(tuple, InvalidBuffer, i + 1, typeinfo, &isnull); + typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid); + + if (!isnull && OidIsValid(typoutput)) + { + outputstr = fmgr(typoutput, attr, + gettypelem(typeinfo->attrs[i]->atttypid)); + pq_putint(strlen(outputstr) + 4, 4); + pq_putnchar(outputstr, strlen(outputstr)); + pfree(outputstr); + } } - } } /* ---------------- - * printatt + * printatt * ---------------- */ static void printatt(unsigned attributeId, - AttributeTupleForm attributeP, - char *value) + AttributeTupleForm attributeP, + char *value) { - printf("\t%2d: %s%s%s%s\t(typeid = %u, len = %d, byval = %c)\n", - attributeId, - attributeP->attname.data, - value != NULL ? " = \"" : "", - value != NULL ? value : "", - value != NULL ? "\"" : "", - (unsigned int) (attributeP->atttypid), - attributeP->attlen, - attributeP->attbyval ? 
't' : 'f'); + printf("\t%2d: %s%s%s%s\t(typeid = %u, len = %d, byval = %c)\n", + attributeId, + attributeP->attname.data, + value != NULL ? " = \"" : "", + value != NULL ? value : "", + value != NULL ? "\"" : "", + (unsigned int) (attributeP->atttypid), + attributeP->attlen, + attributeP->attbyval ? 't' : 'f'); } /* ---------------- - * showatts + * showatts * ---------------- */ void showatts(char *name, TupleDesc tupleDesc) { - int i; - int natts = tupleDesc->natts; - AttributeTupleForm *attinfo = tupleDesc->attrs; + int i; + int natts = tupleDesc->natts; + AttributeTupleForm *attinfo = tupleDesc->attrs; - puts(name); - for (i = 0; i < natts; ++i) - printatt((unsigned) i+1, attinfo[i], (char *) NULL); - printf("\t----\n"); + puts(name); + for (i = 0; i < natts; ++i) + printatt((unsigned) i + 1, attinfo[i], (char *) NULL); + printf("\t----\n"); } /* ---------------- - * debugtup + * debugtup * ---------------- */ void debugtup(HeapTuple tuple, TupleDesc typeinfo) { - register int i; - char *attr, *value; - bool isnull; - Oid typoutput; - - for (i = 0; i < tuple->t_natts; ++i) { - attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull); - typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid); - - if (!isnull && OidIsValid(typoutput)) { - value = fmgr(typoutput, attr, - gettypelem(typeinfo->attrs[i]->atttypid)); - printatt((unsigned) i+1, typeinfo->attrs[i], value); - pfree(value); + register int i; + char *attr, + *value; + bool isnull; + Oid typoutput; + + for (i = 0; i < tuple->t_natts; ++i) + { + attr = heap_getattr(tuple, InvalidBuffer, i + 1, typeinfo, &isnull); + typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid); + + if (!isnull && OidIsValid(typoutput)) + { + value = fmgr(typoutput, attr, + gettypelem(typeinfo->attrs[i]->atttypid)); + printatt((unsigned) i + 1, typeinfo->attrs[i], value); + pfree(value); + } } - } - printf("\t----\n"); + printf("\t----\n"); } /* ---------------- - * printtup_internal - * Protocol expects either T, D, C, 
E, or N. - * We use a different data prefix, e.g. 'B' instead of 'D' to - * indicate a tuple in internal (binary) form. + * printtup_internal + * Protocol expects either T, D, C, E, or N. + * We use a different data prefix, e.g. 'B' instead of 'D' to + * indicate a tuple in internal (binary) form. * - * This is same as printtup, except we don't use the typout func. + * This is same as printtup, except we don't use the typout func. * ---------------- */ void printtup_internal(HeapTuple tuple, TupleDesc typeinfo) { - int i, j, k; - char *attr; - bool isnull; - - /* ---------------- - * tell the frontend to expect new tuple data - * ---------------- - */ - pq_putnchar("B", 1); - - /* ---------------- - * send a bitmap of which attributes are null - * ---------------- - */ - j = 0; - k = 1 << 7; - for (i = 0; i < tuple->t_natts; ) { - i++; /* heap_getattr is a macro, so no increment */ - attr = heap_getattr(tuple, InvalidBuffer, i, typeinfo, &isnull); - if (!isnull) - j |= k; - k >>= 1; - if (!(i & 7)) { - pq_putint(j, 1); - j = 0; - k = 1 << 7; + int i, + j, + k; + char *attr; + bool isnull; + + /* ---------------- + * tell the frontend to expect new tuple data + * ---------------- + */ + pq_putnchar("B", 1); + + /* ---------------- + * send a bitmap of which attributes are null + * ---------------- + */ + j = 0; + k = 1 << 7; + for (i = 0; i < tuple->t_natts;) + { + i++; /* heap_getattr is a macro, so no + * increment */ + attr = heap_getattr(tuple, InvalidBuffer, i, typeinfo, &isnull); + if (!isnull) + j |= k; + k >>= 1; + if (!(i & 7)) + { + pq_putint(j, 1); + j = 0; + k = 1 << 7; + } } - } - if (i & 7) - pq_putint(j, 1); - - /* ---------------- - * send the attributes of this tuple - * ---------------- - */ + if (i & 7) + pq_putint(j, 1); + + /* ---------------- + * send the attributes of this tuple + * ---------------- + */ #ifdef IPORTAL_DEBUG - fprintf(stderr, "sending tuple with %d atts\n", tuple->t_natts); + fprintf(stderr, "sending tuple with %d atts\n", 
tuple->t_natts); #endif - for (i = 0; i < tuple->t_natts; ++i) { - int32 len = typeinfo->attrs[i]->attlen; - - attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull); - if (!isnull) { - /* # of bytes, and opaque data */ - if (len == -1) { - /* variable length, assume a varlena structure */ - len = VARSIZE(attr) - VARHDRSZ; - - pq_putint(len, sizeof(int32)); - pq_putnchar(VARDATA(attr), len); -#ifdef IPORTAL_DEBUG + for (i = 0; i < tuple->t_natts; ++i) + { + int32 len = typeinfo->attrs[i]->attlen; + + attr = heap_getattr(tuple, InvalidBuffer, i + 1, typeinfo, &isnull); + if (!isnull) { - char *d = VARDATA(attr); - - fprintf(stderr, "length %d data %x%x%x%x\n", - len, *d, *(d+1), *(d+2), *(d+3)); - } + /* # of bytes, and opaque data */ + if (len == -1) + { + /* variable length, assume a varlena structure */ + len = VARSIZE(attr) - VARHDRSZ; + + pq_putint(len, sizeof(int32)); + pq_putnchar(VARDATA(attr), len); +#ifdef IPORTAL_DEBUG + { + char *d = VARDATA(attr); + + fprintf(stderr, "length %d data %x%x%x%x\n", + len, *d, *(d + 1), *(d + 2), *(d + 3)); + } #endif - } else { - /* fixed size */ - if (typeinfo->attrs[i]->attbyval) { - int8 i8; - int16 i16; - int32 i32; - - pq_putint(len, sizeof(int32)); - switch (len) { - case sizeof(int8): - i8 = DatumGetChar(attr); - pq_putnchar((char *) &i8, len); - break; - case sizeof(int16): - i16 = DatumGetInt16(attr); - pq_putnchar((char *) &i16, len); - break; - case sizeof(int32): - i32 = DatumGetInt32(attr); - pq_putnchar((char *) &i32, len); - break; - } + } + else + { + /* fixed size */ + if (typeinfo->attrs[i]->attbyval) + { + int8 i8; + int16 i16; + int32 i32; + + pq_putint(len, sizeof(int32)); + switch (len) + { + case sizeof(int8): + i8 = DatumGetChar(attr); + pq_putnchar((char *) &i8, len); + break; + case sizeof(int16): + i16 = DatumGetInt16(attr); + pq_putnchar((char *) &i16, len); + break; + case sizeof(int32): + i32 = DatumGetInt32(attr); + pq_putnchar((char *) &i32, len); + break; + } #ifdef IPORTAL_DEBUG 
- fprintf(stderr, "byval length %d data %d\n", len, attr); + fprintf(stderr, "byval length %d data %d\n", len, attr); #endif - } else { - pq_putint(len, sizeof(int32)); - pq_putnchar(attr, len); + } + else + { + pq_putint(len, sizeof(int32)); + pq_putnchar(attr, len); #ifdef IPORTAL_DEBUG - fprintf(stderr, "byref length %d data %x\n", len, attr); + fprintf(stderr, "byref length %d data %x\n", len, attr); #endif + } + } } - } } - } } diff --git a/src/backend/access/common/scankey.c b/src/backend/access/common/scankey.c index fb242497eb..9fbe264ae5 100644 --- a/src/backend/access/common/scankey.c +++ b/src/backend/access/common/scankey.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * scan.c-- - * scan direction and key code + * scan direction and key code * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/scankey.c,v 1.9 1996/11/05 07:42:45 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/scankey.c,v 1.10 1997/09/07 04:37:39 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -19,49 +19,49 @@ /* * ScanKeyEntryIsLegal -- - * True iff the scan key entry is legal. + * True iff the scan key entry is legal. */ #define ScanKeyEntryIsLegal(entry) \ - ((bool) (AssertMacro(PointerIsValid(entry)) && \ - AttributeNumberIsValid(entry->sk_attno))) + ((bool) (AssertMacro(PointerIsValid(entry)) && \ + AttributeNumberIsValid(entry->sk_attno))) /* * ScanKeyEntrySetIllegal -- - * Marks a scan key entry as illegal. + * Marks a scan key entry as illegal. */ void ScanKeyEntrySetIllegal(ScanKey entry) { - Assert(PointerIsValid(entry)); - - entry->sk_flags = 0; /* just in case... */ - entry->sk_attno = InvalidAttrNumber; - entry->sk_procedure = 0; /* should be InvalidRegProcedure */ + Assert(PointerIsValid(entry)); + + entry->sk_flags = 0; /* just in case... 
*/ + entry->sk_attno = InvalidAttrNumber; + entry->sk_procedure = 0; /* should be InvalidRegProcedure */ } /* * ScanKeyEntryInitialize -- - * Initializes an scan key entry. + * Initializes an scan key entry. * * Note: - * Assumes the scan key entry is valid. - * Assumes the intialized scan key entry will be legal. + * Assumes the scan key entry is valid. + * Assumes the intialized scan key entry will be legal. */ void ScanKeyEntryInitialize(ScanKey entry, - bits16 flags, - AttrNumber attributeNumber, - RegProcedure procedure, - Datum argument) + bits16 flags, + AttrNumber attributeNumber, + RegProcedure procedure, + Datum argument) { - Assert(PointerIsValid(entry)); - - entry->sk_flags = flags; - entry->sk_attno = attributeNumber; - entry->sk_procedure = procedure; - entry->sk_argument = argument; - fmgr_info(procedure, &entry->sk_func, &entry->sk_nargs); - - Assert(ScanKeyEntryIsLegal(entry)); + Assert(PointerIsValid(entry)); + + entry->sk_flags = flags; + entry->sk_attno = attributeNumber; + entry->sk_procedure = procedure; + entry->sk_argument = argument; + fmgr_info(procedure, &entry->sk_func, &entry->sk_nargs); + + Assert(ScanKeyEntryIsLegal(entry)); } diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index a38a5229f2..e616702a8e 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -1,17 +1,17 @@ /*------------------------------------------------------------------------- * * tupdesc.c-- - * POSTGRES tuple descriptor support code + * POSTGRES tuple descriptor support code * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.19 1997/08/22 02:55:39 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.20 1997/09/07 04:37:41 momjian Exp $ * * NOTES - * some of the executor utility code such as "ExecTypeFromTL" should be - * moved here. 
+ * some of the executor utility code such as "ExecTypeFromTL" should be + * moved here. * *------------------------------------------------------------------------- */ @@ -28,518 +28,534 @@ #include <utils/syscache.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif /* ---------------------------------------------------------------- - * CreateTemplateTupleDesc + * CreateTemplateTupleDesc * - * This function allocates and zeros a tuple descriptor structure. + * This function allocates and zeros a tuple descriptor structure. * ---------------------------------------------------------------- */ TupleDesc CreateTemplateTupleDesc(int natts) { - uint32 size; - TupleDesc desc; - - /* ---------------- - * sanity checks - * ---------------- - */ - AssertArg(natts >= 1); - - /* ---------------- - * allocate enough memory for the tuple descriptor and - * zero it as TupleDescInitEntry assumes that the descriptor - * is filled with NULL pointers. - * ---------------- - */ - size = natts * sizeof (AttributeTupleForm); - desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); - desc->attrs = (AttributeTupleForm*) palloc(size); - desc->constr = NULL; - memset(desc->attrs, 0, size); - - desc->natts = natts; - - return (desc); + uint32 size; + TupleDesc desc; + + /* ---------------- + * sanity checks + * ---------------- + */ + AssertArg(natts >= 1); + + /* ---------------- + * allocate enough memory for the tuple descriptor and + * zero it as TupleDescInitEntry assumes that the descriptor + * is filled with NULL pointers. 
+ * ---------------- + */ + size = natts * sizeof(AttributeTupleForm); + desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); + desc->attrs = (AttributeTupleForm *) palloc(size); + desc->constr = NULL; + memset(desc->attrs, 0, size); + + desc->natts = natts; + + return (desc); } /* ---------------------------------------------------------------- - * CreateTupleDesc + * CreateTupleDesc * - * This function allocates a new TupleDesc from AttributeTupleForm array + * This function allocates a new TupleDesc from AttributeTupleForm array * ---------------------------------------------------------------- */ TupleDesc -CreateTupleDesc(int natts, AttributeTupleForm* attrs) +CreateTupleDesc(int natts, AttributeTupleForm * attrs) { - TupleDesc desc; - - /* ---------------- - * sanity checks - * ---------------- - */ - AssertArg(natts >= 1); - - desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); - desc->attrs = attrs; - desc->natts = natts; - desc->constr = NULL; - - return (desc); + TupleDesc desc; + + /* ---------------- + * sanity checks + * ---------------- + */ + AssertArg(natts >= 1); + + desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); + desc->attrs = attrs; + desc->natts = natts; + desc->constr = NULL; + + return (desc); } /* ---------------------------------------------------------------- - * CreateTupleDescCopy + * CreateTupleDescCopy * - * This function creates a new TupleDesc by copying from an existing - * TupleDesc - * - * !!! Constraints are not copied !!! + * This function creates a new TupleDesc by copying from an existing + * TupleDesc + * + * !!! Constraints are not copied !!! 
* ---------------------------------------------------------------- */ TupleDesc CreateTupleDescCopy(TupleDesc tupdesc) { - TupleDesc desc; - int i, size; - - desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); - desc->natts = tupdesc->natts; - size = desc->natts * sizeof (AttributeTupleForm); - desc->attrs = (AttributeTupleForm*) palloc(size); - for (i=0;i<desc->natts;i++) { - desc->attrs[i] = - (AttributeTupleForm)palloc(ATTRIBUTE_TUPLE_SIZE); - memmove(desc->attrs[i], - tupdesc->attrs[i], - ATTRIBUTE_TUPLE_SIZE); - desc->attrs[i]->attnotnull = false; - desc->attrs[i]->atthasdef = false; - } - desc->constr = NULL; - - return desc; + TupleDesc desc; + int i, + size; + + desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); + desc->natts = tupdesc->natts; + size = desc->natts * sizeof(AttributeTupleForm); + desc->attrs = (AttributeTupleForm *) palloc(size); + for (i = 0; i < desc->natts; i++) + { + desc->attrs[i] = + (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE); + memmove(desc->attrs[i], + tupdesc->attrs[i], + ATTRIBUTE_TUPLE_SIZE); + desc->attrs[i]->attnotnull = false; + desc->attrs[i]->atthasdef = false; + } + desc->constr = NULL; + + return desc; } /* ---------------------------------------------------------------- - * CreateTupleDescCopyConstr + * CreateTupleDescCopyConstr + * + * This function creates a new TupleDesc by copying from an existing + * TupleDesc (with Constraints) * - * This function creates a new TupleDesc by copying from an existing - * TupleDesc (with Constraints) - * * ---------------------------------------------------------------- */ TupleDesc CreateTupleDescCopyConstr(TupleDesc tupdesc) { - TupleDesc desc; - TupleConstr *constr = tupdesc->constr; - int i, size; - - desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); - desc->natts = tupdesc->natts; - size = desc->natts * sizeof (AttributeTupleForm); - desc->attrs = (AttributeTupleForm*) palloc(size); - for (i=0;i<desc->natts;i++) { - desc->attrs[i] = - 
(AttributeTupleForm)palloc(ATTRIBUTE_TUPLE_SIZE); - memmove(desc->attrs[i], - tupdesc->attrs[i], - ATTRIBUTE_TUPLE_SIZE); - } - if (constr) - { - TupleConstr *cpy = (TupleConstr *) palloc(sizeof(TupleConstr)); - - cpy->has_not_null = constr->has_not_null; - - if ( ( cpy->num_defval = constr->num_defval ) > 0 ) - { - cpy->defval = (AttrDefault *) palloc (cpy->num_defval * sizeof (AttrDefault)); - memcpy (cpy->defval, constr->defval, cpy->num_defval * sizeof (AttrDefault)); - for (i = cpy->num_defval - 1; i >= 0; i--) - { - if ( constr->defval[i].adbin ) - cpy->defval[i].adbin = pstrdup (constr->defval[i].adbin); - if ( constr->defval[i].adsrc ) - cpy->defval[i].adsrc = pstrdup (constr->defval[i].adsrc); - } - } - - if ( ( cpy->num_check = constr->num_check ) > 0 ) - { - cpy->check = (ConstrCheck *) palloc (cpy->num_check * sizeof (ConstrCheck)); - memcpy (cpy->check, constr->check, cpy->num_check * sizeof (ConstrCheck)); - for (i = cpy->num_check - 1; i >= 0; i--) - { - if ( constr->check[i].ccname ) - cpy->check[i].ccname = pstrdup (constr->check[i].ccname); - if ( constr->check[i].ccbin ) - cpy->check[i].ccbin = pstrdup (constr->check[i].ccbin); - if ( constr->check[i].ccsrc ) - cpy->check[i].ccsrc = pstrdup (constr->check[i].ccsrc); - } - } - - desc->constr = cpy; - } - else - desc->constr = NULL; - - return desc; + TupleDesc desc; + TupleConstr *constr = tupdesc->constr; + int i, + size; + + desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); + desc->natts = tupdesc->natts; + size = desc->natts * sizeof(AttributeTupleForm); + desc->attrs = (AttributeTupleForm *) palloc(size); + for (i = 0; i < desc->natts; i++) + { + desc->attrs[i] = + (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE); + memmove(desc->attrs[i], + tupdesc->attrs[i], + ATTRIBUTE_TUPLE_SIZE); + } + if (constr) + { + TupleConstr *cpy = (TupleConstr *) palloc(sizeof(TupleConstr)); + + cpy->has_not_null = constr->has_not_null; + + if ((cpy->num_defval = constr->num_defval) > 0) + { + cpy->defval = 
(AttrDefault *) palloc(cpy->num_defval * sizeof(AttrDefault)); + memcpy(cpy->defval, constr->defval, cpy->num_defval * sizeof(AttrDefault)); + for (i = cpy->num_defval - 1; i >= 0; i--) + { + if (constr->defval[i].adbin) + cpy->defval[i].adbin = pstrdup(constr->defval[i].adbin); + if (constr->defval[i].adsrc) + cpy->defval[i].adsrc = pstrdup(constr->defval[i].adsrc); + } + } + + if ((cpy->num_check = constr->num_check) > 0) + { + cpy->check = (ConstrCheck *) palloc(cpy->num_check * sizeof(ConstrCheck)); + memcpy(cpy->check, constr->check, cpy->num_check * sizeof(ConstrCheck)); + for (i = cpy->num_check - 1; i >= 0; i--) + { + if (constr->check[i].ccname) + cpy->check[i].ccname = pstrdup(constr->check[i].ccname); + if (constr->check[i].ccbin) + cpy->check[i].ccbin = pstrdup(constr->check[i].ccbin); + if (constr->check[i].ccsrc) + cpy->check[i].ccsrc = pstrdup(constr->check[i].ccsrc); + } + } + + desc->constr = cpy; + } + else + desc->constr = NULL; + + return desc; } void -FreeTupleDesc (TupleDesc tupdesc) +FreeTupleDesc(TupleDesc tupdesc) { - int i; - - for (i = 0; i < tupdesc->natts; i++) - pfree (tupdesc->attrs[i]); - pfree (tupdesc->attrs); - if ( tupdesc->constr ) - { - if ( tupdesc->constr->num_defval > 0 ) - { - AttrDefault *attrdef = tupdesc->constr->defval; - - for (i = tupdesc->constr->num_defval - 1; i >= 0; i--) - { - if ( attrdef[i].adbin ) - pfree (attrdef[i].adbin); - if ( attrdef[i].adsrc ) - pfree (attrdef[i].adsrc); - } - pfree (attrdef); - } - if ( tupdesc->constr->num_check > 0 ) - { - ConstrCheck *check = tupdesc->constr->check; - - for (i = tupdesc->constr->num_check - 1; i >= 0; i--) - { - if ( check[i].ccname ) - pfree (check[i].ccname); - if ( check[i].ccbin ) - pfree (check[i].ccbin); - if ( check[i].ccsrc ) - pfree (check[i].ccsrc); - } - pfree (check); - } - pfree (tupdesc->constr); - } - - pfree (tupdesc); + int i; + + for (i = 0; i < tupdesc->natts; i++) + pfree(tupdesc->attrs[i]); + pfree(tupdesc->attrs); + if (tupdesc->constr) + { + 
if (tupdesc->constr->num_defval > 0) + { + AttrDefault *attrdef = tupdesc->constr->defval; + + for (i = tupdesc->constr->num_defval - 1; i >= 0; i--) + { + if (attrdef[i].adbin) + pfree(attrdef[i].adbin); + if (attrdef[i].adsrc) + pfree(attrdef[i].adsrc); + } + pfree(attrdef); + } + if (tupdesc->constr->num_check > 0) + { + ConstrCheck *check = tupdesc->constr->check; + + for (i = tupdesc->constr->num_check - 1; i >= 0; i--) + { + if (check[i].ccname) + pfree(check[i].ccname); + if (check[i].ccbin) + pfree(check[i].ccbin); + if (check[i].ccsrc) + pfree(check[i].ccsrc); + } + pfree(check); + } + pfree(tupdesc->constr); + } + + pfree(tupdesc); } /* ---------------------------------------------------------------- - * TupleDescInitEntry + * TupleDescInitEntry * - * This function initializes a single attribute structure in - * a preallocated tuple descriptor. + * This function initializes a single attribute structure in + * a preallocated tuple descriptor. * ---------------------------------------------------------------- */ bool TupleDescInitEntry(TupleDesc desc, - AttrNumber attributeNumber, - char *attributeName, - char *typeName, - int attdim, - bool attisset) + AttrNumber attributeNumber, + char *attributeName, + char *typeName, + int attdim, + bool attisset) { - HeapTuple tuple; - TypeTupleForm typeForm; - AttributeTupleForm att; - - /* ---------------- - * sanity checks - * ---------------- - */ - AssertArg(PointerIsValid(desc)); - AssertArg(attributeNumber >= 1); - /* attributeName's are sometimes NULL, - from resdom's. 
I don't know why that is, though -- Jolly */ -/* AssertArg(NameIsValid(attributeName));*/ -/* AssertArg(NameIsValid(typeName));*/ - - AssertArg(!PointerIsValid(desc->attrs[attributeNumber - 1])); - - - /* ---------------- - * allocate storage for this attribute - * ---------------- - */ - - att = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE); - desc->attrs[attributeNumber - 1] = att; - - /* ---------------- - * initialize some of the attribute fields - * ---------------- - */ - att->attrelid = 0; /* dummy value */ - - if (attributeName != NULL) - namestrcpy(&(att->attname), attributeName); - else - memset(att->attname.data,0,NAMEDATALEN); - - - att->attdisbursion = 0; /* dummy value */ - att->attcacheoff = -1; - - att->attnum = attributeNumber; - att->attnelems = attdim; - att->attisset = attisset; - - att->attnotnull = false; - att->atthasdef = false; - - /* ---------------- - * search the system cache for the type tuple of the attribute - * we are creating so that we can get the typeid and some other - * stuff. - * - * Note: in the special case of - * - * create EMP (name = char16, manager = EMP) - * - * RelationNameCreateHeapRelation() calls BuildDesc() which - * calls this routine and since EMP does not exist yet, the - * system cache lookup below fails. That's fine, but rather - * then doing a elog(WARN) we just leave that information - * uninitialized, return false, then fix things up later. - * -cim 6/14/90 - * ---------------- - */ - tuple = SearchSysCacheTuple(TYPNAME, PointerGetDatum(typeName), - 0,0,0); - if (! HeapTupleIsValid(tuple)) { + HeapTuple tuple; + TypeTupleForm typeForm; + AttributeTupleForm att; + /* ---------------- - * here type info does not exist yet so we just fill - * the attribute with dummy information and return false. 
+ * sanity checks * ---------------- */ - att->atttypid = InvalidOid; - att->attlen = (int16) 0; - att->attbyval = (bool) 0; - att->attalign = 'i'; - return false; - } - - /* ---------------- - * type info exists so we initialize our attribute - * information from the type tuple we found.. - * ---------------- - */ - typeForm = (TypeTupleForm) GETSTRUCT(tuple); - - att->atttypid = tuple->t_oid; - att->attalign = typeForm->typalign; - - /* ------------------------ - If this attribute is a set, what is really stored in the - attribute is the OID of a tuple in the pg_proc catalog. - The pg_proc tuple contains the query string which defines - this set - i.e., the query to run to get the set. - So the atttypid (just assigned above) refers to the type returned - by this query, but the actual length of this attribute is the - length (size) of an OID. - - Why not just make the atttypid point to the OID type, instead - of the type the query returns? Because the executor uses the atttypid - to tell the front end what type will be returned (in BeginCommand), - and in the end the type returned will be the result of the query, not - an OID. - - Why not wait until the return type of the set is known (i.e., the - recursive call to the executor to execute the set has returned) - before telling the front end what the return type will be? Because - the executor is a delicate thing, and making sure that the correct - order of front-end commands is maintained is messy, especially - considering that target lists may change as inherited attributes - are considered, etc. Ugh. - ----------------------------------------- - */ - if (attisset) { - Type t = type("oid"); - att->attlen = tlen(t); - att->attbyval = tbyval(t); - } else { - att->attlen = typeForm->typlen; - att->attbyval = typeForm->typbyval; - } - - - return true; + AssertArg(PointerIsValid(desc)); + AssertArg(attributeNumber >= 1); + + /* + * attributeName's are sometimes NULL, from resdom's. 
I don't know + * why that is, though -- Jolly + */ +/* AssertArg(NameIsValid(attributeName));*/ +/* AssertArg(NameIsValid(typeName));*/ + + AssertArg(!PointerIsValid(desc->attrs[attributeNumber - 1])); + + + /* ---------------- + * allocate storage for this attribute + * ---------------- + */ + + att = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE); + desc->attrs[attributeNumber - 1] = att; + + /* ---------------- + * initialize some of the attribute fields + * ---------------- + */ + att->attrelid = 0; /* dummy value */ + + if (attributeName != NULL) + namestrcpy(&(att->attname), attributeName); + else + memset(att->attname.data, 0, NAMEDATALEN); + + + att->attdisbursion = 0; /* dummy value */ + att->attcacheoff = -1; + + att->attnum = attributeNumber; + att->attnelems = attdim; + att->attisset = attisset; + + att->attnotnull = false; + att->atthasdef = false; + + /* ---------------- + * search the system cache for the type tuple of the attribute + * we are creating so that we can get the typeid and some other + * stuff. + * + * Note: in the special case of + * + * create EMP (name = char16, manager = EMP) + * + * RelationNameCreateHeapRelation() calls BuildDesc() which + * calls this routine and since EMP does not exist yet, the + * system cache lookup below fails. That's fine, but rather + * then doing a elog(WARN) we just leave that information + * uninitialized, return false, then fix things up later. + * -cim 6/14/90 + * ---------------- + */ + tuple = SearchSysCacheTuple(TYPNAME, PointerGetDatum(typeName), + 0, 0, 0); + if (!HeapTupleIsValid(tuple)) + { + /* ---------------- + * here type info does not exist yet so we just fill + * the attribute with dummy information and return false. 
+ * ---------------- + */ + att->atttypid = InvalidOid; + att->attlen = (int16) 0; + att->attbyval = (bool) 0; + att->attalign = 'i'; + return false; + } + + /* ---------------- + * type info exists so we initialize our attribute + * information from the type tuple we found.. + * ---------------- + */ + typeForm = (TypeTupleForm) GETSTRUCT(tuple); + + att->atttypid = tuple->t_oid; + att->attalign = typeForm->typalign; + + /* ------------------------ + If this attribute is a set, what is really stored in the + attribute is the OID of a tuple in the pg_proc catalog. + The pg_proc tuple contains the query string which defines + this set - i.e., the query to run to get the set. + So the atttypid (just assigned above) refers to the type returned + by this query, but the actual length of this attribute is the + length (size) of an OID. + + Why not just make the atttypid point to the OID type, instead + of the type the query returns? Because the executor uses the atttypid + to tell the front end what type will be returned (in BeginCommand), + and in the end the type returned will be the result of the query, not + an OID. + + Why not wait until the return type of the set is known (i.e., the + recursive call to the executor to execute the set has returned) + before telling the front end what the return type will be? Because + the executor is a delicate thing, and making sure that the correct + order of front-end commands is maintained is messy, especially + considering that target lists may change as inherited attributes + are considered, etc. Ugh. 
+ ----------------------------------------- + */ + if (attisset) + { + Type t = type("oid"); + + att->attlen = tlen(t); + att->attbyval = tbyval(t); + } + else + { + att->attlen = typeForm->typlen; + att->attbyval = typeForm->typbyval; + } + + + return true; } /* ---------------------------------------------------------------- - * TupleDescMakeSelfReference + * TupleDescMakeSelfReference * - * This function initializes a "self-referential" attribute like - * manager in "create EMP (name=text, manager = EMP)". - * It calls TypeShellMake() which inserts a "shell" type - * tuple into pg_type. A self-reference is one kind of set, so - * its size and byval are the same as for a set. See the comments - * above in TupleDescInitEntry. + * This function initializes a "self-referential" attribute like + * manager in "create EMP (name=text, manager = EMP)". + * It calls TypeShellMake() which inserts a "shell" type + * tuple into pg_type. A self-reference is one kind of set, so + * its size and byval are the same as for a set. See the comments + * above in TupleDescInitEntry. 
* ---------------------------------------------------------------- */ static void TupleDescMakeSelfReference(TupleDesc desc, - AttrNumber attnum, - char *relname) + AttrNumber attnum, + char *relname) { - AttributeTupleForm att; - Type t = type("oid"); - - att = desc->attrs[attnum-1]; - att->atttypid = TypeShellMake(relname); - att->attlen = tlen(t); - att->attbyval = tbyval(t); - att->attnelems = 0; + AttributeTupleForm att; + Type t = type("oid"); + + att = desc->attrs[attnum - 1]; + att->atttypid = TypeShellMake(relname); + att->attlen = tlen(t); + att->attbyval = tbyval(t); + att->attnelems = 0; } /* ---------------------------------------------------------------- - * BuildDescForRelation + * BuildDescForRelation * - * This is a general purpose function identical to BuildDesc - * but is used by the DefineRelation() code to catch the - * special case where you + * This is a general purpose function identical to BuildDesc + * but is used by the DefineRelation() code to catch the + * special case where you * - * create FOO ( ..., x = FOO ) + * create FOO ( ..., x = FOO ) * - * here, the initial type lookup for "x = FOO" will fail - * because FOO isn't in the catalogs yet. But since we - * are creating FOO, instead of doing an elog() we add - * a shell type tuple to pg_type and fix things later - * in amcreate(). + * here, the initial type lookup for "x = FOO" will fail + * because FOO isn't in the catalogs yet. But since we + * are creating FOO, instead of doing an elog() we add + * a shell type tuple to pg_type and fix things later + * in amcreate(). 
* ---------------------------------------------------------------- */ TupleDesc -BuildDescForRelation(List *schema, char *relname) +BuildDescForRelation(List * schema, char *relname) { - int natts; - AttrNumber attnum; - List *p; - TupleDesc desc; - AttrDefault *attrdef = NULL; - TupleConstr *constr = (TupleConstr *) palloc(sizeof(TupleConstr)); - char *attname; - char *typename; - int attdim; - int ndef = 0; - bool attisset; - - /* ---------------- - * allocate a new tuple descriptor - * ---------------- - */ - natts = length(schema); - desc = CreateTemplateTupleDesc(natts); - constr->has_not_null = false; - - attnum = 0; - - typename = palloc(NAMEDATALEN); - - foreach(p, schema) { - ColumnDef *entry; - List *arry; + int natts; + AttrNumber attnum; + List *p; + TupleDesc desc; + AttrDefault *attrdef = NULL; + TupleConstr *constr = (TupleConstr *) palloc(sizeof(TupleConstr)); + char *attname; + char *typename; + int attdim; + int ndef = 0; + bool attisset; /* ---------------- - * for each entry in the list, get the name and type - * information from the list and have TupleDescInitEntry - * fill in the attribute information we need. + * allocate a new tuple descriptor * ---------------- - */ - attnum++; - - entry = lfirst(p); - attname = entry->colname; - arry = entry->typename->arrayBounds; - attisset = entry->typename->setof; - - strNcpy(typename, entry->typename->name,NAMEDATALEN-1); - if (arry != NIL) - attdim = length(arry); - else - attdim = 0; - - if (! TupleDescInitEntry(desc, attnum, attname, - typename, attdim, attisset)) { - /* ---------------- - * if TupleDescInitEntry() fails, it means there is - * no type in the system catalogs. So now we check if - * the type name equals the relation name. If so we - * have a self reference, otherwise it's an error. 
- * ---------------- - */ - if (!strcmp(typename, relname)) { - TupleDescMakeSelfReference(desc, attnum, relname); - } else - elog(WARN, "DefineRelation: no such type %s", - typename); - } - - /* - * this is for char() and varchar(). When an entry is of type - * char() or varchar(), typlen is set to the appropriate length, - * which we'll use here instead. (The catalog lookup only returns - * the length of bpchar and varchar which is not what we want!) - * - ay 6/95 */ - if (entry->typename->typlen > 0) { - desc->attrs[attnum - 1]->attlen = entry->typename->typlen; - } + natts = length(schema); + desc = CreateTemplateTupleDesc(natts); + constr->has_not_null = false; - /* This is for constraints */ - if (entry->is_not_null) - constr->has_not_null = true; - desc->attrs[attnum-1]->attnotnull = entry->is_not_null; - - if ( entry->defval != NULL ) + attnum = 0; + + typename = palloc(NAMEDATALEN); + + foreach(p, schema) { - if ( attrdef == NULL ) - attrdef = (AttrDefault*) palloc (natts * sizeof (AttrDefault)); - attrdef[ndef].adnum = attnum; - attrdef[ndef].adbin = NULL; - attrdef[ndef].adsrc = entry->defval; - ndef++; - desc->attrs[attnum-1]->atthasdef = true; + ColumnDef *entry; + List *arry; + + /* ---------------- + * for each entry in the list, get the name and type + * information from the list and have TupleDescInitEntry + * fill in the attribute information we need. + * ---------------- + */ + attnum++; + + entry = lfirst(p); + attname = entry->colname; + arry = entry->typename->arrayBounds; + attisset = entry->typename->setof; + + strNcpy(typename, entry->typename->name, NAMEDATALEN - 1); + if (arry != NIL) + attdim = length(arry); + else + attdim = 0; + + if (!TupleDescInitEntry(desc, attnum, attname, + typename, attdim, attisset)) + { + /* ---------------- + * if TupleDescInitEntry() fails, it means there is + * no type in the system catalogs. So now we check if + * the type name equals the relation name. 
If so we + * have a self reference, otherwise it's an error. + * ---------------- + */ + if (!strcmp(typename, relname)) + { + TupleDescMakeSelfReference(desc, attnum, relname); + } + else + elog(WARN, "DefineRelation: no such type %s", + typename); + } + + /* + * this is for char() and varchar(). When an entry is of type + * char() or varchar(), typlen is set to the appropriate length, + * which we'll use here instead. (The catalog lookup only returns + * the length of bpchar and varchar which is not what we want!) - + * ay 6/95 + */ + if (entry->typename->typlen > 0) + { + desc->attrs[attnum - 1]->attlen = entry->typename->typlen; + } + + /* This is for constraints */ + if (entry->is_not_null) + constr->has_not_null = true; + desc->attrs[attnum - 1]->attnotnull = entry->is_not_null; + + if (entry->defval != NULL) + { + if (attrdef == NULL) + attrdef = (AttrDefault *) palloc(natts * sizeof(AttrDefault)); + attrdef[ndef].adnum = attnum; + attrdef[ndef].adbin = NULL; + attrdef[ndef].adsrc = entry->defval; + ndef++; + desc->attrs[attnum - 1]->atthasdef = true; + } + } + if (constr->has_not_null || ndef > 0) + { + desc->constr = constr; - } - if ( constr->has_not_null || ndef > 0 ) - { - desc->constr = constr; - - if ( ndef > 0 ) /* DEFAULTs */ - { - if ( ndef < natts ) - constr->defval = (AttrDefault*) - repalloc (attrdef, ndef * sizeof (AttrDefault)); - else - constr->defval = attrdef; - constr->num_defval = ndef; - } - else - constr->num_defval = 0; - constr->num_check = 0; - } - else - { - pfree (constr); - desc->constr = NULL; - } - return desc; + if (ndef > 0) /* DEFAULTs */ + { + if (ndef < natts) + constr->defval = (AttrDefault *) + repalloc(attrdef, ndef * sizeof(AttrDefault)); + else + constr->defval = attrdef; + constr->num_defval = ndef; + } + else + constr->num_defval = 0; + constr->num_check = 0; + } + else + { + pfree(constr); + desc->constr = NULL; + } + return desc; } - diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 
1d36f340ed..598f9ed8f0 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * gist.c-- - * interface routines for the postgres GiST index access method. + * interface routines for the postgres GiST index access method. * * * @@ -26,308 +26,345 @@ #include <utils/syscache.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif /* non-export function prototypes */ -static InsertIndexResult gistdoinsert(Relation r, IndexTuple itup, - GISTSTATE *GISTstate); -static InsertIndexResult gistentryinsert(Relation r, GISTSTACK *stk, - IndexTuple tup, - GISTSTATE *giststate); -static void gistentryinserttwo(Relation r, GISTSTACK *stk, IndexTuple ltup, - IndexTuple rtup, GISTSTATE *giststate); -static void gistAdjustKeys(Relation r, GISTSTACK *stk, BlockNumber blk, - char *datum, int att_size, GISTSTATE *giststate); -static void gistintinsert(Relation r, GISTSTACK *stk, IndexTuple ltup, - IndexTuple rtup, GISTSTATE *giststate); -static InsertIndexResult gistSplit(Relation r, Buffer buffer, - GISTSTACK *stack, IndexTuple itup, - GISTSTATE *giststate); -static void gistnewroot(GISTSTATE *giststate, Relation r, IndexTuple lt, +static InsertIndexResult +gistdoinsert(Relation r, IndexTuple itup, + GISTSTATE * GISTstate); +static InsertIndexResult +gistentryinsert(Relation r, GISTSTACK * stk, + IndexTuple tup, + GISTSTATE * giststate); +static void +gistentryinserttwo(Relation r, GISTSTACK * stk, IndexTuple ltup, + IndexTuple rtup, GISTSTATE * giststate); +static void +gistAdjustKeys(Relation r, GISTSTACK * stk, BlockNumber blk, + char *datum, int att_size, GISTSTATE * giststate); +static void +gistintinsert(Relation r, GISTSTACK * stk, IndexTuple ltup, + IndexTuple rtup, GISTSTATE * giststate); +static InsertIndexResult +gistSplit(Relation r, Buffer buffer, + GISTSTACK * stack, 
IndexTuple itup, + GISTSTATE * giststate); +static void +gistnewroot(GISTSTATE * giststate, Relation r, IndexTuple lt, IndexTuple rt); -static void GISTInitBuffer(Buffer b, uint32 f); -static BlockNumber gistChooseSubtree(Relation r, IndexTuple itup, int level, - GISTSTATE *giststate, - GISTSTACK **retstack, Buffer *leafbuf); -static OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, - GISTSTATE *giststate); -static int gistnospace(Page p, IndexTuple it); -void gistdelete(Relation r, ItemPointer tid); +static void GISTInitBuffer(Buffer b, uint32 f); +static BlockNumber +gistChooseSubtree(Relation r, IndexTuple itup, int level, + GISTSTATE * giststate, + GISTSTACK ** retstack, Buffer * leafbuf); +static OffsetNumber +gistchoose(Relation r, Page p, IndexTuple it, + GISTSTATE * giststate); +static int gistnospace(Page p, IndexTuple it); +void gistdelete(Relation r, ItemPointer tid); static IndexTuple gist_tuple_replacekey(Relation r, GISTENTRY entry, IndexTuple t); -static void gistcentryinit(GISTSTATE *giststate, GISTENTRY *e, char *pr, - Relation r, Page pg, OffsetNumber o, int b, bool l) ; -static char *int_range_out(INTRANGE *r); +static void +gistcentryinit(GISTSTATE * giststate, GISTENTRY * e, char *pr, + Relation r, Page pg, OffsetNumber o, int b, bool l); +static char *int_range_out(INTRANGE * r); /* ** routine to build an index. 
Basically calls insert over and over */ void gistbuild(Relation heap, - Relation index, - int natts, - AttrNumber *attnum, - IndexStrategy istrat, - uint16 pint, - Datum *params, - FuncIndexInfo *finfo, - PredInfo *predInfo) + Relation index, + int natts, + AttrNumber * attnum, + IndexStrategy istrat, + uint16 pint, + Datum * params, + FuncIndexInfo * finfo, + PredInfo * predInfo) { - HeapScanDesc scan; - Buffer buffer; - AttrNumber i; - HeapTuple htup; - IndexTuple itup; - TupleDesc hd, id; - InsertIndexResult res; - Datum *d; - bool *nulls; - int nb, nh, ni; + HeapScanDesc scan; + Buffer buffer; + AttrNumber i; + HeapTuple htup; + IndexTuple itup; + TupleDesc hd, + id; + InsertIndexResult res; + Datum *d; + bool *nulls; + int nb, + nh, + ni; + #ifndef OMIT_PARTIAL_INDEX - ExprContext *econtext; - TupleTable tupleTable; - TupleTableSlot *slot; + ExprContext *econtext; + TupleTable tupleTable; + TupleTableSlot *slot; + #endif - Oid hrelid, irelid; - Node *pred, *oldPred; - GISTSTATE giststate; - GISTENTRY tmpcentry; - bool *compvec; - - /* GiSTs only know how to do stupid locking now */ - RelationSetLockForWrite(index); - - setheapoverride(TRUE); /* so we can see the new pg_index tuple */ - initGISTstate(&giststate, index); - setheapoverride(FALSE); - - pred = predInfo->pred; - oldPred = predInfo->oldPred; - - /* - * We expect to be called exactly once for any index relation. - * If that's not the case, big trouble's what we have. 
- */ - - if (oldPred == NULL && (nb = RelationGetNumberOfBlocks(index)) != 0) - elog(WARN, "%.16s already contains data", &(index->rd_rel->relname.data[0])); - - /* initialize the root page (if this is a new index) */ - if (oldPred == NULL) { - buffer = ReadBuffer(index, P_NEW); - GISTInitBuffer(buffer, F_LEAF); - WriteBuffer(buffer); - } - - /* init the tuple descriptors and get set for a heap scan */ - hd = RelationGetTupleDescriptor(heap); - id = RelationGetTupleDescriptor(index); - d = (Datum *)palloc(natts * sizeof (*d)); - nulls = (bool *)palloc(natts * sizeof (*nulls)); - - /* - * If this is a predicate (partial) index, we will need to evaluate the - * predicate using ExecQual, which requires the current tuple to be in a - * slot of a TupleTable. In addition, ExecQual must have an ExprContext - * referring to that slot. Here, we initialize dummy TupleTable and - * ExprContext objects for this purpose. --Nels, Feb '92 - */ + Oid hrelid, + irelid; + Node *pred, + *oldPred; + GISTSTATE giststate; + GISTENTRY tmpcentry; + bool *compvec; + + /* GiSTs only know how to do stupid locking now */ + RelationSetLockForWrite(index); + + setheapoverride(TRUE); /* so we can see the new pg_index tuple */ + initGISTstate(&giststate, index); + setheapoverride(FALSE); + + pred = predInfo->pred; + oldPred = predInfo->oldPred; + + /* + * We expect to be called exactly once for any index relation. If + * that's not the case, big trouble's what we have. 
+ */ + + if (oldPred == NULL && (nb = RelationGetNumberOfBlocks(index)) != 0) + elog(WARN, "%.16s already contains data", &(index->rd_rel->relname.data[0])); + + /* initialize the root page (if this is a new index) */ + if (oldPred == NULL) + { + buffer = ReadBuffer(index, P_NEW); + GISTInitBuffer(buffer, F_LEAF); + WriteBuffer(buffer); + } + + /* init the tuple descriptors and get set for a heap scan */ + hd = RelationGetTupleDescriptor(heap); + id = RelationGetTupleDescriptor(index); + d = (Datum *) palloc(natts * sizeof(*d)); + nulls = (bool *) palloc(natts * sizeof(*nulls)); + + /* + * If this is a predicate (partial) index, we will need to evaluate + * the predicate using ExecQual, which requires the current tuple to + * be in a slot of a TupleTable. In addition, ExecQual must have an + * ExprContext referring to that slot. Here, we initialize dummy + * TupleTable and ExprContext objects for this purpose. --Nels, Feb + * '92 + */ #ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - econtext = makeNode(ExprContext); - FillDummyExprContext(econtext, slot, hd, buffer); - } - else /* shut the compiler up */ + if (pred != NULL || oldPred != NULL) + { + tupleTable = ExecCreateTupleTable(1); + slot = ExecAllocTableSlot(tupleTable); + econtext = makeNode(ExprContext); + FillDummyExprContext(econtext, slot, hd, buffer); + } + else +/* shut the compiler up */ { tupleTable = NULL; slot = NULL; econtext = NULL; } -#endif /* OMIT_PARTIAL_INDEX */ - scan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); - htup = heap_getnext(scan, 0, &buffer); - - /* int the tuples as we insert them */ - nh = ni = 0; - - for (; HeapTupleIsValid(htup); htup = heap_getnext(scan, 0, &buffer)) { - - nh++; - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index - */ - if (oldPred != NULL) { +#endif /* 
OMIT_PARTIAL_INDEX */ + scan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); + htup = heap_getnext(scan, 0, &buffer); + + /* int the tuples as we insert them */ + nh = ni = 0; + + for (; HeapTupleIsValid(htup); htup = heap_getnext(scan, 0, &buffer)) + { + + nh++; + + /* + * If oldPred != NULL, this is an EXTEND INDEX command, so skip + * this tuple if it was already in the existing partial index + */ + if (oldPred != NULL) + { #ifndef OMIT_PARTIAL_INDEX - /*SetSlotContents(slot, htup); */ - slot->val = htup; - if (ExecQual((List*)oldPred, econtext) == true) { + /* SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List *) oldPred, econtext) == true) + { + ni++; + continue; + } +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* + * Skip this tuple if it doesn't satisfy the partial-index + * predicate + */ + if (pred != NULL) + { +#ifndef OMIT_PARTIAL_INDEX + /* SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List *) pred, econtext) == false) + continue; +#endif /* OMIT_PARTIAL_INDEX */ + } + ni++; - continue; - } -#endif /* OMIT_PARTIAL_INDEX */ + + /* + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. + */ + + for (i = 1; i <= natts; i++) + { + int attoff; + bool attnull; + + /* + * Offsets are from the start of the tuple, and are + * zero-based; indices are one-based. The next call returns i + * - 1. That's data hiding for you. + */ + + attoff = AttrNumberGetAttrOffset(i); + + /* + * d[attoff] = HeapTupleGetAttributeValue(htup, buffer, + */ + d[attoff] = GetIndexValue(htup, + hd, + attoff, + attnum, + finfo, + &attnull, + buffer); + nulls[attoff] = (attnull ? 
'n' : ' '); + } + + /* immediately compress keys to normalize */ + compvec = (bool *) palloc(sizeof(bool) * natts); + for (i = 0; i < natts; i++) + { + gistcentryinit(&giststate, &tmpcentry, (char *) d[i], + (Relation) NULL, (Page) NULL, (OffsetNumber) 0, + -1 /* size is currently bogus */ , TRUE); + if (d[i] != (Datum) tmpcentry.pred && !(giststate.keytypbyval)) + compvec[i] = TRUE; + else + compvec[i] = FALSE; + d[i] = (Datum) tmpcentry.pred; + } + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(id, &d[0], nulls); + itup->t_tid = htup->t_ctid; + + /* + * Since we already have the index relation locked, we call + * gistdoinsert directly. Normal access method calls dispatch + * through gistinsert, which locks the relation for write. This + * is the right thing to do if you're inserting single tups, but + * not when you're initializing the whole index at once. + */ + + res = gistdoinsert(index, itup, &giststate); + for (i = 0; i < natts; i++) + if (compvec[i] == TRUE) + pfree((char *) d[i]); + pfree(itup); + pfree(res); + pfree(compvec); } - - /* Skip this tuple if it doesn't satisfy the partial-index predicate */ - if (pred != NULL) { + + /* okay, all heap tuples are indexed */ + heap_endscan(scan); + RelationUnsetLockForWrite(index); + + if (pred != NULL || oldPred != NULL) + { #ifndef OMIT_PARTIAL_INDEX - /*SetSlotContents(slot, htup); */ - slot->val = htup; - if (ExecQual((List*)pred, econtext) == false) - continue; -#endif /* OMIT_PARTIAL_INDEX */ + ExecDestroyTupleTable(tupleTable, true); + pfree(econtext); +#endif /* OMIT_PARTIAL_INDEX */ } - - ni++; - + /* - * For the current heap tuple, extract all the attributes - * we use in this index, and note which are null. + * Since we just inted the tuples in the heap, we update its stats in + * pg_relation to guarantee that the planner takes advantage of the + * index we just created. 
UpdateStats() does a + * CommandinterIncrement(), which flushes changed entries from the + * system relcache. The act of constructing an index changes these + * heap and index tuples in the system catalogs, so they need to be + * flushed. We close them to guarantee that they will be. */ - - for (i = 1; i <= natts; i++) { - int attoff; - bool attnull; - - /* - * Offsets are from the start of the tuple, and are - * zero-based; indices are one-based. The next call - * returns i - 1. That's data hiding for you. - */ - - attoff = AttrNumberGetAttrOffset(i); - /* - d[attoff] = HeapTupleGetAttributeValue(htup, buffer, - */ - d[attoff] = GetIndexValue(htup, - hd, - attoff, - attnum, - finfo, - &attnull, - buffer); - nulls[attoff] = (attnull ? 'n' : ' '); - } - - /* immediately compress keys to normalize */ - compvec = (bool *)palloc(sizeof(bool) * natts); - for (i = 0; i < natts; i++) { - gistcentryinit(&giststate, &tmpcentry, (char *)d[i], - (Relation) NULL, (Page) NULL, (OffsetNumber) 0, - -1 /* size is currently bogus */, TRUE); - if (d[i] != (Datum)tmpcentry.pred && !(giststate.keytypbyval)) - compvec[i] = TRUE; - else compvec[i] = FALSE; - d[i] = (Datum)tmpcentry.pred; + + hrelid = heap->rd_id; + irelid = index->rd_id; + heap_close(heap); + index_close(index); + + UpdateStats(hrelid, nh, true); + UpdateStats(irelid, ni, false); + + if (oldPred != NULL) + { + if (ni == nh) + pred = NULL; + UpdateIndexPredicate(irelid, oldPred, pred); } - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(id, &d[0], nulls); - itup->t_tid = htup->t_ctid; - - /* - * Since we already have the index relation locked, we - * call gistdoinsert directly. Normal access method calls - * dispatch through gistinsert, which locks the relation - * for write. This is the right thing to do if you're - * inserting single tups, but not when you're initializing - * the whole index at once. 
- */ - - res = gistdoinsert(index, itup, &giststate); - for (i = 0; i < natts; i++) - if (compvec[i] == TRUE) pfree((char *)d[i]); - pfree(itup); - pfree(res); - pfree(compvec); - } - - /* okay, all heap tuples are indexed */ - heap_endscan(scan); - RelationUnsetLockForWrite(index); - - if (pred != NULL || oldPred != NULL) { -#ifndef OMIT_PARTIAL_INDEX - ExecDestroyTupleTable(tupleTable, true); - pfree(econtext); -#endif /* OMIT_PARTIAL_INDEX */ - } - - /* - * Since we just inted the tuples in the heap, we update its - * stats in pg_relation to guarantee that the planner takes - * advantage of the index we just created. UpdateStats() does a - * CommandinterIncrement(), which flushes changed entries from - * the system relcache. The act of constructing an index changes - * these heap and index tuples in the system catalogs, so they - * need to be flushed. We close them to guarantee that they - * will be. - */ - - hrelid = heap->rd_id; - irelid = index->rd_id; - heap_close(heap); - index_close(index); - - UpdateStats(hrelid, nh, true); - UpdateStats(irelid, ni, false); - - if (oldPred != NULL) { - if (ni == nh) pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } - - /* be tidy */ - pfree(nulls); - pfree(d); + /* be tidy */ + pfree(nulls); + pfree(d); } /* - * gistinsert -- wrapper for GiST tuple insertion. + * gistinsert -- wrapper for GiST tuple insertion. * - * This is the public interface routine for tuple insertion in GiSTs. - * It doesn't do any work; just locks the relation and passes the buck. + * This is the public interface routine for tuple insertion in GiSTs. + * It doesn't do any work; just locks the relation and passes the buck. 
*/ InsertIndexResult -gistinsert(Relation r, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) +gistinsert(Relation r, Datum * datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) { - InsertIndexResult res; - IndexTuple itup; - GISTSTATE giststate; - GISTENTRY tmpentry; - int i; - bool *compvec; - - initGISTstate(&giststate, r); - - /* immediately compress keys to normalize */ - compvec = (bool *)palloc(sizeof(bool) * r->rd_att->natts); - for (i = 0; i < r->rd_att->natts; i++) { - gistcentryinit(&giststate, &tmpentry, (char *)datum[i], - (Relation) NULL, (Page) NULL, (OffsetNumber) 0, - -1 /* size is currently bogus */, TRUE); - if (datum[i] != (Datum)tmpentry.pred && !(giststate.keytypbyval)) - compvec[i] = TRUE; - else compvec[i] = FALSE; - datum[i] = (Datum)tmpentry.pred; - } - itup = index_formtuple(RelationGetTupleDescriptor(r), datum, nulls); - itup->t_tid = *ht_ctid; - - RelationSetLockForWrite(r); - res = gistdoinsert(r, itup, &giststate); - for (i = 0; i < r->rd_att->natts; i++) - if (compvec[i] == TRUE) pfree((char *)datum[i]); - pfree(itup); - pfree(compvec); - - /* XXX two-phase locking -- don't unlock the relation until EOT */ - return (res); + InsertIndexResult res; + IndexTuple itup; + GISTSTATE giststate; + GISTENTRY tmpentry; + int i; + bool *compvec; + + initGISTstate(&giststate, r); + + /* immediately compress keys to normalize */ + compvec = (bool *) palloc(sizeof(bool) * r->rd_att->natts); + for (i = 0; i < r->rd_att->natts; i++) + { + gistcentryinit(&giststate, &tmpentry, (char *) datum[i], + (Relation) NULL, (Page) NULL, (OffsetNumber) 0, + -1 /* size is currently bogus */ , TRUE); + if (datum[i] != (Datum) tmpentry.pred && !(giststate.keytypbyval)) + compvec[i] = TRUE; + else + compvec[i] = FALSE; + datum[i] = (Datum) tmpentry.pred; + } + itup = index_formtuple(RelationGetTupleDescriptor(r), datum, nulls); + itup->t_tid = *ht_ctid; + + RelationSetLockForWrite(r); + res = gistdoinsert(r, itup, &giststate); + for (i = 0; 
i < r->rd_att->natts; i++) + if (compvec[i] == TRUE) + pfree((char *) datum[i]); + pfree(itup); + pfree(compvec); + + /* XXX two-phase locking -- don't unlock the relation until EOT */ + return (res); } /* @@ -336,475 +373,509 @@ gistinsert(Relation r, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation ** that knowledge (some compression routines may want to fish around ** on the page, for example, or do something special for leaf nodes.) */ -static OffsetNumber -gistPageAddItem(GISTSTATE *giststate, - Relation r, - Page page, - Item item, - Size size, - OffsetNumber offsetNumber, - ItemIdFlags flags, - GISTENTRY *dentry, - IndexTuple *newtup) +static OffsetNumber +gistPageAddItem(GISTSTATE * giststate, + Relation r, + Page page, + Item item, + Size size, + OffsetNumber offsetNumber, + ItemIdFlags flags, + GISTENTRY * dentry, + IndexTuple * newtup) { - GISTENTRY tmpcentry; - IndexTuple itup = (IndexTuple)item; - - /* recompress the item given that we now know the exact page and - offset for insertion */ - gistdentryinit(giststate, dentry, - (((char *) itup) + sizeof(IndexTupleData)), - (Relation)0, (Page)0, (OffsetNumber)InvalidOffsetNumber, - IndexTupleSize(itup) - sizeof(IndexTupleData), FALSE); - gistcentryinit(giststate, &tmpcentry, dentry->pred, r, page, - offsetNumber, dentry->bytes, FALSE); - *newtup = gist_tuple_replacekey(r, *dentry, itup); - /* be tidy */ - if (tmpcentry.pred != dentry->pred - && tmpcentry.pred != (((char *) itup) + sizeof(IndexTupleData))) - pfree(tmpcentry.pred); - - return(PageAddItem(page, (Item) *newtup, IndexTupleSize(*newtup), - offsetNumber, flags)); + GISTENTRY tmpcentry; + IndexTuple itup = (IndexTuple) item; + + /* + * recompress the item given that we now know the exact page and + * offset for insertion + */ + gistdentryinit(giststate, dentry, + (((char *) itup) + sizeof(IndexTupleData)), + (Relation) 0, (Page) 0, (OffsetNumber) InvalidOffsetNumber, + IndexTupleSize(itup) - sizeof(IndexTupleData), FALSE); + 
gistcentryinit(giststate, &tmpcentry, dentry->pred, r, page, + offsetNumber, dentry->bytes, FALSE); + *newtup = gist_tuple_replacekey(r, *dentry, itup); + /* be tidy */ + if (tmpcentry.pred != dentry->pred + && tmpcentry.pred != (((char *) itup) + sizeof(IndexTupleData))) + pfree(tmpcentry.pred); + + return (PageAddItem(page, (Item) * newtup, IndexTupleSize(*newtup), + offsetNumber, flags)); } -static InsertIndexResult -gistdoinsert(Relation r, - IndexTuple itup, /* itup contains compressed entry */ - GISTSTATE *giststate) +static InsertIndexResult +gistdoinsert(Relation r, + IndexTuple itup, /* itup contains compressed entry */ + GISTSTATE * giststate) { - GISTENTRY tmpdentry; - InsertIndexResult res; - OffsetNumber l; - GISTSTACK *stack; - Buffer buffer; - BlockNumber blk; - Page page; - OffsetNumber off; - IndexTuple newtup; - - /* 3rd arg is ignored for now */ - blk = gistChooseSubtree(r, itup, 0, giststate, &stack, &buffer); - page = (Page) BufferGetPage(buffer); - - if (gistnospace(page, itup)) { - /* need to do a split */ - res = gistSplit(r, buffer, stack, itup, giststate); + GISTENTRY tmpdentry; + InsertIndexResult res; + OffsetNumber l; + GISTSTACK *stack; + Buffer buffer; + BlockNumber blk; + Page page; + OffsetNumber off; + IndexTuple newtup; + + /* 3rd arg is ignored for now */ + blk = gistChooseSubtree(r, itup, 0, giststate, &stack, &buffer); + page = (Page) BufferGetPage(buffer); + + if (gistnospace(page, itup)) + { + /* need to do a split */ + res = gistSplit(r, buffer, stack, itup, giststate); + gistfreestack(stack); + WriteBuffer(buffer); /* don't forget to release buffer! 
*/ + return (res); + } + + if (PageIsEmpty(page)) + off = FirstOffsetNumber; + else + off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + + /* add the item and write the buffer */ + l = gistPageAddItem(giststate, r, page, (Item) itup, IndexTupleSize(itup), + off, LP_USED, &tmpdentry, &newtup); + WriteBuffer(buffer); + + /* now expand the page boundary in the parent to include the new child */ + gistAdjustKeys(r, stack, blk, tmpdentry.pred, tmpdentry.bytes, giststate); gistfreestack(stack); - WriteBuffer(buffer); /* don't forget to release buffer! */ + + /* be tidy */ + if (itup != newtup) + pfree(newtup); + if (tmpdentry.pred != (((char *) itup) + sizeof(IndexTupleData))) + pfree(tmpdentry.pred); + + /* build and return an InsertIndexResult for this insertion */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + ItemPointerSet(&(res->pointerData), blk, l); + return (res); - } - - if (PageIsEmpty(page)) - off = FirstOffsetNumber; - else - off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); - - /* add the item and write the buffer */ - l = gistPageAddItem(giststate, r, page, (Item) itup, IndexTupleSize(itup), - off, LP_USED, &tmpdentry, &newtup); - WriteBuffer(buffer); - - /* now expand the page boundary in the parent to include the new child */ - gistAdjustKeys(r, stack, blk, tmpdentry.pred, tmpdentry.bytes, giststate); - gistfreestack(stack); - - /* be tidy */ - if (itup != newtup) - pfree(newtup); - if (tmpdentry.pred != (((char *) itup) + sizeof(IndexTupleData))) - pfree(tmpdentry.pred); - - /* build and return an InsertIndexResult for this insertion */ - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - ItemPointerSet(&(res->pointerData), blk, l); - - return (res); } -static BlockNumber -gistChooseSubtree(Relation r, IndexTuple itup, /* itup has compressed entry */ - int level, - GISTSTATE *giststate, - GISTSTACK **retstack /*out*/, - Buffer *leafbuf /*out*/) +static BlockNumber +gistChooseSubtree(Relation r, IndexTuple 
itup, /* itup has compressed + * entry */ + int level, + GISTSTATE * giststate, + GISTSTACK ** retstack /* out */ , + Buffer * leafbuf /* out */ ) { - Buffer buffer; - BlockNumber blk; - GISTSTACK *stack; - Page page; - GISTPageOpaque opaque; - IndexTuple which; - - blk = GISTP_ROOT; - buffer = InvalidBuffer; - stack = (GISTSTACK *) NULL; - - do { - /* let go of current buffer before getting next */ - if (buffer != InvalidBuffer) - ReleaseBuffer(buffer); - - /* get next buffer */ - buffer = ReadBuffer(r, blk); - page = (Page) BufferGetPage(buffer); - - opaque = (GISTPageOpaque) PageGetSpecialPointer(page); - if (!(opaque->flags & F_LEAF)) { - GISTSTACK *n; - ItemId iid; - - n = (GISTSTACK *) palloc(sizeof(GISTSTACK)); - n->gs_parent = stack; - n->gs_blk = blk; - n->gs_child = gistchoose(r, page, itup, giststate); - stack = n; - - iid = PageGetItemId(page, n->gs_child); - which = (IndexTuple) PageGetItem(page, iid); - blk = ItemPointerGetBlockNumber(&(which->t_tid)); - } - } while (!(opaque->flags & F_LEAF)); - - *retstack = stack; - *leafbuf = buffer; - - return(blk); + Buffer buffer; + BlockNumber blk; + GISTSTACK *stack; + Page page; + GISTPageOpaque opaque; + IndexTuple which; + + blk = GISTP_ROOT; + buffer = InvalidBuffer; + stack = (GISTSTACK *) NULL; + + do + { + /* let go of current buffer before getting next */ + if (buffer != InvalidBuffer) + ReleaseBuffer(buffer); + + /* get next buffer */ + buffer = ReadBuffer(r, blk); + page = (Page) BufferGetPage(buffer); + + opaque = (GISTPageOpaque) PageGetSpecialPointer(page); + if (!(opaque->flags & F_LEAF)) + { + GISTSTACK *n; + ItemId iid; + + n = (GISTSTACK *) palloc(sizeof(GISTSTACK)); + n->gs_parent = stack; + n->gs_blk = blk; + n->gs_child = gistchoose(r, page, itup, giststate); + stack = n; + + iid = PageGetItemId(page, n->gs_child); + which = (IndexTuple) PageGetItem(page, iid); + blk = ItemPointerGetBlockNumber(&(which->t_tid)); + } + } while (!(opaque->flags & F_LEAF)); + + *retstack = stack; + *leafbuf = 
buffer; + + return (blk); } static void gistAdjustKeys(Relation r, - GISTSTACK *stk, - BlockNumber blk, - char *datum, /* datum is uncompressed */ - int att_size, - GISTSTATE *giststate) + GISTSTACK * stk, + BlockNumber blk, + char *datum, /* datum is uncompressed */ + int att_size, + GISTSTATE * giststate) { - char *oldud; - Page p; - Buffer b; - bool result; - bytea *evec; - GISTENTRY centry, *ev0p, *ev1p; - int size, datumsize; - IndexTuple tid; - - if (stk == (GISTSTACK *) NULL) - return; - - b = ReadBuffer(r, stk->gs_blk); - p = BufferGetPage(b); - - oldud = (char *) PageGetItem(p, PageGetItemId(p, stk->gs_child)); - tid = (IndexTuple) oldud; - size = IndexTupleSize((IndexTuple)oldud) - sizeof(IndexTupleData); - oldud += sizeof(IndexTupleData); - - evec = (bytea *) palloc(2*sizeof(GISTENTRY) + VARHDRSZ); - VARSIZE(evec) = 2*sizeof(GISTENTRY) + VARHDRSZ; - - /* insert decompressed oldud into entry vector */ - gistdentryinit(giststate, &((GISTENTRY *)VARDATA(evec))[0], - oldud, r, p, stk->gs_child, - size, FALSE); - ev0p = &((GISTENTRY *)VARDATA(evec))[0]; - - /* insert datum entry into entry vector */ - gistentryinit(((GISTENTRY *)VARDATA(evec))[1], datum, - (Relation)NULL,(Page)NULL,(OffsetNumber)0, att_size, FALSE); - ev1p = &((GISTENTRY *)VARDATA(evec))[1]; - - /* form union of decompressed entries */ - datum = (char *) (giststate->unionFn)(evec, &datumsize); - - /* did union leave decompressed version of oldud unchanged? 
*/ - (giststate->equalFn)(ev0p->pred, datum, &result); - if (!result) { - TupleDesc td = RelationGetTupleDescriptor(r); - - /* compress datum for storage on page */ - gistcentryinit(giststate, &centry, datum, ev0p->rel, ev0p->page, - ev0p->offset, datumsize, FALSE); - if (td->attrs[0]->attlen >= 0) { - memmove(oldud, centry.pred, att_size); - gistAdjustKeys(r, stk->gs_parent, stk->gs_blk, datum, att_size, - giststate); + char *oldud; + Page p; + Buffer b; + bool result; + bytea *evec; + GISTENTRY centry, + *ev0p, + *ev1p; + int size, + datumsize; + IndexTuple tid; + + if (stk == (GISTSTACK *) NULL) + return; + + b = ReadBuffer(r, stk->gs_blk); + p = BufferGetPage(b); + + oldud = (char *) PageGetItem(p, PageGetItemId(p, stk->gs_child)); + tid = (IndexTuple) oldud; + size = IndexTupleSize((IndexTuple) oldud) - sizeof(IndexTupleData); + oldud += sizeof(IndexTupleData); + + evec = (bytea *) palloc(2 * sizeof(GISTENTRY) + VARHDRSZ); + VARSIZE(evec) = 2 * sizeof(GISTENTRY) + VARHDRSZ; + + /* insert decompressed oldud into entry vector */ + gistdentryinit(giststate, &((GISTENTRY *) VARDATA(evec))[0], + oldud, r, p, stk->gs_child, + size, FALSE); + ev0p = &((GISTENTRY *) VARDATA(evec))[0]; + + /* insert datum entry into entry vector */ + gistentryinit(((GISTENTRY *) VARDATA(evec))[1], datum, + (Relation) NULL, (Page) NULL, (OffsetNumber) 0, att_size, FALSE); + ev1p = &((GISTENTRY *) VARDATA(evec))[1]; + + /* form union of decompressed entries */ + datum = (char *) (giststate->unionFn) (evec, &datumsize); + + /* did union leave decompressed version of oldud unchanged? 
*/ + (giststate->equalFn) (ev0p->pred, datum, &result); + if (!result) + { + TupleDesc td = RelationGetTupleDescriptor(r); + + /* compress datum for storage on page */ + gistcentryinit(giststate, &centry, datum, ev0p->rel, ev0p->page, + ev0p->offset, datumsize, FALSE); + if (td->attrs[0]->attlen >= 0) + { + memmove(oldud, centry.pred, att_size); + gistAdjustKeys(r, stk->gs_parent, stk->gs_blk, datum, att_size, + giststate); + } + else if (VARSIZE(centry.pred) == VARSIZE(oldud)) + { + memmove(oldud, centry.pred, VARSIZE(centry.pred)); + gistAdjustKeys(r, stk->gs_parent, stk->gs_blk, datum, att_size, + giststate); + } + else + { + + /* + * * new datum is not the same size as the old. * We have to + * delete the old entry and insert the new * one. Note that + * this may cause a split here! + */ + IndexTuple newtup; + ItemPointerData oldtid; + char *isnull; + TupleDesc tupDesc; + InsertIndexResult res; + + /* delete old tuple */ + ItemPointerSet(&oldtid, stk->gs_blk, stk->gs_child); + gistdelete(r, (ItemPointer) & oldtid); + + /* generate and insert new tuple */ + tupDesc = r->rd_att; + isnull = (char *) palloc(r->rd_rel->relnatts); + memset(isnull, ' ', r->rd_rel->relnatts); + newtup = (IndexTuple) index_formtuple(tupDesc, + (Datum *) & centry.pred, isnull); + pfree(isnull); + /* set pointer in new tuple to point to current child */ + ItemPointerSet(&oldtid, blk, 1); + newtup->t_tid = oldtid; + + /* inserting the new entry also adjust keys above */ + res = gistentryinsert(r, stk, newtup, giststate); + + /* in stack, set info to point to new tuple */ + stk->gs_blk = ItemPointerGetBlockNumber(&(res->pointerData)); + stk->gs_child = ItemPointerGetOffsetNumber(&(res->pointerData)); + + pfree(res); + } + WriteBuffer(b); + + if (centry.pred != datum) + pfree(datum); } - else if (VARSIZE(centry.pred) == VARSIZE(oldud)) { - memmove(oldud, centry.pred, VARSIZE(centry.pred)); - gistAdjustKeys(r, stk->gs_parent, stk->gs_blk, datum, att_size, - giststate); + else + { + 
ReleaseBuffer(b); } - else { - /* - ** new datum is not the same size as the old. - ** We have to delete the old entry and insert the new - ** one. Note that this may cause a split here! - */ - IndexTuple newtup; - ItemPointerData oldtid; - char *isnull; - TupleDesc tupDesc; - InsertIndexResult res; - - /* delete old tuple */ - ItemPointerSet(&oldtid, stk->gs_blk, stk->gs_child); - gistdelete(r, (ItemPointer)&oldtid); - - /* generate and insert new tuple */ - tupDesc = r->rd_att; - isnull = (char *) palloc(r->rd_rel->relnatts); - memset(isnull, ' ', r->rd_rel->relnatts); - newtup = (IndexTuple) index_formtuple(tupDesc, - (Datum *) &centry.pred, isnull); - pfree(isnull); - /* set pointer in new tuple to point to current child */ - ItemPointerSet(&oldtid, blk, 1); - newtup->t_tid = oldtid; - - /* inserting the new entry also adjust keys above */ - res = gistentryinsert(r, stk, newtup, giststate); - - /* in stack, set info to point to new tuple */ - stk->gs_blk = ItemPointerGetBlockNumber(&(res->pointerData)); - stk->gs_child = ItemPointerGetOffsetNumber(&(res->pointerData)); - - pfree(res); - } - WriteBuffer(b); - - if (centry.pred != datum) - pfree(datum); - } - else { - ReleaseBuffer(b); - } - pfree(evec); + pfree(evec); } /* - * gistSplit -- split a page in the tree. + * gistSplit -- split a page in the tree. 
* */ -static InsertIndexResult +static InsertIndexResult gistSplit(Relation r, - Buffer buffer, - GISTSTACK *stack, - IndexTuple itup, /* contains compressed entry */ - GISTSTATE *giststate) + Buffer buffer, + GISTSTACK * stack, + IndexTuple itup, /* contains compressed entry */ + GISTSTATE * giststate) { - Page p; - Buffer leftbuf, rightbuf; - Page left, right; - ItemId itemid; - IndexTuple item; - IndexTuple ltup, rtup, newtup; - OffsetNumber maxoff; - OffsetNumber i; - OffsetNumber leftoff, rightoff; - BlockNumber lbknum, rbknum; - BlockNumber bufblock; - GISTPageOpaque opaque; - int blank; - InsertIndexResult res; - char *isnull; - GIST_SPLITVEC v; - TupleDesc tupDesc; - bytea *entryvec; - bool *decompvec; - IndexTuple item_1; - GISTENTRY tmpdentry, tmpentry; - - isnull = (char *) palloc(r->rd_rel->relnatts); - for (blank = 0; blank < r->rd_rel->relnatts; blank++) - isnull[blank] = ' '; - p = (Page) BufferGetPage(buffer); - opaque = (GISTPageOpaque) PageGetSpecialPointer(p); - - - /* - * The root of the tree is the first block in the relation. If - * we're about to split the root, we need to do some hocus-pocus - * to enforce this guarantee. 
- */ - - if (BufferGetBlockNumber(buffer) == GISTP_ROOT) { - leftbuf = ReadBuffer(r, P_NEW); - GISTInitBuffer(leftbuf, opaque->flags); - lbknum = BufferGetBlockNumber(leftbuf); - left = (Page) BufferGetPage(leftbuf); - } else { - leftbuf = buffer; - IncrBufferRefCount(buffer); - lbknum = BufferGetBlockNumber(buffer); - left = (Page) PageGetTempPage(p, sizeof(GISTPageOpaqueData)); - } - - rightbuf = ReadBuffer(r, P_NEW); - GISTInitBuffer(rightbuf, opaque->flags); - rbknum = BufferGetBlockNumber(rightbuf); - right = (Page) BufferGetPage(rightbuf); - - /* generate the item array */ - maxoff = PageGetMaxOffsetNumber(p); - entryvec = (bytea *)palloc(VARHDRSZ + (maxoff + 2) * sizeof(GISTENTRY)); - decompvec = (bool *)palloc(VARHDRSZ + (maxoff + 2) * sizeof(bool)); - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - item_1 = (IndexTuple) PageGetItem(p, PageGetItemId(p, i)); - gistdentryinit(giststate, &((GISTENTRY *)VARDATA(entryvec))[i], - (((char *) item_1) + sizeof(IndexTupleData)), - r, p, i, - IndexTupleSize(item_1) - sizeof(IndexTupleData), FALSE); - if ((char *)(((GISTENTRY *)VARDATA(entryvec))[i].pred) - == (((char *) item_1) + sizeof(IndexTupleData))) - decompvec[i] = FALSE; - else decompvec[i] = TRUE; - } - - /* add the new datum as the last entry */ - gistdentryinit(giststate, &(((GISTENTRY *)VARDATA(entryvec))[maxoff+1]), - (((char *) itup) + sizeof(IndexTupleData)), - (Relation)NULL, (Page)NULL, - (OffsetNumber)0, tmpentry.bytes, FALSE); - if ((char *)(((GISTENTRY *)VARDATA(entryvec))[maxoff+1]).pred != - (((char *) itup) + sizeof(IndexTupleData))) - decompvec[maxoff+1] = TRUE; - else decompvec[maxoff+1] = FALSE; - - VARSIZE(entryvec) = (maxoff + 2) * sizeof(GISTENTRY) + VARHDRSZ; - - /* now let the user-defined picksplit function set up the split vector */ - (giststate->picksplitFn)(entryvec, &v); - - /* compress ldatum and rdatum */ - gistcentryinit(giststate, &tmpentry, v.spl_ldatum, (Relation)NULL, - (Page)NULL, (OffsetNumber)0, - 
((GISTENTRY *)VARDATA(entryvec))[i].bytes, FALSE); - if (v.spl_ldatum != tmpentry.pred) - pfree(v.spl_ldatum); - v.spl_ldatum = tmpentry.pred; - - gistcentryinit(giststate, &tmpentry, v.spl_rdatum, (Relation)NULL, - (Page)NULL, (OffsetNumber)0, - ((GISTENTRY *)VARDATA(entryvec))[i].bytes, FALSE); - if (v.spl_rdatum != tmpentry.pred) - pfree(v.spl_rdatum); - v.spl_rdatum = tmpentry.pred; - - /* clean up the entry vector: its preds need to be deleted, too */ - for (i = FirstOffsetNumber; i <= maxoff+1; i = OffsetNumberNext(i)) - if (decompvec[i]) - pfree(((GISTENTRY *)VARDATA(entryvec))[i].pred); - pfree(entryvec); - pfree(decompvec); - - leftoff = rightoff = FirstOffsetNumber; - maxoff = PageGetMaxOffsetNumber(p); - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - itemid = PageGetItemId(p, i); - item = (IndexTuple) PageGetItem(p, itemid); - - if (i == *(v.spl_left)) { - gistPageAddItem(giststate, r, left, (Item) item, - IndexTupleSize(item), - leftoff, LP_USED, &tmpdentry, &newtup); - leftoff = OffsetNumberNext(leftoff); - v.spl_left++; /* advance in left split vector */ - /* be tidy */ - if (tmpdentry.pred != (((char *) item) + sizeof(IndexTupleData))) - pfree(tmpdentry.pred); - if ((IndexTuple)item != newtup) - pfree(newtup); - } - else { - gistPageAddItem(giststate, r, right, (Item) item, - IndexTupleSize(item), - rightoff, LP_USED, &tmpdentry, &newtup); - rightoff = OffsetNumberNext(rightoff); - v.spl_right++; /* advance in right split vector */ - /* be tidy */ - if (tmpdentry.pred != (((char *) item) + sizeof(IndexTupleData))) - pfree(tmpdentry.pred); - if (item != newtup) - pfree(newtup); + Page p; + Buffer leftbuf, + rightbuf; + Page left, + right; + ItemId itemid; + IndexTuple item; + IndexTuple ltup, + rtup, + newtup; + OffsetNumber maxoff; + OffsetNumber i; + OffsetNumber leftoff, + rightoff; + BlockNumber lbknum, + rbknum; + BlockNumber bufblock; + GISTPageOpaque opaque; + int blank; + InsertIndexResult res; + char *isnull; + 
GIST_SPLITVEC v; + TupleDesc tupDesc; + bytea *entryvec; + bool *decompvec; + IndexTuple item_1; + GISTENTRY tmpdentry, + tmpentry; + + isnull = (char *) palloc(r->rd_rel->relnatts); + for (blank = 0; blank < r->rd_rel->relnatts; blank++) + isnull[blank] = ' '; + p = (Page) BufferGetPage(buffer); + opaque = (GISTPageOpaque) PageGetSpecialPointer(p); + + + /* + * The root of the tree is the first block in the relation. If we're + * about to split the root, we need to do some hocus-pocus to enforce + * this guarantee. + */ + + if (BufferGetBlockNumber(buffer) == GISTP_ROOT) + { + leftbuf = ReadBuffer(r, P_NEW); + GISTInitBuffer(leftbuf, opaque->flags); + lbknum = BufferGetBlockNumber(leftbuf); + left = (Page) BufferGetPage(leftbuf); } - } - - /* build an InsertIndexResult for this insertion */ - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - - /* now insert the new index tuple */ - if (*(v.spl_left) != FirstOffsetNumber) { - gistPageAddItem(giststate, r, left, (Item) itup, - IndexTupleSize(itup), - leftoff, LP_USED, &tmpdentry, &newtup); - leftoff = OffsetNumberNext(leftoff); - ItemPointerSet(&(res->pointerData), lbknum, leftoff); - /* be tidy */ - if (tmpdentry.pred != (((char *) itup) + sizeof(IndexTupleData))) - pfree(tmpdentry.pred); - if (itup != newtup) - pfree(newtup); - } else { - gistPageAddItem(giststate, r, right, (Item) itup, - IndexTupleSize(itup), - rightoff, LP_USED, &tmpdentry, &newtup); - rightoff = OffsetNumberNext(rightoff); - ItemPointerSet(&(res->pointerData), rbknum, rightoff); - /* be tidy */ - if (tmpdentry.pred != (((char *) itup) + sizeof(IndexTupleData))) - pfree(tmpdentry.pred); - if (itup != newtup) - pfree(newtup); - } - - if ((bufblock = BufferGetBlockNumber(buffer)) != GISTP_ROOT) { - PageRestoreTempPage(left, p); - } - WriteBuffer(leftbuf); - WriteBuffer(rightbuf); - - /* - * Okay, the page is split. 
We have three things left to do: - * - * 1) Adjust any active scans on this index to cope with changes - * we introduced in its structure by splitting this page. - * - * 2) "Tighten" the bounding box of the pointer to the left - * page in the parent node in the tree, if any. Since we - * moved a bunch of stuff off the left page, we expect it - * to get smaller. This happens in the internal insertion - * routine. - * - * 3) Insert a pointer to the right page in the parent. This - * may cause the parent to split. If it does, we need to - * repeat steps one and two for each split node in the tree. - */ - - /* adjust active scans */ - gistadjscans(r, GISTOP_SPLIT, bufblock, FirstOffsetNumber); - - tupDesc = r->rd_att; - - ltup = (IndexTuple) index_formtuple(tupDesc, - (Datum *) &(v.spl_ldatum), isnull); - rtup = (IndexTuple) index_formtuple(tupDesc, - (Datum *) &(v.spl_rdatum), isnull); - pfree(isnull); - - /* set pointers to new child pages in the internal index tuples */ - ItemPointerSet(&(ltup->t_tid), lbknum, 1); - ItemPointerSet(&(rtup->t_tid), rbknum, 1); - - gistintinsert(r, stack, ltup, rtup, giststate); - - pfree(ltup); - pfree(rtup); - - return (res); + else + { + leftbuf = buffer; + IncrBufferRefCount(buffer); + lbknum = BufferGetBlockNumber(buffer); + left = (Page) PageGetTempPage(p, sizeof(GISTPageOpaqueData)); + } + + rightbuf = ReadBuffer(r, P_NEW); + GISTInitBuffer(rightbuf, opaque->flags); + rbknum = BufferGetBlockNumber(rightbuf); + right = (Page) BufferGetPage(rightbuf); + + /* generate the item array */ + maxoff = PageGetMaxOffsetNumber(p); + entryvec = (bytea *) palloc(VARHDRSZ + (maxoff + 2) * sizeof(GISTENTRY)); + decompvec = (bool *) palloc(VARHDRSZ + (maxoff + 2) * sizeof(bool)); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + item_1 = (IndexTuple) PageGetItem(p, PageGetItemId(p, i)); + gistdentryinit(giststate, &((GISTENTRY *) VARDATA(entryvec))[i], + (((char *) item_1) + sizeof(IndexTupleData)), + r, p, i, + 
IndexTupleSize(item_1) - sizeof(IndexTupleData), FALSE); + if ((char *) (((GISTENTRY *) VARDATA(entryvec))[i].pred) + == (((char *) item_1) + sizeof(IndexTupleData))) + decompvec[i] = FALSE; + else + decompvec[i] = TRUE; + } + + /* add the new datum as the last entry */ + gistdentryinit(giststate, &(((GISTENTRY *) VARDATA(entryvec))[maxoff + 1]), + (((char *) itup) + sizeof(IndexTupleData)), + (Relation) NULL, (Page) NULL, + (OffsetNumber) 0, tmpentry.bytes, FALSE); + if ((char *) (((GISTENTRY *) VARDATA(entryvec))[maxoff + 1]).pred != + (((char *) itup) + sizeof(IndexTupleData))) + decompvec[maxoff + 1] = TRUE; + else + decompvec[maxoff + 1] = FALSE; + + VARSIZE(entryvec) = (maxoff + 2) * sizeof(GISTENTRY) + VARHDRSZ; + + /* now let the user-defined picksplit function set up the split vector */ + (giststate->picksplitFn) (entryvec, &v); + + /* compress ldatum and rdatum */ + gistcentryinit(giststate, &tmpentry, v.spl_ldatum, (Relation) NULL, + (Page) NULL, (OffsetNumber) 0, + ((GISTENTRY *) VARDATA(entryvec))[i].bytes, FALSE); + if (v.spl_ldatum != tmpentry.pred) + pfree(v.spl_ldatum); + v.spl_ldatum = tmpentry.pred; + + gistcentryinit(giststate, &tmpentry, v.spl_rdatum, (Relation) NULL, + (Page) NULL, (OffsetNumber) 0, + ((GISTENTRY *) VARDATA(entryvec))[i].bytes, FALSE); + if (v.spl_rdatum != tmpentry.pred) + pfree(v.spl_rdatum); + v.spl_rdatum = tmpentry.pred; + + /* clean up the entry vector: its preds need to be deleted, too */ + for (i = FirstOffsetNumber; i <= maxoff + 1; i = OffsetNumberNext(i)) + if (decompvec[i]) + pfree(((GISTENTRY *) VARDATA(entryvec))[i].pred); + pfree(entryvec); + pfree(decompvec); + + leftoff = rightoff = FirstOffsetNumber; + maxoff = PageGetMaxOffsetNumber(p); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + itemid = PageGetItemId(p, i); + item = (IndexTuple) PageGetItem(p, itemid); + + if (i == *(v.spl_left)) + { + gistPageAddItem(giststate, r, left, (Item) item, + IndexTupleSize(item), + leftoff, LP_USED, 
&tmpdentry, &newtup); + leftoff = OffsetNumberNext(leftoff); + v.spl_left++; /* advance in left split vector */ + /* be tidy */ + if (tmpdentry.pred != (((char *) item) + sizeof(IndexTupleData))) + pfree(tmpdentry.pred); + if ((IndexTuple) item != newtup) + pfree(newtup); + } + else + { + gistPageAddItem(giststate, r, right, (Item) item, + IndexTupleSize(item), + rightoff, LP_USED, &tmpdentry, &newtup); + rightoff = OffsetNumberNext(rightoff); + v.spl_right++; /* advance in right split vector */ + /* be tidy */ + if (tmpdentry.pred != (((char *) item) + sizeof(IndexTupleData))) + pfree(tmpdentry.pred); + if (item != newtup) + pfree(newtup); + } + } + + /* build an InsertIndexResult for this insertion */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + + /* now insert the new index tuple */ + if (*(v.spl_left) != FirstOffsetNumber) + { + gistPageAddItem(giststate, r, left, (Item) itup, + IndexTupleSize(itup), + leftoff, LP_USED, &tmpdentry, &newtup); + leftoff = OffsetNumberNext(leftoff); + ItemPointerSet(&(res->pointerData), lbknum, leftoff); + /* be tidy */ + if (tmpdentry.pred != (((char *) itup) + sizeof(IndexTupleData))) + pfree(tmpdentry.pred); + if (itup != newtup) + pfree(newtup); + } + else + { + gistPageAddItem(giststate, r, right, (Item) itup, + IndexTupleSize(itup), + rightoff, LP_USED, &tmpdentry, &newtup); + rightoff = OffsetNumberNext(rightoff); + ItemPointerSet(&(res->pointerData), rbknum, rightoff); + /* be tidy */ + if (tmpdentry.pred != (((char *) itup) + sizeof(IndexTupleData))) + pfree(tmpdentry.pred); + if (itup != newtup) + pfree(newtup); + } + + if ((bufblock = BufferGetBlockNumber(buffer)) != GISTP_ROOT) + { + PageRestoreTempPage(left, p); + } + WriteBuffer(leftbuf); + WriteBuffer(rightbuf); + + /* + * Okay, the page is split. We have three things left to do: + * + * 1) Adjust any active scans on this index to cope with changes we + * introduced in its structure by splitting this page. 
+ * + * 2) "Tighten" the bounding box of the pointer to the left page in the + * parent node in the tree, if any. Since we moved a bunch of stuff + * off the left page, we expect it to get smaller. This happens in + * the internal insertion routine. + * + * 3) Insert a pointer to the right page in the parent. This may cause + * the parent to split. If it does, we need to repeat steps one and + * two for each split node in the tree. + */ + + /* adjust active scans */ + gistadjscans(r, GISTOP_SPLIT, bufblock, FirstOffsetNumber); + + tupDesc = r->rd_att; + + ltup = (IndexTuple) index_formtuple(tupDesc, + (Datum *) & (v.spl_ldatum), isnull); + rtup = (IndexTuple) index_formtuple(tupDesc, + (Datum *) & (v.spl_rdatum), isnull); + pfree(isnull); + + /* set pointers to new child pages in the internal index tuples */ + ItemPointerSet(&(ltup->t_tid), lbknum, 1); + ItemPointerSet(&(rtup->t_tid), rbknum, 1); + + gistintinsert(r, stack, ltup, rtup, giststate); + + pfree(ltup); + pfree(rtup); + + return (res); } /* @@ -813,22 +884,23 @@ gistSplit(Relation r, */ static void gistintinsert(Relation r, - GISTSTACK *stk, - IndexTuple ltup, /* new version of entry for old page */ - IndexTuple rtup, /* entry for new page */ - GISTSTATE *giststate) + GISTSTACK * stk, + IndexTuple ltup, /* new version of entry for old page */ + IndexTuple rtup, /* entry for new page */ + GISTSTATE * giststate) { - ItemPointerData ltid; + ItemPointerData ltid; - if (stk == (GISTSTACK *) NULL) { - gistnewroot(giststate, r, ltup, rtup); - return; - } - - /* remove old left pointer, insert the 2 new entries */ - ItemPointerSet(&ltid, stk->gs_blk, stk->gs_child); - gistdelete(r, (ItemPointer)&ltid); - gistentryinserttwo(r, stk, ltup, rtup, giststate); + if (stk == (GISTSTACK *) NULL) + { + gistnewroot(giststate, r, ltup, rtup); + return; + } + + /* remove old left pointer, insert the 2 new entries */ + ItemPointerSet(&ltid, stk->gs_blk, stk->gs_child); + gistdelete(r, (ItemPointer) & ltid); + gistentryinserttwo(r, 
stk, ltup, rtup, giststate); } @@ -836,280 +908,299 @@ gistintinsert(Relation r, ** Insert two entries onto one page, handling a split for either one! */ static void -gistentryinserttwo(Relation r, GISTSTACK *stk, IndexTuple ltup, - IndexTuple rtup, GISTSTATE *giststate) +gistentryinserttwo(Relation r, GISTSTACK * stk, IndexTuple ltup, + IndexTuple rtup, GISTSTATE * giststate) { - Buffer b; - Page p; - InsertIndexResult res; - GISTENTRY tmpentry; - IndexTuple newtup; - - b = ReadBuffer(r, stk->gs_blk); - p = BufferGetPage(b); - - if (gistnospace(p, ltup)) { - res = gistSplit(r, b, stk->gs_parent, ltup, giststate); - WriteBuffer(b); /* don't forget to release buffer! - 01/31/94 */ - pfree(res); - gistdoinsert(r, rtup, giststate); - } else { - gistPageAddItem(giststate, r, p, (Item)ltup, - IndexTupleSize(ltup), InvalidOffsetNumber, - LP_USED, &tmpentry, &newtup); - WriteBuffer(b); - gistAdjustKeys(r, stk->gs_parent, stk->gs_blk, tmpentry.pred, - tmpentry.bytes, giststate); - /* be tidy */ - if (tmpentry.pred != (((char *) ltup) + sizeof(IndexTupleData))) - pfree(tmpentry.pred); - if (ltup != newtup) - pfree(newtup); - gistentryinsert(r, stk, rtup, giststate); - } -} + Buffer b; + Page p; + InsertIndexResult res; + GISTENTRY tmpentry; + IndexTuple newtup; + + b = ReadBuffer(r, stk->gs_blk); + p = BufferGetPage(b); + + if (gistnospace(p, ltup)) + { + res = gistSplit(r, b, stk->gs_parent, ltup, giststate); + WriteBuffer(b); /* don't forget to release buffer! 
- + * 01/31/94 */ + pfree(res); + gistdoinsert(r, rtup, giststate); + } + else + { + gistPageAddItem(giststate, r, p, (Item) ltup, + IndexTupleSize(ltup), InvalidOffsetNumber, + LP_USED, &tmpentry, &newtup); + WriteBuffer(b); + gistAdjustKeys(r, stk->gs_parent, stk->gs_blk, tmpentry.pred, + tmpentry.bytes, giststate); + /* be tidy */ + if (tmpentry.pred != (((char *) ltup) + sizeof(IndexTupleData))) + pfree(tmpentry.pred); + if (ltup != newtup) + pfree(newtup); + gistentryinsert(r, stk, rtup, giststate); + } +} /* ** Insert an entry onto a page */ -static InsertIndexResult -gistentryinsert(Relation r, GISTSTACK *stk, IndexTuple tup, - GISTSTATE *giststate) +static InsertIndexResult +gistentryinsert(Relation r, GISTSTACK * stk, IndexTuple tup, + GISTSTATE * giststate) { - Buffer b; - Page p; - InsertIndexResult res; - OffsetNumber off; - GISTENTRY tmpentry; - IndexTuple newtup; - - b = ReadBuffer(r, stk->gs_blk); - p = BufferGetPage(b); - - if (gistnospace(p, tup)) { - res = gistSplit(r, b, stk->gs_parent, tup, giststate); - WriteBuffer(b); /* don't forget to release buffer! - 01/31/94 */ - return(res); - } - else { - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - off = gistPageAddItem(giststate, r, p, (Item) tup, IndexTupleSize(tup), - InvalidOffsetNumber, LP_USED, &tmpentry, &newtup); - WriteBuffer(b); - ItemPointerSet(&(res->pointerData), stk->gs_blk, off); - gistAdjustKeys(r, stk->gs_parent, stk->gs_blk, tmpentry.pred, - tmpentry.bytes, giststate); - /* be tidy */ - if (tmpentry.pred != (((char *) tup) + sizeof(IndexTupleData))) - pfree(tmpentry.pred); - if (tup != newtup) - pfree(newtup); - return(res); - } -} + Buffer b; + Page p; + InsertIndexResult res; + OffsetNumber off; + GISTENTRY tmpentry; + IndexTuple newtup; + + b = ReadBuffer(r, stk->gs_blk); + p = BufferGetPage(b); + + if (gistnospace(p, tup)) + { + res = gistSplit(r, b, stk->gs_parent, tup, giststate); + WriteBuffer(b); /* don't forget to release buffer! 
- + * 01/31/94 */ + return (res); + } + else + { + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + off = gistPageAddItem(giststate, r, p, (Item) tup, IndexTupleSize(tup), + InvalidOffsetNumber, LP_USED, &tmpentry, &newtup); + WriteBuffer(b); + ItemPointerSet(&(res->pointerData), stk->gs_blk, off); + gistAdjustKeys(r, stk->gs_parent, stk->gs_blk, tmpentry.pred, + tmpentry.bytes, giststate); + /* be tidy */ + if (tmpentry.pred != (((char *) tup) + sizeof(IndexTupleData))) + pfree(tmpentry.pred); + if (tup != newtup) + pfree(newtup); + return (res); + } +} static void -gistnewroot(GISTSTATE *giststate, Relation r, IndexTuple lt, IndexTuple rt) +gistnewroot(GISTSTATE * giststate, Relation r, IndexTuple lt, IndexTuple rt) { - Buffer b; - Page p; - GISTENTRY tmpentry; - IndexTuple newtup; - - b = ReadBuffer(r, GISTP_ROOT); - GISTInitBuffer(b, 0); - p = BufferGetPage(b); - gistPageAddItem(giststate, r, p, (Item) lt, IndexTupleSize(lt), - FirstOffsetNumber, - LP_USED, &tmpentry, &newtup); - /* be tidy */ - if (tmpentry.pred != (((char *) lt) + sizeof(IndexTupleData))) - pfree(tmpentry.pred); - if (lt != newtup) - pfree(newtup); - gistPageAddItem(giststate, r, p, (Item) rt, IndexTupleSize(rt), - OffsetNumberNext(FirstOffsetNumber), LP_USED, - &tmpentry, &newtup); - /* be tidy */ - if (tmpentry.pred != (((char *) rt) + sizeof(IndexTupleData))) - pfree(tmpentry.pred); - if (rt != newtup) - pfree(newtup); - WriteBuffer(b); + Buffer b; + Page p; + GISTENTRY tmpentry; + IndexTuple newtup; + + b = ReadBuffer(r, GISTP_ROOT); + GISTInitBuffer(b, 0); + p = BufferGetPage(b); + gistPageAddItem(giststate, r, p, (Item) lt, IndexTupleSize(lt), + FirstOffsetNumber, + LP_USED, &tmpentry, &newtup); + /* be tidy */ + if (tmpentry.pred != (((char *) lt) + sizeof(IndexTupleData))) + pfree(tmpentry.pred); + if (lt != newtup) + pfree(newtup); + gistPageAddItem(giststate, r, p, (Item) rt, IndexTupleSize(rt), + OffsetNumberNext(FirstOffsetNumber), LP_USED, + &tmpentry, &newtup); 
+ /* be tidy */ + if (tmpentry.pred != (((char *) rt) + sizeof(IndexTupleData))) + pfree(tmpentry.pred); + if (rt != newtup) + pfree(newtup); + WriteBuffer(b); } static void GISTInitBuffer(Buffer b, uint32 f) { - GISTPageOpaque opaque; - Page page; - Size pageSize; - - pageSize = BufferGetPageSize(b); - - page = BufferGetPage(b); - memset(page, 0, (int) pageSize); - PageInit(page, pageSize, sizeof(GISTPageOpaqueData)); - - opaque = (GISTPageOpaque) PageGetSpecialPointer(page); - opaque->flags = f; + GISTPageOpaque opaque; + Page page; + Size pageSize; + + pageSize = BufferGetPageSize(b); + + page = BufferGetPage(b); + memset(page, 0, (int) pageSize); + PageInit(page, pageSize, sizeof(GISTPageOpaqueData)); + + opaque = (GISTPageOpaque) PageGetSpecialPointer(page); + opaque->flags = f; } /* ** find entry with lowest penalty */ -static OffsetNumber -gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ - GISTSTATE *giststate) +static OffsetNumber +gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ + GISTSTATE * giststate) { - OffsetNumber maxoff; - OffsetNumber i; - char *id; - char *datum; - float usize; - OffsetNumber which; - float which_grow; - GISTENTRY entry, identry; - int size, idsize; - - idsize = IndexTupleSize(it) - sizeof(IndexTupleData); - id = ((char *) it) + sizeof(IndexTupleData); - maxoff = PageGetMaxOffsetNumber(p); - which_grow = -1.0; - which = -1; - - gistdentryinit(giststate,&identry,id,(Relation)NULL,(Page)NULL, - (OffsetNumber)0, idsize, FALSE); - - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - datum = (char *) PageGetItem(p, PageGetItemId(p, i)); - size = IndexTupleSize(datum) - sizeof(IndexTupleData); - datum += sizeof(IndexTupleData); - gistdentryinit(giststate,&entry,datum,r,p,i,size,FALSE); - (giststate->penaltyFn)(&entry, &identry, &usize); - if (which_grow < 0 || usize < which_grow) { - which = i; - which_grow = usize; - if (which_grow == 0) - break; + OffsetNumber 
maxoff; + OffsetNumber i; + char *id; + char *datum; + float usize; + OffsetNumber which; + float which_grow; + GISTENTRY entry, + identry; + int size, + idsize; + + idsize = IndexTupleSize(it) - sizeof(IndexTupleData); + id = ((char *) it) + sizeof(IndexTupleData); + maxoff = PageGetMaxOffsetNumber(p); + which_grow = -1.0; + which = -1; + + gistdentryinit(giststate, &identry, id, (Relation) NULL, (Page) NULL, + (OffsetNumber) 0, idsize, FALSE); + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + datum = (char *) PageGetItem(p, PageGetItemId(p, i)); + size = IndexTupleSize(datum) - sizeof(IndexTupleData); + datum += sizeof(IndexTupleData); + gistdentryinit(giststate, &entry, datum, r, p, i, size, FALSE); + (giststate->penaltyFn) (&entry, &identry, &usize); + if (which_grow < 0 || usize < which_grow) + { + which = i; + which_grow = usize; + if (which_grow == 0) + break; + } + if (entry.pred != datum) + pfree(entry.pred); } - if (entry.pred != datum) - pfree(entry.pred); - } - if (identry.pred != id) - pfree(identry.pred); - - return (which); + if (identry.pred != id) + pfree(identry.pred); + + return (which); } static int gistnospace(Page p, IndexTuple it) { - return (PageGetFreeSpace(p) < IndexTupleSize(it)); + return (PageGetFreeSpace(p) < IndexTupleSize(it)); } void -gistfreestack(GISTSTACK *s) +gistfreestack(GISTSTACK * s) { - GISTSTACK *p; - - while (s != (GISTSTACK *) NULL) { - p = s->gs_parent; - pfree(s); - s = p; - } + GISTSTACK *p; + + while (s != (GISTSTACK *) NULL) + { + p = s->gs_parent; + pfree(s); + s = p; + } } -/* -** remove an entry from a page +/* +** remove an entry from a page */ void gistdelete(Relation r, ItemPointer tid) { - BlockNumber blkno; - OffsetNumber offnum; - Buffer buf; - Page page; - - /* must write-lock on delete */ - RelationSetLockForWrite(r); - - blkno = ItemPointerGetBlockNumber(tid); - offnum = ItemPointerGetOffsetNumber(tid); - - /* adjust any scans that will be affected by this deletion */ - 
gistadjscans(r, GISTOP_DEL, blkno, offnum); - - /* delete the index tuple */ - buf = ReadBuffer(r, blkno); - page = BufferGetPage(buf); - - PageIndexTupleDelete(page, offnum); - - WriteBuffer(buf); - - /* XXX -- two-phase locking, don't release the write lock */ + BlockNumber blkno; + OffsetNumber offnum; + Buffer buf; + Page page; + + /* must write-lock on delete */ + RelationSetLockForWrite(r); + + blkno = ItemPointerGetBlockNumber(tid); + offnum = ItemPointerGetOffsetNumber(tid); + + /* adjust any scans that will be affected by this deletion */ + gistadjscans(r, GISTOP_DEL, blkno, offnum); + + /* delete the index tuple */ + buf = ReadBuffer(r, blkno); + page = BufferGetPage(buf); + + PageIndexTupleDelete(page, offnum); + + WriteBuffer(buf); + + /* XXX -- two-phase locking, don't release the write lock */ } -void -initGISTstate(GISTSTATE *giststate, Relation index) +void +initGISTstate(GISTSTATE * giststate, Relation index) { - RegProcedure consistent_proc, union_proc, compress_proc, decompress_proc; - RegProcedure penalty_proc, picksplit_proc, equal_proc; - func_ptr user_fn; - int pronargs; - HeapTuple htup; - IndexTupleForm itupform; - - consistent_proc = index_getprocid(index, 1, GIST_CONSISTENT_PROC); - union_proc = index_getprocid(index, 1, GIST_UNION_PROC); - compress_proc = index_getprocid(index, 1, GIST_COMPRESS_PROC); - decompress_proc = index_getprocid(index, 1, GIST_DECOMPRESS_PROC); - penalty_proc = index_getprocid(index, 1, GIST_PENALTY_PROC); - picksplit_proc = index_getprocid(index, 1, GIST_PICKSPLIT_PROC); - equal_proc = index_getprocid(index, 1, GIST_EQUAL_PROC); - fmgr_info(consistent_proc, &user_fn, &pronargs); - giststate->consistentFn = user_fn; - fmgr_info(union_proc, &user_fn, &pronargs); - giststate->unionFn = user_fn; - fmgr_info(compress_proc, &user_fn, &pronargs); - giststate->compressFn = user_fn; - fmgr_info(decompress_proc, &user_fn, &pronargs); - giststate->decompressFn = user_fn; - fmgr_info(penalty_proc, &user_fn, &pronargs); - 
giststate->penaltyFn = user_fn; - fmgr_info(picksplit_proc, &user_fn, &pronargs); - giststate->picksplitFn = user_fn; - fmgr_info(equal_proc, &user_fn, &pronargs); - giststate->equalFn = user_fn; - - /* see if key type is different from type of attribute being indexed */ - htup = SearchSysCacheTuple(INDEXRELID, ObjectIdGetDatum(index->rd_id), - 0,0,0); - itupform = (IndexTupleForm)GETSTRUCT(htup); - if (!HeapTupleIsValid(htup)) - elog(WARN, "initGISTstate: index %d not found", index->rd_id); - giststate->haskeytype = itupform->indhaskeytype; - if (giststate->haskeytype) { - /* key type is different -- is it byval? */ - htup = SearchSysCacheTuple(ATTNUM, - ObjectIdGetDatum(itupform->indexrelid), - UInt16GetDatum(FirstOffsetNumber), - 0,0); - if (!HeapTupleIsValid(htup)) { - elog(WARN, "initGISTstate: no attribute tuple %d %d", - itupform->indexrelid, FirstOffsetNumber); - return; + RegProcedure consistent_proc, + union_proc, + compress_proc, + decompress_proc; + RegProcedure penalty_proc, + picksplit_proc, + equal_proc; + func_ptr user_fn; + int pronargs; + HeapTuple htup; + IndexTupleForm itupform; + + consistent_proc = index_getprocid(index, 1, GIST_CONSISTENT_PROC); + union_proc = index_getprocid(index, 1, GIST_UNION_PROC); + compress_proc = index_getprocid(index, 1, GIST_COMPRESS_PROC); + decompress_proc = index_getprocid(index, 1, GIST_DECOMPRESS_PROC); + penalty_proc = index_getprocid(index, 1, GIST_PENALTY_PROC); + picksplit_proc = index_getprocid(index, 1, GIST_PICKSPLIT_PROC); + equal_proc = index_getprocid(index, 1, GIST_EQUAL_PROC); + fmgr_info(consistent_proc, &user_fn, &pronargs); + giststate->consistentFn = user_fn; + fmgr_info(union_proc, &user_fn, &pronargs); + giststate->unionFn = user_fn; + fmgr_info(compress_proc, &user_fn, &pronargs); + giststate->compressFn = user_fn; + fmgr_info(decompress_proc, &user_fn, &pronargs); + giststate->decompressFn = user_fn; + fmgr_info(penalty_proc, &user_fn, &pronargs); + giststate->penaltyFn = user_fn; + 
fmgr_info(picksplit_proc, &user_fn, &pronargs); + giststate->picksplitFn = user_fn; + fmgr_info(equal_proc, &user_fn, &pronargs); + giststate->equalFn = user_fn; + + /* see if key type is different from type of attribute being indexed */ + htup = SearchSysCacheTuple(INDEXRELID, ObjectIdGetDatum(index->rd_id), + 0, 0, 0); + itupform = (IndexTupleForm) GETSTRUCT(htup); + if (!HeapTupleIsValid(htup)) + elog(WARN, "initGISTstate: index %d not found", index->rd_id); + giststate->haskeytype = itupform->indhaskeytype; + if (giststate->haskeytype) + { + /* key type is different -- is it byval? */ + htup = SearchSysCacheTuple(ATTNUM, + ObjectIdGetDatum(itupform->indexrelid), + UInt16GetDatum(FirstOffsetNumber), + 0, 0); + if (!HeapTupleIsValid(htup)) + { + elog(WARN, "initGISTstate: no attribute tuple %d %d", + itupform->indexrelid, FirstOffsetNumber); + return; + } + giststate->keytypbyval = (((AttributeTupleForm) htup)->attbyval); } - giststate->keytypbyval = (((AttributeTupleForm)htup)->attbyval); - } - else - giststate->keytypbyval = FALSE; - return; + else + giststate->keytypbyval = FALSE; + return; } @@ -1118,56 +1209,61 @@ initGISTstate(GISTSTATE *giststate, Relation index) ** the key with another key, which may involve generating a new IndexTuple ** if the sizes don't match */ -static IndexTuple +static IndexTuple gist_tuple_replacekey(Relation r, GISTENTRY entry, IndexTuple t) { - char * datum = (((char *) t) + sizeof(IndexTupleData)); - - /* if new entry fits in index tuple, copy it in */ - if (entry.bytes < IndexTupleSize(t) - sizeof(IndexTupleData)) { - memcpy(datum, entry.pred, entry.bytes); - /* clear out old size */ - t->t_info &= 0xe000; - /* or in new size */ - t->t_info |= MAXALIGN(entry.bytes + sizeof(IndexTupleData)); - - return(t); - } - else { - /* generate a new index tuple for the compressed entry */ - TupleDesc tupDesc = r->rd_att; - IndexTuple newtup; - char *isnull; - int blank; - - isnull = (char *) palloc(r->rd_rel->relnatts); - for (blank = 0; 
blank < r->rd_rel->relnatts; blank++) - isnull[blank] = ' '; - newtup = (IndexTuple) index_formtuple(tupDesc, - (Datum *)&(entry.pred), - isnull); - newtup->t_tid = t->t_tid; - pfree(isnull); - return(newtup); - } + char *datum = (((char *) t) + sizeof(IndexTupleData)); + + /* if new entry fits in index tuple, copy it in */ + if (entry.bytes < IndexTupleSize(t) - sizeof(IndexTupleData)) + { + memcpy(datum, entry.pred, entry.bytes); + /* clear out old size */ + t->t_info &= 0xe000; + /* or in new size */ + t->t_info |= MAXALIGN(entry.bytes + sizeof(IndexTupleData)); + + return (t); + } + else + { + /* generate a new index tuple for the compressed entry */ + TupleDesc tupDesc = r->rd_att; + IndexTuple newtup; + char *isnull; + int blank; + + isnull = (char *) palloc(r->rd_rel->relnatts); + for (blank = 0; blank < r->rd_rel->relnatts; blank++) + isnull[blank] = ' '; + newtup = (IndexTuple) index_formtuple(tupDesc, + (Datum *) & (entry.pred), + isnull); + newtup->t_tid = t->t_tid; + pfree(isnull); + return (newtup); + } } - + /* ** initialize a GiST entry with a decompressed version of pred */ void -gistdentryinit(GISTSTATE *giststate, GISTENTRY *e, char *pr, Relation r, - Page pg, OffsetNumber o, int b, bool l) -{ - GISTENTRY *dep; - gistentryinit(*e, pr, r, pg, o, b, l); - if (giststate->haskeytype) { - dep = (GISTENTRY *)((giststate->decompressFn)(e)); - gistentryinit(*e, dep->pred, dep->rel, dep->page, dep->offset, dep->bytes, - dep->leafkey); - if (dep != e) pfree(dep); - } +gistdentryinit(GISTSTATE * giststate, GISTENTRY * e, char *pr, Relation r, + Page pg, OffsetNumber o, int b, bool l) +{ + GISTENTRY *dep; + + gistentryinit(*e, pr, r, pg, o, b, l); + if (giststate->haskeytype) + { + dep = (GISTENTRY *) ((giststate->decompressFn) (e)); + gistentryinit(*e, dep->pred, dep->rel, dep->page, dep->offset, dep->bytes, + dep->leafkey); + if (dep != e) + pfree(dep); + } } @@ -1175,19 +1271,22 @@ gistdentryinit(GISTSTATE *giststate, GISTENTRY *e, char *pr, Relation r, ** 
initialize a GiST entry with a compressed version of pred */ static void -gistcentryinit(GISTSTATE *giststate, GISTENTRY *e, char *pr, Relation r, - Page pg, OffsetNumber o, int b, bool l) -{ - GISTENTRY *cep; - gistentryinit(*e, pr, r, pg, o, b, l); - if (giststate->haskeytype) { - cep = (GISTENTRY *)((giststate->compressFn)(e)); - gistentryinit(*e, cep->pred, cep->rel, cep->page, cep->offset, cep->bytes, - cep->leafkey); - if (cep != e) pfree(cep); - } +gistcentryinit(GISTSTATE * giststate, GISTENTRY * e, char *pr, Relation r, + Page pg, OffsetNumber o, int b, bool l) +{ + GISTENTRY *cep; + + gistentryinit(*e, pr, r, pg, o, b, l); + if (giststate->haskeytype) + { + cep = (GISTENTRY *) ((giststate->compressFn) (e)); + gistentryinit(*e, cep->pred, cep->rel, cep->page, cep->offset, cep->bytes, + cep->leafkey); + if (cep != e) + pfree(cep); + } } - + #ifdef GISTDEBUG @@ -1200,89 +1299,95 @@ gistcentryinit(GISTSTATE *giststate, GISTENTRY *e, char *pr, Relation r, void _gistdump(Relation r) { - Buffer buf; - Page page; - OffsetNumber offnum, maxoff; - BlockNumber blkno; - BlockNumber nblocks; - GISTPageOpaque po; - IndexTuple itup; - BlockNumber itblkno; - OffsetNumber itoffno; - char *datum; - char *itkey; - - nblocks = RelationGetNumberOfBlocks(r); - for (blkno = 0; blkno < nblocks; blkno++) { - buf = ReadBuffer(r, blkno); - page = BufferGetPage(buf); - po = (GISTPageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - printf("Page %d maxoff %d <%s>\n", blkno, maxoff, - (po->flags & F_LEAF ? 
"LEAF" : "INTERNAL")); - - if (PageIsEmpty(page)) { - ReleaseBuffer(buf); - continue; - } - - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) { - itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); - itblkno = ItemPointerGetBlockNumber(&(itup->t_tid)); - itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid)); - datum = ((char *) itup); - datum += sizeof(IndexTupleData); - /* get out function for type of key, and out it! */ - itkey = (char *) int_range_out((INTRANGE *)datum); - /* itkey = " unable to print"; */ - printf("\t[%d] size %d heap <%d,%d> key:%s\n", - offnum, IndexTupleSize(itup), itblkno, itoffno, itkey); - pfree(itkey); + Buffer buf; + Page page; + OffsetNumber offnum, + maxoff; + BlockNumber blkno; + BlockNumber nblocks; + GISTPageOpaque po; + IndexTuple itup; + BlockNumber itblkno; + OffsetNumber itoffno; + char *datum; + char *itkey; + + nblocks = RelationGetNumberOfBlocks(r); + for (blkno = 0; blkno < nblocks; blkno++) + { + buf = ReadBuffer(r, blkno); + page = BufferGetPage(buf); + po = (GISTPageOpaque) PageGetSpecialPointer(page); + maxoff = PageGetMaxOffsetNumber(page); + printf("Page %d maxoff %d <%s>\n", blkno, maxoff, + (po->flags & F_LEAF ? "LEAF" : "INTERNAL")); + + if (PageIsEmpty(page)) + { + ReleaseBuffer(buf); + continue; + } + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + itblkno = ItemPointerGetBlockNumber(&(itup->t_tid)); + itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid)); + datum = ((char *) itup); + datum += sizeof(IndexTupleData); + /* get out function for type of key, and out it! 
*/ + itkey = (char *) int_range_out((INTRANGE *) datum); + /* itkey = " unable to print"; */ + printf("\t[%d] size %d heap <%d,%d> key:%s\n", + offnum, IndexTupleSize(itup), itblkno, itoffno, itkey); + pfree(itkey); + } + + ReleaseBuffer(buf); } - - ReleaseBuffer(buf); - } } #ifdef NOT_USED -static char *text_range_out(TXTRANGE *r) +static char * +text_range_out(TXTRANGE * r) { - char *result; - char *lower, *upper; - - if (r == NULL) - return(NULL); - result = (char *)palloc(16 + VARSIZE(TRLOWER(r)) + VARSIZE(TRUPPER(r)) - - 2*VARHDRSZ); - - lower = (char *)palloc(VARSIZE(TRLOWER(r)) + 1 - VARHDRSZ); - memcpy(lower, VARDATA(TRLOWER(r)), VARSIZE(TRLOWER(r)) - VARHDRSZ); - lower[VARSIZE(TRLOWER(r)) - VARHDRSZ] = '\0'; - upper = (char *)palloc(VARSIZE(TRUPPER(r)) + 1 - VARHDRSZ); - memcpy(upper, VARDATA(TRUPPER(r)), VARSIZE(TRUPPER(r)) - VARHDRSZ); - upper[VARSIZE(TRUPPER(r)) - VARHDRSZ] = '\0'; - - sprintf(result, "[%s,%s): %d", lower, upper, r->flag); - pfree(lower); - pfree(upper); - return(result); + char *result; + char *lower, + *upper; + + if (r == NULL) + return (NULL); + result = (char *) palloc(16 + VARSIZE(TRLOWER(r)) + VARSIZE(TRUPPER(r)) + - 2 * VARHDRSZ); + + lower = (char *) palloc(VARSIZE(TRLOWER(r)) + 1 - VARHDRSZ); + memcpy(lower, VARDATA(TRLOWER(r)), VARSIZE(TRLOWER(r)) - VARHDRSZ); + lower[VARSIZE(TRLOWER(r)) - VARHDRSZ] = '\0'; + upper = (char *) palloc(VARSIZE(TRUPPER(r)) + 1 - VARHDRSZ); + memcpy(upper, VARDATA(TRUPPER(r)), VARSIZE(TRUPPER(r)) - VARHDRSZ); + upper[VARSIZE(TRUPPER(r)) - VARHDRSZ] = '\0'; + + sprintf(result, "[%s,%s): %d", lower, upper, r->flag); + pfree(lower); + pfree(upper); + return (result); } + #endif -static char * -int_range_out(INTRANGE *r) +static char * +int_range_out(INTRANGE * r) { - char *result; - - if (r == NULL) - return(NULL); - result = (char *)palloc(80); - sprintf(result, "[%d,%d): %d",r->lower, r->upper, r->flag); - - return(result); -} + char *result; + + if (r == NULL) + return (NULL); + result = (char *) 
palloc(80); + sprintf(result, "[%d,%d): %d", r->lower, r->upper, r->flag); -#endif /* defined GISTDEBUG */ + return (result); +} +#endif /* defined GISTDEBUG */ diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index ac1697e5ed..cad4cef267 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -1,12 +1,12 @@ /*------------------------------------------------------------------------- * * gistget.c-- - * fetch tuples from a GiST scan. + * fetch tuples from a GiST scan. * * * * IDENTIFICATION - * /usr/local/devel/pglite/cvs/src/backend/access/gisr/gistget.c,v 1.9.1 1996/11/21 01:00:00 vadim Exp + * /usr/local/devel/pglite/cvs/src/backend/access/gisr/gistget.c,v 1.9.1 1996/11/21 01:00:00 vadim Exp * *------------------------------------------------------------------------- */ @@ -22,350 +22,392 @@ #include <storage/bufmgr.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static OffsetNumber gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, - ScanDirection dir); +static OffsetNumber +gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, + ScanDirection dir); static RetrieveIndexResult gistscancache(IndexScanDesc s, ScanDirection dir); static RetrieveIndexResult gistfirst(IndexScanDesc s, ScanDirection dir); static RetrieveIndexResult gistnext(IndexScanDesc s, ScanDirection dir); static ItemPointer gistheapptr(Relation r, ItemPointer itemp); -static bool gistindex_keytest(IndexTuple tuple, TupleDesc tupdesc, - int scanKeySize, ScanKey key, GISTSTATE *giststate, - Relation r, Page p, OffsetNumber offset); +static bool +gistindex_keytest(IndexTuple tuple, TupleDesc tupdesc, + int scanKeySize, ScanKey key, GISTSTATE * giststate, + Relation r, Page p, OffsetNumber offset); RetrieveIndexResult gistgettuple(IndexScanDesc s, ScanDirection dir) { - RetrieveIndexResult res; - - /* if we have it cached in the scan desc, just 
return the value */ - if ((res = gistscancache(s, dir)) != (RetrieveIndexResult) NULL) + RetrieveIndexResult res; + + /* if we have it cached in the scan desc, just return the value */ + if ((res = gistscancache(s, dir)) != (RetrieveIndexResult) NULL) + return (res); + + /* not cached, so we'll have to do some work */ + if (ItemPointerIsValid(&(s->currentItemData))) + { + res = gistnext(s, dir); + } + else + { + res = gistfirst(s, dir); + } return (res); - - /* not cached, so we'll have to do some work */ - if (ItemPointerIsValid(&(s->currentItemData))) { - res = gistnext(s, dir); - } else { - res = gistfirst(s, dir); - } - return (res); } -static RetrieveIndexResult +static RetrieveIndexResult gistfirst(IndexScanDesc s, ScanDirection dir) { - Buffer b; - Page p; - OffsetNumber n; - OffsetNumber maxoff; - RetrieveIndexResult res; - GISTPageOpaque po; - GISTScanOpaque so; - GISTSTACK *stk; - BlockNumber blk; - IndexTuple it; - - b = ReadBuffer(s->relation, GISTP_ROOT); - p = BufferGetPage(b); - po = (GISTPageOpaque) PageGetSpecialPointer(p); - so = (GISTScanOpaque) s->opaque; - - for (;;) { - maxoff = PageGetMaxOffsetNumber(p); - if (ScanDirectionIsBackward(dir)) - n = gistfindnext(s, p, maxoff, dir); - else - n = gistfindnext(s, p, FirstOffsetNumber, dir); - - while (n < FirstOffsetNumber || n > maxoff) { - - ReleaseBuffer(b); - if (so->s_stack == (GISTSTACK *) NULL) - return ((RetrieveIndexResult) NULL); - - stk = so->s_stack; - b = ReadBuffer(s->relation, stk->gs_blk); - p = BufferGetPage(b); - po = (GISTPageOpaque) PageGetSpecialPointer(p); - maxoff = PageGetMaxOffsetNumber(p); - - if (ScanDirectionIsBackward(dir)) { - n = OffsetNumberPrev(stk->gs_child); - } else { - n = OffsetNumberNext(stk->gs_child); - } - so->s_stack = stk->gs_parent; - pfree(stk); - - n = gistfindnext(s, p, n, dir); - } - if (po->flags & F_LEAF) { - ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - - res = 
FormRetrieveIndexResult(&(s->currentItemData), &(it->t_tid)); - - ReleaseBuffer(b); - return (res); - } else { - stk = (GISTSTACK *) palloc(sizeof(GISTSTACK)); - stk->gs_child = n; - stk->gs_blk = BufferGetBlockNumber(b); - stk->gs_parent = so->s_stack; - so->s_stack = stk; - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - blk = ItemPointerGetBlockNumber(&(it->t_tid)); - - ReleaseBuffer(b); - b = ReadBuffer(s->relation, blk); - p = BufferGetPage(b); - po = (GISTPageOpaque) PageGetSpecialPointer(p); + Buffer b; + Page p; + OffsetNumber n; + OffsetNumber maxoff; + RetrieveIndexResult res; + GISTPageOpaque po; + GISTScanOpaque so; + GISTSTACK *stk; + BlockNumber blk; + IndexTuple it; + + b = ReadBuffer(s->relation, GISTP_ROOT); + p = BufferGetPage(b); + po = (GISTPageOpaque) PageGetSpecialPointer(p); + so = (GISTScanOpaque) s->opaque; + + for (;;) + { + maxoff = PageGetMaxOffsetNumber(p); + if (ScanDirectionIsBackward(dir)) + n = gistfindnext(s, p, maxoff, dir); + else + n = gistfindnext(s, p, FirstOffsetNumber, dir); + + while (n < FirstOffsetNumber || n > maxoff) + { + + ReleaseBuffer(b); + if (so->s_stack == (GISTSTACK *) NULL) + return ((RetrieveIndexResult) NULL); + + stk = so->s_stack; + b = ReadBuffer(s->relation, stk->gs_blk); + p = BufferGetPage(b); + po = (GISTPageOpaque) PageGetSpecialPointer(p); + maxoff = PageGetMaxOffsetNumber(p); + + if (ScanDirectionIsBackward(dir)) + { + n = OffsetNumberPrev(stk->gs_child); + } + else + { + n = OffsetNumberNext(stk->gs_child); + } + so->s_stack = stk->gs_parent; + pfree(stk); + + n = gistfindnext(s, p, n, dir); + } + if (po->flags & F_LEAF) + { + ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + + res = FormRetrieveIndexResult(&(s->currentItemData), &(it->t_tid)); + + ReleaseBuffer(b); + return (res); + } + else + { + stk = (GISTSTACK *) palloc(sizeof(GISTSTACK)); + stk->gs_child = n; + stk->gs_blk = BufferGetBlockNumber(b); + 
stk->gs_parent = so->s_stack; + so->s_stack = stk; + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + blk = ItemPointerGetBlockNumber(&(it->t_tid)); + + ReleaseBuffer(b); + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (GISTPageOpaque) PageGetSpecialPointer(p); + } } - } } -static RetrieveIndexResult +static RetrieveIndexResult gistnext(IndexScanDesc s, ScanDirection dir) { - Buffer b; - Page p; - OffsetNumber n; - OffsetNumber maxoff; - RetrieveIndexResult res; - GISTPageOpaque po; - GISTScanOpaque so; - GISTSTACK *stk; - BlockNumber blk; - IndexTuple it; - - blk = ItemPointerGetBlockNumber(&(s->currentItemData)); - n = ItemPointerGetOffsetNumber(&(s->currentItemData)); - - if (ScanDirectionIsForward(dir)) { - n = OffsetNumberNext(n); - } else { - n = OffsetNumberPrev(n); - } - - b = ReadBuffer(s->relation, blk); - p = BufferGetPage(b); - po = (GISTPageOpaque) PageGetSpecialPointer(p); - so = (GISTScanOpaque) s->opaque; - - for (;;) { - maxoff = PageGetMaxOffsetNumber(p); - n = gistfindnext(s, p, n, dir); - - while (n < FirstOffsetNumber || n > maxoff) { - - ReleaseBuffer(b); - if (so->s_stack == (GISTSTACK *) NULL) - return ((RetrieveIndexResult) NULL); - - stk = so->s_stack; - b = ReadBuffer(s->relation, stk->gs_blk); - p = BufferGetPage(b); - maxoff = PageGetMaxOffsetNumber(p); - po = (GISTPageOpaque) PageGetSpecialPointer(p); - - if (ScanDirectionIsBackward(dir)) { - n = OffsetNumberPrev(stk->gs_child); - } else { - n = OffsetNumberNext(stk->gs_child); - } - so->s_stack = stk->gs_parent; - pfree(stk); - - n = gistfindnext(s, p, n, dir); + Buffer b; + Page p; + OffsetNumber n; + OffsetNumber maxoff; + RetrieveIndexResult res; + GISTPageOpaque po; + GISTScanOpaque so; + GISTSTACK *stk; + BlockNumber blk; + IndexTuple it; + + blk = ItemPointerGetBlockNumber(&(s->currentItemData)); + n = ItemPointerGetOffsetNumber(&(s->currentItemData)); + + if (ScanDirectionIsForward(dir)) + { + n = OffsetNumberNext(n); + } + else + { + n = 
OffsetNumberPrev(n); } - if (po->flags & F_LEAF) { - ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - - res = FormRetrieveIndexResult(&(s->currentItemData), &(it->t_tid)); - - ReleaseBuffer(b); - return (res); - } else { - stk = (GISTSTACK *) palloc(sizeof(GISTSTACK)); - stk->gs_child = n; - stk->gs_blk = BufferGetBlockNumber(b); - stk->gs_parent = so->s_stack; - so->s_stack = stk; - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - blk = ItemPointerGetBlockNumber(&(it->t_tid)); - - ReleaseBuffer(b); - b = ReadBuffer(s->relation, blk); - p = BufferGetPage(b); - po = (GISTPageOpaque) PageGetSpecialPointer(p); - - if (ScanDirectionIsBackward(dir)) { - n = PageGetMaxOffsetNumber(p); - } else { - n = FirstOffsetNumber; - } + + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (GISTPageOpaque) PageGetSpecialPointer(p); + so = (GISTScanOpaque) s->opaque; + + for (;;) + { + maxoff = PageGetMaxOffsetNumber(p); + n = gistfindnext(s, p, n, dir); + + while (n < FirstOffsetNumber || n > maxoff) + { + + ReleaseBuffer(b); + if (so->s_stack == (GISTSTACK *) NULL) + return ((RetrieveIndexResult) NULL); + + stk = so->s_stack; + b = ReadBuffer(s->relation, stk->gs_blk); + p = BufferGetPage(b); + maxoff = PageGetMaxOffsetNumber(p); + po = (GISTPageOpaque) PageGetSpecialPointer(p); + + if (ScanDirectionIsBackward(dir)) + { + n = OffsetNumberPrev(stk->gs_child); + } + else + { + n = OffsetNumberNext(stk->gs_child); + } + so->s_stack = stk->gs_parent; + pfree(stk); + + n = gistfindnext(s, p, n, dir); + } + if (po->flags & F_LEAF) + { + ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + + res = FormRetrieveIndexResult(&(s->currentItemData), &(it->t_tid)); + + ReleaseBuffer(b); + return (res); + } + else + { + stk = (GISTSTACK *) palloc(sizeof(GISTSTACK)); + stk->gs_child = n; + stk->gs_blk = 
BufferGetBlockNumber(b); + stk->gs_parent = so->s_stack; + so->s_stack = stk; + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + blk = ItemPointerGetBlockNumber(&(it->t_tid)); + + ReleaseBuffer(b); + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (GISTPageOpaque) PageGetSpecialPointer(p); + + if (ScanDirectionIsBackward(dir)) + { + n = PageGetMaxOffsetNumber(p); + } + else + { + n = FirstOffsetNumber; + } + } } - } } /* Similar to index_keytest, but decompresses the key in the IndexTuple */ -static bool +static bool gistindex_keytest(IndexTuple tuple, - TupleDesc tupdesc, - int scanKeySize, - ScanKey key, - GISTSTATE *giststate, - Relation r, - Page p, - OffsetNumber offset) + TupleDesc tupdesc, + int scanKeySize, + ScanKey key, + GISTSTATE * giststate, + Relation r, + Page p, + OffsetNumber offset) { - bool isNull; - Datum datum; - int test; - GISTENTRY de; - - IncrIndexProcessed(); - - - while (scanKeySize > 0) { - datum = index_getattr(tuple, - 1, - tupdesc, - &isNull); - gistdentryinit(giststate, &de, (char *)datum, r, p, offset, - IndexTupleSize(tuple) - sizeof(IndexTupleData), - FALSE); - - if (isNull) { - /* XXX eventually should check if SK_ISNULL */ - return (false); - } - - if (key[0].sk_flags & SK_COMMUTE) { - test = (*(key[0].sk_func)) - (DatumGetPointer(key[0].sk_argument), - &de, key[0].sk_procedure) ? 1 : 0; - } else { - test = (*(key[0].sk_func)) - (&de, - DatumGetPointer(key[0].sk_argument), - key[0].sk_procedure) ? 
1 : 0; - } - - if (!test == !(key[0].sk_flags & SK_NEGATE)) { - return (false); + bool isNull; + Datum datum; + int test; + GISTENTRY de; + + IncrIndexProcessed(); + + + while (scanKeySize > 0) + { + datum = index_getattr(tuple, + 1, + tupdesc, + &isNull); + gistdentryinit(giststate, &de, (char *) datum, r, p, offset, + IndexTupleSize(tuple) - sizeof(IndexTupleData), + FALSE); + + if (isNull) + { + /* XXX eventually should check if SK_ISNULL */ + return (false); + } + + if (key[0].sk_flags & SK_COMMUTE) + { + test = (*(key[0].sk_func)) + (DatumGetPointer(key[0].sk_argument), + &de, key[0].sk_procedure) ? 1 : 0; + } + else + { + test = (*(key[0].sk_func)) + (&de, + DatumGetPointer(key[0].sk_argument), + key[0].sk_procedure) ? 1 : 0; + } + + if (!test == !(key[0].sk_flags & SK_NEGATE)) + { + return (false); + } + + scanKeySize -= 1; + key++; } - - scanKeySize -= 1; - key++; - } - - return (true); + + return (true); } -static OffsetNumber +static OffsetNumber gistfindnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir) { - OffsetNumber maxoff; - char *it; - GISTPageOpaque po; - GISTScanOpaque so; - GISTSTATE *giststate; - - maxoff = PageGetMaxOffsetNumber(p); - po = (GISTPageOpaque) PageGetSpecialPointer(p); - so = (GISTScanOpaque) s->opaque; - giststate = so->giststate; - - /* - * If we modified the index during the scan, we may have a pointer to - * a ghost tuple, before the scan. If this is the case, back up one. 
- */ - - if (so->s_flags & GS_CURBEFORE) { - so->s_flags &= ~GS_CURBEFORE; - n = OffsetNumberPrev(n); - } - - while (n >= FirstOffsetNumber && n <= maxoff) { - it = (char *) PageGetItem(p, PageGetItemId(p, n)); - if (gistindex_keytest((IndexTuple) it, - RelationGetTupleDescriptor(s->relation), - s->numberOfKeys, s->keyData, giststate, - s->relation, p, n)) - break; - - if (ScanDirectionIsBackward(dir)) { - n = OffsetNumberPrev(n); - } else { - n = OffsetNumberNext(n); + OffsetNumber maxoff; + char *it; + GISTPageOpaque po; + GISTScanOpaque so; + GISTSTATE *giststate; + + maxoff = PageGetMaxOffsetNumber(p); + po = (GISTPageOpaque) PageGetSpecialPointer(p); + so = (GISTScanOpaque) s->opaque; + giststate = so->giststate; + + /* + * If we modified the index during the scan, we may have a pointer to + * a ghost tuple, before the scan. If this is the case, back up one. + */ + + if (so->s_flags & GS_CURBEFORE) + { + so->s_flags &= ~GS_CURBEFORE; + n = OffsetNumberPrev(n); } - } - - return (n); + + while (n >= FirstOffsetNumber && n <= maxoff) + { + it = (char *) PageGetItem(p, PageGetItemId(p, n)); + if (gistindex_keytest((IndexTuple) it, + RelationGetTupleDescriptor(s->relation), + s->numberOfKeys, s->keyData, giststate, + s->relation, p, n)) + break; + + if (ScanDirectionIsBackward(dir)) + { + n = OffsetNumberPrev(n); + } + else + { + n = OffsetNumberNext(n); + } + } + + return (n); } -static RetrieveIndexResult +static RetrieveIndexResult gistscancache(IndexScanDesc s, ScanDirection dir) { - RetrieveIndexResult res; - ItemPointer ip; - - if (!(ScanDirectionIsNoMovement(dir) - && ItemPointerIsValid(&(s->currentItemData)))) { - - return ((RetrieveIndexResult) NULL); - } - - ip = gistheapptr(s->relation, &(s->currentItemData)); - - if (ItemPointerIsValid(ip)) - res = FormRetrieveIndexResult(&(s->currentItemData), ip); - else - res = (RetrieveIndexResult) NULL; - - pfree (ip); - - return (res); + RetrieveIndexResult res; + ItemPointer ip; + + if 
(!(ScanDirectionIsNoMovement(dir) + && ItemPointerIsValid(&(s->currentItemData)))) + { + + return ((RetrieveIndexResult) NULL); + } + + ip = gistheapptr(s->relation, &(s->currentItemData)); + + if (ItemPointerIsValid(ip)) + res = FormRetrieveIndexResult(&(s->currentItemData), ip); + else + res = (RetrieveIndexResult) NULL; + + pfree(ip); + + return (res); } /* - * gistheapptr returns the item pointer to the tuple in the heap relation - * for which itemp is the index relation item pointer. + * gistheapptr returns the item pointer to the tuple in the heap relation + * for which itemp is the index relation item pointer. */ -static ItemPointer +static ItemPointer gistheapptr(Relation r, ItemPointer itemp) { - Buffer b; - Page p; - IndexTuple it; - ItemPointer ip; - OffsetNumber n; - - ip = (ItemPointer) palloc(sizeof(ItemPointerData)); - if (ItemPointerIsValid(itemp)) { - b = ReadBuffer(r, ItemPointerGetBlockNumber(itemp)); - p = BufferGetPage(b); - n = ItemPointerGetOffsetNumber(itemp); - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - memmove((char *) ip, (char *) &(it->t_tid), - sizeof(ItemPointerData)); - ReleaseBuffer(b); - } else { - ItemPointerSetInvalid(ip); - } - - return (ip); + Buffer b; + Page p; + IndexTuple it; + ItemPointer ip; + OffsetNumber n; + + ip = (ItemPointer) palloc(sizeof(ItemPointerData)); + if (ItemPointerIsValid(itemp)) + { + b = ReadBuffer(r, ItemPointerGetBlockNumber(itemp)); + p = BufferGetPage(b); + n = ItemPointerGetOffsetNumber(itemp); + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + memmove((char *) ip, (char *) &(it->t_tid), + sizeof(ItemPointerData)); + ReleaseBuffer(b); + } + else + { + ItemPointerSetInvalid(ip); + } + + return (ip); } diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index c877538472..ec680558d8 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -1,11 +1,11 @@ 
/*------------------------------------------------------------------------- * * gistscan.c-- - * routines to manage scans on index relations + * routines to manage scans on index relations * * * IDENTIFICATION - * /usr/local/devel/pglite/cvs/src/backend/access/gist/gistscan.c,v 1.7 1995/06/14 00:10:05 jolly Exp + * /usr/local/devel/pglite/cvs/src/backend/access/gist/gistscan.c,v 1.7 1995/06/14 00:10:05 jolly Exp * *------------------------------------------------------------------------- */ @@ -18,375 +18,411 @@ #include <access/rtree.h> #include <storage/bufmgr.h> #include <access/giststrat.h> -#include <storage/lmgr.h> +#include <storage/lmgr.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif /* routines defined and used here */ -static void gistregscan(IndexScanDesc s); -static void gistdropscan(IndexScanDesc s); -static void gistadjone(IndexScanDesc s, int op, BlockNumber blkno, - OffsetNumber offnum); -static void adjuststack(GISTSTACK *stk, BlockNumber blkno, +static void gistregscan(IndexScanDesc s); +static void gistdropscan(IndexScanDesc s); +static void +gistadjone(IndexScanDesc s, int op, BlockNumber blkno, + OffsetNumber offnum); +static void +adjuststack(GISTSTACK * stk, BlockNumber blkno, OffsetNumber offnum); -static void adjustiptr(IndexScanDesc s, ItemPointer iptr, - int op, BlockNumber blkno, OffsetNumber offnum); +static void +adjustiptr(IndexScanDesc s, ItemPointer iptr, + int op, BlockNumber blkno, OffsetNumber offnum); /* - * Whenever we start a GiST scan in a backend, we register it in private - * space. Then if the GiST index gets updated, we check all registered - * scans and adjust them if the tuple they point at got moved by the - * update. We only need to do this in private space, because when we update - * an GiST we have a write lock on the tree, so no other process can have - * any locks at all on it. 
A single transaction can have write and read - * locks on the same object, so that's why we need to handle this case. + * Whenever we start a GiST scan in a backend, we register it in private + * space. Then if the GiST index gets updated, we check all registered + * scans and adjust them if the tuple they point at got moved by the + * update. We only need to do this in private space, because when we update + * an GiST we have a write lock on the tree, so no other process can have + * any locks at all on it. A single transaction can have write and read + * locks on the same object, so that's why we need to handle this case. */ -typedef struct GISTScanListData { - IndexScanDesc gsl_scan; - struct GISTScanListData *gsl_next; -} GISTScanListData; +typedef struct GISTScanListData +{ + IndexScanDesc gsl_scan; + struct GISTScanListData *gsl_next; +} GISTScanListData; -typedef GISTScanListData *GISTScanList; +typedef GISTScanListData *GISTScanList; /* pointer to list of local scans on GiSTs */ static GISTScanList GISTScans = (GISTScanList) NULL; - + IndexScanDesc gistbeginscan(Relation r, - bool fromEnd, - uint16 nkeys, - ScanKey key) + bool fromEnd, + uint16 nkeys, + ScanKey key) { - IndexScanDesc s; - - RelationSetLockForRead(r); - s = RelationGetIndexScan(r, fromEnd, nkeys, key); - gistregscan(s); - - return (s); + IndexScanDesc s; + + RelationSetLockForRead(r); + s = RelationGetIndexScan(r, fromEnd, nkeys, key); + gistregscan(s); + + return (s); } void gistrescan(IndexScanDesc s, bool fromEnd, ScanKey key) { - GISTScanOpaque p; - int i; - - if (!IndexScanIsValid(s)) { - elog(WARN, "gistrescan: invalid scan."); - return; - } - - /* - * Clear all the pointers. - */ - - ItemPointerSetInvalid(&s->previousItemData); - ItemPointerSetInvalid(&s->currentItemData); - ItemPointerSetInvalid(&s->nextItemData); - ItemPointerSetInvalid(&s->previousMarkData); - ItemPointerSetInvalid(&s->currentMarkData); - ItemPointerSetInvalid(&s->nextMarkData); - - /* - * Set flags. 
- */ - if (RelationGetNumberOfBlocks(s->relation) == 0) { - s->flags = ScanUnmarked; - } else if (fromEnd) { - s->flags = ScanUnmarked | ScanUncheckedPrevious; - } else { - s->flags = ScanUnmarked | ScanUncheckedNext; - } - - s->scanFromEnd = fromEnd; - - if (s->numberOfKeys > 0) { - memmove(s->keyData, - key, - s->numberOfKeys * sizeof(ScanKeyData)); - } - - p = (GISTScanOpaque) s->opaque; - if (p != (GISTScanOpaque) NULL) { - gistfreestack(p->s_stack); - gistfreestack(p->s_markstk); - p->s_stack = p->s_markstk = (GISTSTACK *) NULL; - p->s_flags = 0x0; - for (i = 0; i < s->numberOfKeys; i++) + GISTScanOpaque p; + int i; + + if (!IndexScanIsValid(s)) { - s->keyData[i].sk_procedure - = RelationGetGISTStrategy(s->relation, s->keyData[i].sk_attno, - s->keyData[i].sk_procedure); - s->keyData[i].sk_func = p->giststate->consistentFn; + elog(WARN, "gistrescan: invalid scan."); + return; + } + + /* + * Clear all the pointers. + */ + + ItemPointerSetInvalid(&s->previousItemData); + ItemPointerSetInvalid(&s->currentItemData); + ItemPointerSetInvalid(&s->nextItemData); + ItemPointerSetInvalid(&s->previousMarkData); + ItemPointerSetInvalid(&s->currentMarkData); + ItemPointerSetInvalid(&s->nextMarkData); + + /* + * Set flags. 
+ */ + if (RelationGetNumberOfBlocks(s->relation) == 0) + { + s->flags = ScanUnmarked; + } + else if (fromEnd) + { + s->flags = ScanUnmarked | ScanUncheckedPrevious; + } + else + { + s->flags = ScanUnmarked | ScanUncheckedNext; + } + + s->scanFromEnd = fromEnd; + + if (s->numberOfKeys > 0) + { + memmove(s->keyData, + key, + s->numberOfKeys * sizeof(ScanKeyData)); + } + + p = (GISTScanOpaque) s->opaque; + if (p != (GISTScanOpaque) NULL) + { + gistfreestack(p->s_stack); + gistfreestack(p->s_markstk); + p->s_stack = p->s_markstk = (GISTSTACK *) NULL; + p->s_flags = 0x0; + for (i = 0; i < s->numberOfKeys; i++) + { + s->keyData[i].sk_procedure + = RelationGetGISTStrategy(s->relation, s->keyData[i].sk_attno, + s->keyData[i].sk_procedure); + s->keyData[i].sk_func = p->giststate->consistentFn; + } + } + else + { + /* initialize opaque data */ + p = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); + p->s_stack = p->s_markstk = (GISTSTACK *) NULL; + p->s_flags = 0x0; + s->opaque = p; + p->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); + initGISTstate(p->giststate, s->relation); + if (s->numberOfKeys > 0) + + /* + * * Play games here with the scan key to use the Consistent * + * function for all comparisons: * 1) the sk_procedure field + * will now be used to hold the * strategy number * 2) the + * sk_func field will point to the Consistent function + */ + for (i = 0; i < s->numberOfKeys; i++) + { + + /* + * s->keyData[i].sk_procedure = + * index_getprocid(s->relation, 1, GIST_CONSISTENT_PROC); + */ + s->keyData[i].sk_procedure + = RelationGetGISTStrategy(s->relation, s->keyData[i].sk_attno, + s->keyData[i].sk_procedure); + s->keyData[i].sk_func = p->giststate->consistentFn; + } } - } else { - /* initialize opaque data */ - p = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); - p->s_stack = p->s_markstk = (GISTSTACK *) NULL; - p->s_flags = 0x0; - s->opaque = p; - p->giststate = (GISTSTATE *)palloc(sizeof(GISTSTATE)); - initGISTstate(p->giststate, s->relation); - 
if (s->numberOfKeys > 0) - /* - ** Play games here with the scan key to use the Consistent - ** function for all comparisons: - ** 1) the sk_procedure field will now be used to hold the - ** strategy number - ** 2) the sk_func field will point to the Consistent function - */ - for (i = 0; i < s->numberOfKeys; i++) { - /* s->keyData[i].sk_procedure - = index_getprocid(s->relation, 1, GIST_CONSISTENT_PROC); */ - s->keyData[i].sk_procedure - = RelationGetGISTStrategy(s->relation, s->keyData[i].sk_attno, - s->keyData[i].sk_procedure); - s->keyData[i].sk_func = p->giststate->consistentFn; - } - } } void gistmarkpos(IndexScanDesc s) { - GISTScanOpaque p; - GISTSTACK *o, *n, *tmp; - - s->currentMarkData = s->currentItemData; - p = (GISTScanOpaque) s->opaque; - if (p->s_flags & GS_CURBEFORE) - p->s_flags |= GS_MRKBEFORE; - else - p->s_flags &= ~GS_MRKBEFORE; - - o = (GISTSTACK *) NULL; - n = p->s_stack; - - /* copy the parent stack from the current item data */ - while (n != (GISTSTACK *) NULL) { - tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK)); - tmp->gs_child = n->gs_child; - tmp->gs_blk = n->gs_blk; - tmp->gs_parent = o; - o = tmp; - n = n->gs_parent; - } - - gistfreestack(p->s_markstk); - p->s_markstk = o; + GISTScanOpaque p; + GISTSTACK *o, + *n, + *tmp; + + s->currentMarkData = s->currentItemData; + p = (GISTScanOpaque) s->opaque; + if (p->s_flags & GS_CURBEFORE) + p->s_flags |= GS_MRKBEFORE; + else + p->s_flags &= ~GS_MRKBEFORE; + + o = (GISTSTACK *) NULL; + n = p->s_stack; + + /* copy the parent stack from the current item data */ + while (n != (GISTSTACK *) NULL) + { + tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK)); + tmp->gs_child = n->gs_child; + tmp->gs_blk = n->gs_blk; + tmp->gs_parent = o; + o = tmp; + n = n->gs_parent; + } + + gistfreestack(p->s_markstk); + p->s_markstk = o; } void gistrestrpos(IndexScanDesc s) { - GISTScanOpaque p; - GISTSTACK *o, *n, *tmp; - - s->currentItemData = s->currentMarkData; - p = (GISTScanOpaque) s->opaque; - if (p->s_flags & 
GS_MRKBEFORE) - p->s_flags |= GS_CURBEFORE; - else - p->s_flags &= ~GS_CURBEFORE; - - o = (GISTSTACK *) NULL; - n = p->s_markstk; - - /* copy the parent stack from the current item data */ - while (n != (GISTSTACK *) NULL) { - tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK)); - tmp->gs_child = n->gs_child; - tmp->gs_blk = n->gs_blk; - tmp->gs_parent = o; - o = tmp; - n = n->gs_parent; - } - - gistfreestack(p->s_stack); - p->s_stack = o; + GISTScanOpaque p; + GISTSTACK *o, + *n, + *tmp; + + s->currentItemData = s->currentMarkData; + p = (GISTScanOpaque) s->opaque; + if (p->s_flags & GS_MRKBEFORE) + p->s_flags |= GS_CURBEFORE; + else + p->s_flags &= ~GS_CURBEFORE; + + o = (GISTSTACK *) NULL; + n = p->s_markstk; + + /* copy the parent stack from the current item data */ + while (n != (GISTSTACK *) NULL) + { + tmp = (GISTSTACK *) palloc(sizeof(GISTSTACK)); + tmp->gs_child = n->gs_child; + tmp->gs_blk = n->gs_blk; + tmp->gs_parent = o; + o = tmp; + n = n->gs_parent; + } + + gistfreestack(p->s_stack); + p->s_stack = o; } void gistendscan(IndexScanDesc s) { - GISTScanOpaque p; - - p = (GISTScanOpaque) s->opaque; - - if (p != (GISTScanOpaque) NULL) { - gistfreestack(p->s_stack); - gistfreestack(p->s_markstk); - pfree (s->opaque); - } - - gistdropscan(s); - /* XXX don't unset read lock -- two-phase locking */ + GISTScanOpaque p; + + p = (GISTScanOpaque) s->opaque; + + if (p != (GISTScanOpaque) NULL) + { + gistfreestack(p->s_stack); + gistfreestack(p->s_markstk); + pfree(s->opaque); + } + + gistdropscan(s); + /* XXX don't unset read lock -- two-phase locking */ } static void gistregscan(IndexScanDesc s) { - GISTScanList l; - - l = (GISTScanList) palloc(sizeof(GISTScanListData)); - l->gsl_scan = s; - l->gsl_next = GISTScans; - GISTScans = l; + GISTScanList l; + + l = (GISTScanList) palloc(sizeof(GISTScanListData)); + l->gsl_scan = s; + l->gsl_next = GISTScans; + GISTScans = l; } static void gistdropscan(IndexScanDesc s) { - GISTScanList l; - GISTScanList prev; - - prev = 
(GISTScanList) NULL; - - for (l = GISTScans; - l != (GISTScanList) NULL && l->gsl_scan != s; - l = l->gsl_next) { - prev = l; - } - - if (l == (GISTScanList) NULL) - elog(WARN, "GiST scan list corrupted -- cannot find 0x%lx", s); - - if (prev == (GISTScanList) NULL) - GISTScans = l->gsl_next; - else - prev->gsl_next = l->gsl_next; - - pfree(l); + GISTScanList l; + GISTScanList prev; + + prev = (GISTScanList) NULL; + + for (l = GISTScans; + l != (GISTScanList) NULL && l->gsl_scan != s; + l = l->gsl_next) + { + prev = l; + } + + if (l == (GISTScanList) NULL) + elog(WARN, "GiST scan list corrupted -- cannot find 0x%lx", s); + + if (prev == (GISTScanList) NULL) + GISTScans = l->gsl_next; + else + prev->gsl_next = l->gsl_next; + + pfree(l); } void gistadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum) { - GISTScanList l; - Oid relid; - - relid = r->rd_id; - for (l = GISTScans; l != (GISTScanList) NULL; l = l->gsl_next) { - if (l->gsl_scan->relation->rd_id == relid) - gistadjone(l->gsl_scan, op, blkno, offnum); - } + GISTScanList l; + Oid relid; + + relid = r->rd_id; + for (l = GISTScans; l != (GISTScanList) NULL; l = l->gsl_next) + { + if (l->gsl_scan->relation->rd_id == relid) + gistadjone(l->gsl_scan, op, blkno, offnum); + } } /* - * gistadjone() -- adjust one scan for update. + * gistadjone() -- adjust one scan for update. * - * By here, the scan passed in is on a modified relation. Op tells - * us what the modification is, and blkno and offind tell us what - * block and offset index were affected. This routine checks the - * current and marked positions, and the current and marked stacks, - * to see if any stored location needs to be changed because of the - * update. If so, we make the change here. + * By here, the scan passed in is on a modified relation. Op tells + * us what the modification is, and blkno and offind tell us what + * block and offset index were affected. 
This routine checks the + * current and marked positions, and the current and marked stacks, + * to see if any stored location needs to be changed because of the + * update. If so, we make the change here. */ static void gistadjone(IndexScanDesc s, - int op, - BlockNumber blkno, - OffsetNumber offnum) + int op, + BlockNumber blkno, + OffsetNumber offnum) { - GISTScanOpaque so; - - adjustiptr(s, &(s->currentItemData), op, blkno, offnum); - adjustiptr(s, &(s->currentMarkData), op, blkno, offnum); - - so = (GISTScanOpaque) s->opaque; - - if (op == GISTOP_SPLIT) { - adjuststack(so->s_stack, blkno, offnum); - adjuststack(so->s_markstk, blkno, offnum); - } + GISTScanOpaque so; + + adjustiptr(s, &(s->currentItemData), op, blkno, offnum); + adjustiptr(s, &(s->currentMarkData), op, blkno, offnum); + + so = (GISTScanOpaque) s->opaque; + + if (op == GISTOP_SPLIT) + { + adjuststack(so->s_stack, blkno, offnum); + adjuststack(so->s_markstk, blkno, offnum); + } } /* - * adjustiptr() -- adjust current and marked item pointers in the scan + * adjustiptr() -- adjust current and marked item pointers in the scan * - * Depending on the type of update and the place it happened, we - * need to do nothing, to back up one record, or to start over on - * the same page. + * Depending on the type of update and the place it happened, we + * need to do nothing, to back up one record, or to start over on + * the same page. 
*/ static void adjustiptr(IndexScanDesc s, - ItemPointer iptr, - int op, - BlockNumber blkno, - OffsetNumber offnum) + ItemPointer iptr, + int op, + BlockNumber blkno, + OffsetNumber offnum) { - OffsetNumber curoff; - GISTScanOpaque so; - - if (ItemPointerIsValid(iptr)) { - if (ItemPointerGetBlockNumber(iptr) == blkno) { - curoff = ItemPointerGetOffsetNumber(iptr); - so = (GISTScanOpaque) s->opaque; - - switch (op) { - case GISTOP_DEL: - /* back up one if we need to */ - if (curoff >= offnum) { - - if (curoff > FirstOffsetNumber) { - /* just adjust the item pointer */ - ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff)); - } else { - /* remember that we're before the current tuple */ - ItemPointerSet(iptr, blkno, FirstOffsetNumber); - if (iptr == &(s->currentItemData)) - so->s_flags |= GS_CURBEFORE; - else - so->s_flags |= GS_MRKBEFORE; - } + OffsetNumber curoff; + GISTScanOpaque so; + + if (ItemPointerIsValid(iptr)) + { + if (ItemPointerGetBlockNumber(iptr) == blkno) + { + curoff = ItemPointerGetOffsetNumber(iptr); + so = (GISTScanOpaque) s->opaque; + + switch (op) + { + case GISTOP_DEL: + /* back up one if we need to */ + if (curoff >= offnum) + { + + if (curoff > FirstOffsetNumber) + { + /* just adjust the item pointer */ + ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff)); + } + else + { + /* remember that we're before the current tuple */ + ItemPointerSet(iptr, blkno, FirstOffsetNumber); + if (iptr == &(s->currentItemData)) + so->s_flags |= GS_CURBEFORE; + else + so->s_flags |= GS_MRKBEFORE; + } + } + break; + + case GISTOP_SPLIT: + /* back to start of page on split */ + ItemPointerSet(iptr, blkno, FirstOffsetNumber); + if (iptr == &(s->currentItemData)) + so->s_flags &= ~GS_CURBEFORE; + else + so->s_flags &= ~GS_MRKBEFORE; + break; + + default: + elog(WARN, "Bad operation in GiST scan adjust: %d", op); + } } - break; - - case GISTOP_SPLIT: - /* back to start of page on split */ - ItemPointerSet(iptr, blkno, FirstOffsetNumber); - if (iptr == 
&(s->currentItemData)) - so->s_flags &= ~GS_CURBEFORE; - else - so->s_flags &= ~GS_MRKBEFORE; - break; - - default: - elog(WARN, "Bad operation in GiST scan adjust: %d", op); - } } - } } /* - * adjuststack() -- adjust the supplied stack for a split on a page in - * the index we're scanning. + * adjuststack() -- adjust the supplied stack for a split on a page in + * the index we're scanning. * - * If a page on our parent stack has split, we need to back up to the - * beginning of the page and rescan it. The reason for this is that - * the split algorithm for GiSTs doesn't order tuples in any useful - * way on a single page. This means on that a split, we may wind up - * looking at some heap tuples more than once. This is handled in the - * access method update code for heaps; if we've modified the tuple we - * are looking at already in this transaction, we ignore the update - * request. + * If a page on our parent stack has split, we need to back up to the + * beginning of the page and rescan it. The reason for this is that + * the split algorithm for GiSTs doesn't order tuples in any useful + * way on a single page. This means on that a split, we may wind up + * looking at some heap tuples more than once. This is handled in the + * access method update code for heaps; if we've modified the tuple we + * are looking at already in this transaction, we ignore the update + * request. 
*/ /*ARGSUSED*/ static void -adjuststack(GISTSTACK *stk, - BlockNumber blkno, - OffsetNumber offnum) +adjuststack(GISTSTACK * stk, + BlockNumber blkno, + OffsetNumber offnum) { - while (stk != (GISTSTACK *) NULL) { - if (stk->gs_blk == blkno) - stk->gs_child = FirstOffsetNumber; - - stk = stk->gs_parent; - } + while (stk != (GISTSTACK *) NULL) + { + if (stk->gs_blk == blkno) + stk->gs_child = FirstOffsetNumber; + + stk = stk->gs_parent; + } } diff --git a/src/backend/access/gist/giststrat.c b/src/backend/access/gist/giststrat.c index 8c78ccec3a..c7a6f9ff78 100644 --- a/src/backend/access/gist/giststrat.c +++ b/src/backend/access/gist/giststrat.c @@ -1,116 +1,117 @@ /*------------------------------------------------------------------------- * * giststrat.c-- - * strategy map data for GiSTs. + * strategy map data for GiSTs. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * /usr/local/devel/pglite/cvs/src/backend/access/gist/giststrat.c,v 1.4 1995/06/14 00:10:05 jolly Exp + * /usr/local/devel/pglite/cvs/src/backend/access/gist/giststrat.c,v 1.4 1995/06/14 00:10:05 jolly Exp * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/gist.h> #include <access/istrat.h> /* - * Note: negate, commute, and negatecommute all assume that operators are - * ordered as follows in the strategy map: + * Note: negate, commute, and negatecommute all assume that operators are + * ordered as follows in the strategy map: * - * contains, contained-by + * contains, contained-by * - * The negate, commute, and negatecommute arrays are used by the planner - * to plan indexed scans over data that appears in the qualificiation in - * a boolean negation, or whose operands appear in the wrong order. 
For - * example, if the operator "<%" means "contains", and the user says + * The negate, commute, and negatecommute arrays are used by the planner + * to plan indexed scans over data that appears in the qualificiation in + * a boolean negation, or whose operands appear in the wrong order. For + * example, if the operator "<%" means "contains", and the user says * - * where not rel.box <% "(10,10,20,20)"::box + * where not rel.box <% "(10,10,20,20)"::box * - * the planner can plan an index scan by noting that GiST indices have - * an operator in their operator class for negating <%. + * the planner can plan an index scan by noting that GiST indices have + * an operator in their operator class for negating <%. * - * Similarly, if the user says something like + * Similarly, if the user says something like * - * where "(10,10,20,20)"::box <% rel.box + * where "(10,10,20,20)"::box <% rel.box * - * the planner can see that the GiST index on rel.box has an operator in - * its opclass for commuting <%, and plan the scan using that operator. - * This added complexity in the access methods makes the planner a lot easier - * to write. + * the planner can see that the GiST index on rel.box has an operator in + * its opclass for commuting <%, and plan the scan using that operator. + * This added complexity in the access methods makes the planner a lot easier + * to write. */ /* if a op b, what operator tells us if (not a op b)? */ -static StrategyNumber GISTNegate[GISTNStrategies] = { - InvalidStrategy, - InvalidStrategy, - InvalidStrategy - }; +static StrategyNumber GISTNegate[GISTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy +}; /* if a op_1 b, what is the operator op_2 such that b op_2 a? 
*/ -static StrategyNumber GISTCommute[GISTNStrategies] = { - InvalidStrategy, - InvalidStrategy, - InvalidStrategy - }; +static StrategyNumber GISTCommute[GISTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy +}; /* if a op_1 b, what is the operator op_2 such that (b !op_2 a)? */ -static StrategyNumber GISTNegateCommute[GISTNStrategies] = { - InvalidStrategy, - InvalidStrategy, - InvalidStrategy - }; +static StrategyNumber GISTNegateCommute[GISTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy +}; /* - * GiSTs do not currently support TermData (see rtree/rtstrat.c for + * GiSTs do not currently support TermData (see rtree/rtstrat.c for * discussion of * TermData) -- such logic must be encoded in the user's Consistent function. */ /* - * If you were sufficiently attentive to detail, you would go through - * the ExpressionData pain above for every one of the strategies - * we defined. I am not. Now we declare the StrategyEvaluationData - * structure that gets shipped around to help the planner and the access - * method decide what sort of scan it should do, based on (a) what the - * user asked for, (b) what operators are defined for a particular opclass, - * and (c) the reams of information we supplied above. + * If you were sufficiently attentive to detail, you would go through + * the ExpressionData pain above for every one of the strategies + * we defined. I am not. Now we declare the StrategyEvaluationData + * structure that gets shipped around to help the planner and the access + * method decide what sort of scan it should do, based on (a) what the + * user asked for, (b) what operators are defined for a particular opclass, + * and (c) the reams of information we supplied above. * - * The idea of all of this initialized data is to make life easier on the - * user when he defines a new operator class to use this access method. 
- * By filling in all the data, we let him get away with leaving holes in his - * operator class, and still let him use the index. The added complexity - * in the access methods just isn't worth the trouble, though. + * The idea of all of this initialized data is to make life easier on the + * user when he defines a new operator class to use this access method. + * By filling in all the data, we let him get away with leaving holes in his + * operator class, and still let him use the index. The added complexity + * in the access methods just isn't worth the trouble, though. */ static StrategyEvaluationData GISTEvaluationData = { - GISTNStrategies, /* # of strategies */ - (StrategyTransformMap) GISTNegate, /* how to do (not qual) */ - (StrategyTransformMap) GISTCommute, /* how to swap operands */ - (StrategyTransformMap) GISTNegateCommute, /* how to do both */ - { NULL } + GISTNStrategies, /* # of strategies */ + (StrategyTransformMap) GISTNegate, /* how to do (not qual) */ + (StrategyTransformMap) GISTCommute, /* how to swap operands */ + (StrategyTransformMap) GISTNegateCommute, /* how to do both */ + {NULL} }; StrategyNumber RelationGetGISTStrategy(Relation r, - AttrNumber attnum, - RegProcedure proc) + AttrNumber attnum, + RegProcedure proc) { - return (RelationGetStrategy(r, attnum, &GISTEvaluationData, proc)); + return (RelationGetStrategy(r, attnum, &GISTEvaluationData, proc)); } #ifdef NOT_USED bool RelationInvokeGISTStrategy(Relation r, - AttrNumber attnum, - StrategyNumber s, - Datum left, - Datum right) + AttrNumber attnum, + StrategyNumber s, + Datum left, + Datum right) { - return (RelationInvokeStrategy(r, &GISTEvaluationData, attnum, s, - left, right)); + return (RelationInvokeStrategy(r, &GISTEvaluationData, attnum, s, + left, right)); } + #endif diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 89f81fc56a..e13539c4ad 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -1,16 +1,16 @@ 
/*------------------------------------------------------------------------- * * hash.c-- - * Implementation of Margo Seltzer's Hashing package for postgres. + * Implementation of Margo Seltzer's Hashing package for postgres. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.12 1997/01/10 09:46:13 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.13 1997/09/07 04:37:49 momjian Exp $ * * NOTES - * This file contains only the public interface routines. + * This file contains only the public interface routines. * *------------------------------------------------------------------------- */ @@ -26,452 +26,483 @@ #include <miscadmin.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -bool BuildingHash = false; +bool BuildingHash = false; /* - * hashbuild() -- build a new hash index. + * hashbuild() -- build a new hash index. * - * We use a global variable to record the fact that we're creating - * a new index. This is used to avoid high-concurrency locking, - * since the index won't be visible until this transaction commits - * and since building is guaranteed to be single-threaded. + * We use a global variable to record the fact that we're creating + * a new index. This is used to avoid high-concurrency locking, + * since the index won't be visible until this transaction commits + * and since building is guaranteed to be single-threaded. 
*/ void hashbuild(Relation heap, - Relation index, - int natts, - AttrNumber *attnum, - IndexStrategy istrat, - uint16 pcount, - Datum *params, - FuncIndexInfo *finfo, - PredInfo *predInfo) + Relation index, + int natts, + AttrNumber * attnum, + IndexStrategy istrat, + uint16 pcount, + Datum * params, + FuncIndexInfo * finfo, + PredInfo * predInfo) { - HeapScanDesc hscan; - Buffer buffer; - HeapTuple htup; - IndexTuple itup; - TupleDesc htupdesc, itupdesc; - Datum *attdata; - bool *nulls; - InsertIndexResult res; - int nhtups, nitups; - int i; - HashItem hitem; + HeapScanDesc hscan; + Buffer buffer; + HeapTuple htup; + IndexTuple itup; + TupleDesc htupdesc, + itupdesc; + Datum *attdata; + bool *nulls; + InsertIndexResult res; + int nhtups, + nitups; + int i; + HashItem hitem; + #ifndef OMIT_PARTIAL_INDEX - ExprContext *econtext; - TupleTable tupleTable; - TupleTableSlot *slot; + ExprContext *econtext; + TupleTable tupleTable; + TupleTableSlot *slot; + #endif - Oid hrelid, irelid; - Node *pred, *oldPred; - - /* note that this is a new btree */ - BuildingHash = true; - - pred = predInfo->pred; - oldPred = predInfo->oldPred; - - /* initialize the hash index metadata page (if this is a new index) */ - if (oldPred == NULL) - _hash_metapinit(index); - - /* get tuple descriptors for heap and index relations */ - htupdesc = RelationGetTupleDescriptor(heap); - itupdesc = RelationGetTupleDescriptor(index); - - /* get space for data items that'll appear in the index tuple */ - attdata = (Datum *) palloc(natts * sizeof(Datum)); - nulls = (bool *) palloc(natts * sizeof(bool)); - - /* - * If this is a predicate (partial) index, we will need to evaluate the - * predicate using ExecQual, which requires the current tuple to be in a - * slot of a TupleTable. In addition, ExecQual must have an ExprContext - * referring to that slot. Here, we initialize dummy TupleTable and - * ExprContext objects for this purpose. 
--Nels, Feb '92 - */ + Oid hrelid, + irelid; + Node *pred, + *oldPred; + + /* note that this is a new btree */ + BuildingHash = true; + + pred = predInfo->pred; + oldPred = predInfo->oldPred; + + /* initialize the hash index metadata page (if this is a new index) */ + if (oldPred == NULL) + _hash_metapinit(index); + + /* get tuple descriptors for heap and index relations */ + htupdesc = RelationGetTupleDescriptor(heap); + itupdesc = RelationGetTupleDescriptor(index); + + /* get space for data items that'll appear in the index tuple */ + attdata = (Datum *) palloc(natts * sizeof(Datum)); + nulls = (bool *) palloc(natts * sizeof(bool)); + + /* + * If this is a predicate (partial) index, we will need to evaluate + * the predicate using ExecQual, which requires the current tuple to + * be in a slot of a TupleTable. In addition, ExecQual must have an + * ExprContext referring to that slot. Here, we initialize dummy + * TupleTable and ExprContext objects for this purpose. --Nels, Feb + * '92 + */ #ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - econtext = makeNode(ExprContext); - FillDummyExprContext(econtext, slot, htupdesc, buffer); - } - else /* quiet the compiler */ + if (pred != NULL || oldPred != NULL) + { + tupleTable = ExecCreateTupleTable(1); + slot = ExecAllocTableSlot(tupleTable); + econtext = makeNode(ExprContext); + FillDummyExprContext(econtext, slot, htupdesc, buffer); + } + else +/* quiet the compiler */ { econtext = NULL; tupleTable = 0; slot = 0; } -#endif /* OMIT_PARTIAL_INDEX */ - - /* start a heap scan */ - hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); - htup = heap_getnext(hscan, 0, &buffer); - - /* build the index */ - nhtups = nitups = 0; - - for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) { - - nhtups++; - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was 
already in the existing partial index - */ - if (oldPred != NULL) { - /*SetSlotContents(slot, htup); */ +#endif /* OMIT_PARTIAL_INDEX */ + + /* start a heap scan */ + hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); + htup = heap_getnext(hscan, 0, &buffer); + + /* build the index */ + nhtups = nitups = 0; + + for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) + { + + nhtups++; + + /* + * If oldPred != NULL, this is an EXTEND INDEX command, so skip + * this tuple if it was already in the existing partial index + */ + if (oldPred != NULL) + { + /* SetSlotContents(slot, htup); */ #ifndef OMIT_PARTIAL_INDEX - slot->val = htup; - if (ExecQual((List*)oldPred, econtext) == true) { + slot->val = htup; + if (ExecQual((List *) oldPred, econtext) == true) + { + nitups++; + continue; + } +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* + * Skip this tuple if it doesn't satisfy the partial-index + * predicate + */ + if (pred != NULL) + { +#ifndef OMIT_PARTIAL_INDEX + /* SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List *) pred, econtext) == false) + continue; +#endif /* OMIT_PARTIAL_INDEX */ + } + nitups++; - continue; - } -#endif /* OMIT_PARTIAL_INDEX */ + + /* + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. + */ + for (i = 1; i <= natts; i++) + { + int attoff; + bool attnull; + + /* + * Offsets are from the start of the tuple, and are + * zero-based; indices are one-based. The next call returns i + * - 1. That's data hiding for you. + */ + + /* attoff = i - 1 */ + attoff = AttrNumberGetAttrOffset(i); + + /* + * below, attdata[attoff] set to equal some datum & attnull is + * changed to indicate whether or not the attribute is null + * for this tuple + */ + attdata[attoff] = GetIndexValue(htup, + htupdesc, + attoff, + attnum, + finfo, + &attnull, + buffer); + nulls[attoff] = (attnull ? 
'n' : ' '); + } + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(itupdesc, attdata, nulls); + + /* + * If the single index key is null, we don't insert it into the + * index. Hash tables support scans on '='. Relational algebra + * says that A = B returns null if either A or B is null. This + * means that no qualification used in an index scan could ever + * return true on a null attribute. It also means that indices + * can't be used by ISNULL or NOTNULL scans, but that's an + * artifact of the strategy map architecture chosen in 1986, not + * of the way nulls are handled here. + */ + + if (itup->t_info & INDEX_NULL_MASK) + { + pfree(itup); + continue; + } + + itup->t_tid = htup->t_ctid; + hitem = _hash_formitem(itup); + res = _hash_doinsert(index, hitem); + pfree(hitem); + pfree(itup); + pfree(res); } - - /* Skip this tuple if it doesn't satisfy the partial-index predicate */ - if (pred != NULL) { + + /* okay, all heap tuples are indexed */ + heap_endscan(hscan); + + if (pred != NULL || oldPred != NULL) + { #ifndef OMIT_PARTIAL_INDEX - /*SetSlotContents(slot, htup); */ - slot->val = htup; - if (ExecQual((List*)pred, econtext) == false) - continue; -#endif /* OMIT_PARTIAL_INDEX */ -} - - nitups++; - - /* - * For the current heap tuple, extract all the attributes - * we use in this index, and note which are null. - */ - for (i = 1; i <= natts; i++) { - int attoff; - bool attnull; - - /* - * Offsets are from the start of the tuple, and are - * zero-based; indices are one-based. The next call - * returns i - 1. That's data hiding for you. - */ - - /* attoff = i - 1 */ - attoff = AttrNumberGetAttrOffset(i); - - /* below, attdata[attoff] set to equal some datum & - * attnull is changed to indicate whether or not the attribute - * is null for this tuple - */ - attdata[attoff] = GetIndexValue(htup, - htupdesc, - attoff, - attnum, - finfo, - &attnull, - buffer); - nulls[attoff] = (attnull ? 
'n' : ' '); + ExecDestroyTupleTable(tupleTable, true); + pfree(econtext); +#endif /* OMIT_PARTIAL_INDEX */ } - - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(itupdesc, attdata, nulls); - + /* - * If the single index key is null, we don't insert it into - * the index. Hash tables support scans on '='. - * Relational algebra says that A = B - * returns null if either A or B is null. This - * means that no qualification used in an index scan could ever - * return true on a null attribute. It also means that indices - * can't be used by ISNULL or NOTNULL scans, but that's an - * artifact of the strategy map architecture chosen in 1986, not - * of the way nulls are handled here. + * Since we just counted the tuples in the heap, we update its stats + * in pg_class to guarantee that the planner takes advantage of the + * index we just created. Finally, only update statistics during + * normal index definitions, not for indices on system catalogs + * created during bootstrap processing. We must close the relations + * before updatings statistics to guarantee that the relcache entries + * are flushed when we increment the command counter in UpdateStats(). */ - - if (itup->t_info & INDEX_NULL_MASK) { - pfree(itup); - continue; - } - - itup->t_tid = htup->t_ctid; - hitem = _hash_formitem(itup); - res = _hash_doinsert(index, hitem); - pfree(hitem); - pfree(itup); - pfree(res); - } - - /* okay, all heap tuples are indexed */ - heap_endscan(hscan); - - if (pred != NULL || oldPred != NULL) { -#ifndef OMIT_PARTIAL_INDEX - ExecDestroyTupleTable(tupleTable, true); - pfree(econtext); -#endif /* OMIT_PARTIAL_INDEX */ - } - - /* - * Since we just counted the tuples in the heap, we update its - * stats in pg_class to guarantee that the planner takes advantage - * of the index we just created. Finally, only update statistics - * during normal index definitions, not for indices on system catalogs - * created during bootstrap processing. 
We must close the relations - * before updatings statistics to guarantee that the relcache entries - * are flushed when we increment the command counter in UpdateStats(). - */ - if (IsNormalProcessingMode()) + if (IsNormalProcessingMode()) { - hrelid = heap->rd_id; - irelid = index->rd_id; - heap_close(heap); - index_close(index); - UpdateStats(hrelid, nhtups, true); - UpdateStats(irelid, nitups, false); - if (oldPred != NULL) { - if (nitups == nhtups) pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } + hrelid = heap->rd_id; + irelid = index->rd_id; + heap_close(heap); + index_close(index); + UpdateStats(hrelid, nhtups, true); + UpdateStats(irelid, nitups, false); + if (oldPred != NULL) + { + if (nitups == nhtups) + pred = NULL; + UpdateIndexPredicate(irelid, oldPred, pred); + } } - - /* be tidy */ - pfree(nulls); - pfree(attdata); - - /* all done */ - BuildingHash = false; + + /* be tidy */ + pfree(nulls); + pfree(attdata); + + /* all done */ + BuildingHash = false; } /* - * hashinsert() -- insert an index tuple into a hash table. + * hashinsert() -- insert an index tuple into a hash table. * - * Hash on the index tuple's key, find the appropriate location - * for the new tuple, put it there, and return an InsertIndexResult - * to the caller. + * Hash on the index tuple's key, find the appropriate location + * for the new tuple, put it there, and return an InsertIndexResult + * to the caller. 
*/ InsertIndexResult -hashinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) +hashinsert(Relation rel, Datum * datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) { - HashItem hitem; - IndexTuple itup; - InsertIndexResult res; - - - /* generate an index tuple */ - itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls); - itup->t_tid = *ht_ctid; - - if (itup->t_info & INDEX_NULL_MASK) - return ((InsertIndexResult) NULL); - - hitem = _hash_formitem(itup); - - res = _hash_doinsert(rel, hitem); - - pfree(hitem); - pfree(itup); - - return (res); + HashItem hitem; + IndexTuple itup; + InsertIndexResult res; + + + /* generate an index tuple */ + itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls); + itup->t_tid = *ht_ctid; + + if (itup->t_info & INDEX_NULL_MASK) + return ((InsertIndexResult) NULL); + + hitem = _hash_formitem(itup); + + res = _hash_doinsert(rel, hitem); + + pfree(hitem); + pfree(itup); + + return (res); } /* - * hashgettuple() -- Get the next tuple in the scan. + * hashgettuple() -- Get the next tuple in the scan. */ -char * +char * hashgettuple(IndexScanDesc scan, ScanDirection dir) { - RetrieveIndexResult res; - - /* - * If we've already initialized this scan, we can just advance it - * in the appropriate direction. If we haven't done so yet, we - * call a routine to get the first item in the scan. - */ - - if (ItemPointerIsValid(&(scan->currentItemData))) - res = _hash_next(scan, dir); - else - res = _hash_first(scan, dir); - - return ((char *) res); + RetrieveIndexResult res; + + /* + * If we've already initialized this scan, we can just advance it in + * the appropriate direction. If we haven't done so yet, we call a + * routine to get the first item in the scan. 
+ */ + + if (ItemPointerIsValid(&(scan->currentItemData))) + res = _hash_next(scan, dir); + else + res = _hash_first(scan, dir); + + return ((char *) res); } /* - * hashbeginscan() -- start a scan on a hash index + * hashbeginscan() -- start a scan on a hash index */ -char * +char * hashbeginscan(Relation rel, - bool fromEnd, - uint16 keysz, - ScanKey scankey) + bool fromEnd, + uint16 keysz, + ScanKey scankey) { - IndexScanDesc scan; - HashScanOpaque so; - - scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey); - so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData)); - so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer; - scan->opaque = so; - scan->flags = 0x0; - - /* register scan in case we change pages it's using */ - _hash_regscan(scan); - - return ((char *) scan); + IndexScanDesc scan; + HashScanOpaque so; + + scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey); + so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData)); + so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer; + scan->opaque = so; + scan->flags = 0x0; + + /* register scan in case we change pages it's using */ + _hash_regscan(scan); + + return ((char *) scan); } /* - * hashrescan() -- rescan an index relation + * hashrescan() -- rescan an index relation */ void hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey) { - ItemPointer iptr; - HashScanOpaque so; - - so = (HashScanOpaque) scan->opaque; - - /* we hold a read lock on the current page in the scan */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); - so->hashso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); - so->hashso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* reset the scan key */ - if (scan->numberOfKeys > 0) { - memmove(scan->keyData, - scankey, - scan->numberOfKeys * 
sizeof(ScanKeyData)); - } + ItemPointer iptr; + HashScanOpaque so; + + so = (HashScanOpaque) scan->opaque; + + /* we hold a read lock on the current page in the scan */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* reset the scan key */ + if (scan->numberOfKeys > 0) + { + memmove(scan->keyData, + scankey, + scan->numberOfKeys * sizeof(ScanKeyData)); + } } /* - * hashendscan() -- close down a scan + * hashendscan() -- close down a scan */ void hashendscan(IndexScanDesc scan) { - - ItemPointer iptr; - HashScanOpaque so; - - so = (HashScanOpaque) scan->opaque; - - /* release any locks we still hold */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); - so->hashso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - if (BufferIsValid(so->hashso_mrkbuf)) - _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); - so->hashso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* don't need scan registered anymore */ - _hash_dropscan(scan); - - /* be tidy */ - pfree (scan->opaque); + + ItemPointer iptr; + HashScanOpaque so; + + so = (HashScanOpaque) scan->opaque; + + /* release any locks we still hold */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + if (BufferIsValid(so->hashso_mrkbuf)) + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + 
so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* don't need scan registered anymore */ + _hash_dropscan(scan); + + /* be tidy */ + pfree(scan->opaque); } /* - * hashmarkpos() -- save current scan position + * hashmarkpos() -- save current scan position * */ void hashmarkpos(IndexScanDesc scan) { - ItemPointer iptr; - HashScanOpaque so; - - /* see if we ever call this code. if we do, then so_mrkbuf a - * useful element in the scan->opaque structure. if this procedure - * is never called, so_mrkbuf should be removed from the scan->opaque - * structure. - */ - elog(NOTICE, "Hashmarkpos() called."); - - so = (HashScanOpaque) scan->opaque; - - /* release lock on old marked data, if any */ - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); - so->hashso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* bump lock on currentItemData and copy to currentMarkData */ - if (ItemPointerIsValid(&(scan->currentItemData))) { - so->hashso_mrkbuf = _hash_getbuf(scan->relation, - BufferGetBlockNumber(so->hashso_curbuf), - HASH_READ); - scan->currentMarkData = scan->currentItemData; - } + ItemPointer iptr; + HashScanOpaque so; + + /* + * see if we ever call this code. if we do, then so_mrkbuf a useful + * element in the scan->opaque structure. if this procedure is never + * called, so_mrkbuf should be removed from the scan->opaque + * structure. 
+ */ + elog(NOTICE, "Hashmarkpos() called."); + + so = (HashScanOpaque) scan->opaque; + + /* release lock on old marked data, if any */ + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* bump lock on currentItemData and copy to currentMarkData */ + if (ItemPointerIsValid(&(scan->currentItemData))) + { + so->hashso_mrkbuf = _hash_getbuf(scan->relation, + BufferGetBlockNumber(so->hashso_curbuf), + HASH_READ); + scan->currentMarkData = scan->currentItemData; + } } /* - * hashrestrpos() -- restore scan to last saved position + * hashrestrpos() -- restore scan to last saved position */ void hashrestrpos(IndexScanDesc scan) { - ItemPointer iptr; - HashScanOpaque so; - - /* see if we ever call this code. if we do, then so_mrkbuf a - * useful element in the scan->opaque structure. if this procedure - * is never called, so_mrkbuf should be removed from the scan->opaque - * structure. - */ - elog(NOTICE, "Hashrestrpos() called."); - - so = (HashScanOpaque) scan->opaque; - - /* release lock on current data, if any */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); - so->hashso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* bump lock on currentMarkData and copy to currentItemData */ - if (ItemPointerIsValid(&(scan->currentMarkData))) { - so->hashso_curbuf = - _hash_getbuf(scan->relation, - BufferGetBlockNumber(so->hashso_mrkbuf), - HASH_READ); - - scan->currentItemData = scan->currentMarkData; - } + ItemPointer iptr; + HashScanOpaque so; + + /* + * see if we ever call this code. if we do, then so_mrkbuf a useful + * element in the scan->opaque structure. if this procedure is never + * called, so_mrkbuf should be removed from the scan->opaque + * structure. 
+ */ + elog(NOTICE, "Hashrestrpos() called."); + + so = (HashScanOpaque) scan->opaque; + + /* release lock on current data, if any */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* bump lock on currentMarkData and copy to currentItemData */ + if (ItemPointerIsValid(&(scan->currentMarkData))) + { + so->hashso_curbuf = + _hash_getbuf(scan->relation, + BufferGetBlockNumber(so->hashso_mrkbuf), + HASH_READ); + + scan->currentItemData = scan->currentMarkData; + } } /* stubs */ void hashdelete(Relation rel, ItemPointer tid) { - /* adjust any active scans that will be affected by this deletion */ - _hash_adjscans(rel, tid); - - /* delete the data from the page */ - _hash_pagedel(rel, tid); -} + /* adjust any active scans that will be affected by this deletion */ + _hash_adjscans(rel, tid); + /* delete the data from the page */ + _hash_pagedel(rel, tid); +} diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 5862800b21..a3cbaa1a94 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -1,17 +1,17 @@ /*------------------------------------------------------------------------- * * hashfunc.c-- - * Comparison functions for hash access method. + * Comparison functions for hash access method. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.3 1996/11/10 02:57:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.4 1997/09/07 04:37:53 momjian Exp $ * * NOTES - * These functions are stored in pg_amproc. For each operator class - * defined on hash tables, they compute the hash value of the argument. + * These functions are stored in pg_amproc. 
For each operator class + * defined on hash tables, they compute the hash value of the argument. * *------------------------------------------------------------------------- */ @@ -20,206 +20,223 @@ #include "access/hash.h" -uint32 hashint2(int16 key) +uint32 +hashint2(int16 key) { - return ((uint32) ~key); + return ((uint32) ~ key); } -uint32 hashint4(uint32 key) +uint32 +hashint4(uint32 key) { - return (~key); + return (~key); } /* Hash function from Chris Torek. */ -uint32 hashfloat4(float32 keyp) +uint32 +hashfloat4(float32 keyp) { - int len; - int loop; - uint32 h; - char *kp = (char *) keyp; + int len; + int loop; + uint32 h; + char *kp = (char *) keyp; - len = sizeof(float32data); + len = sizeof(float32data); -#define HASH4a h = (h << 5) - h + *kp++; -#define HASH4b h = (h << 5) + h + *kp++; +#define HASH4a h = (h << 5) - h + *kp++; +#define HASH4b h = (h << 5) + h + *kp++; #define HASH4 HASH4b - h = 0; - if (len > 0) { - loop = (len + 8 - 1) >> 3; - - switch (len & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASH4; - case 7: - HASH4; - case 6: - HASH4; - case 5: - HASH4; - case 4: - HASH4; - case 3: - HASH4; - case 2: - HASH4; - case 1: - HASH4; - } while (--loop); + h = 0; + if (len > 0) + { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) + { + case 0: + do + { /* All fall throughs */ + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } } - } - return (h); -} + return (h); +} -uint32 hashfloat8(float64 keyp) +uint32 +hashfloat8(float64 keyp) { - int len; - int loop; - uint32 h; - char *kp = (char *) keyp; + int len; + int loop; + uint32 h; + char *kp = (char *) keyp; - len = sizeof(float64data); + len = sizeof(float64data); -#define HASH4a h = (h << 5) - h + *kp++; -#define HASH4b h = (h << 5) + h + *kp++; +#define HASH4a h = (h << 5) - h + *kp++; +#define HASH4b h = (h << 5) + h + *kp++; #define HASH4 HASH4b - h = 0; - if (len 
> 0) { - loop = (len + 8 - 1) >> 3; - - switch (len & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASH4; - case 7: - HASH4; - case 6: - HASH4; - case 5: - HASH4; - case 4: - HASH4; - case 3: - HASH4; - case 2: - HASH4; - case 1: - HASH4; - } while (--loop); + h = 0; + if (len > 0) + { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) + { + case 0: + do + { /* All fall throughs */ + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } } - } - return (h); -} + return (h); +} -uint32 hashoid(Oid key) +uint32 +hashoid(Oid key) { - return ((uint32) ~key); + return ((uint32) ~ key); } -uint32 hashchar(char key) +uint32 +hashchar(char key) { - int len; - uint32 h; + int len; + uint32 h; + + len = sizeof(char); - len = sizeof(char); +#define PRIME1 37 +#define PRIME2 1048583 -#define PRIME1 37 -#define PRIME2 1048583 + h = 0; + /* Convert char to integer */ + h = h * PRIME1 ^ (key - ' '); + h %= PRIME2; - h = 0; - /* Convert char to integer */ - h = h * PRIME1 ^ (key - ' '); - h %= PRIME2; - - return (h); + return (h); } -uint32 hashchar2(uint16 intkey) +uint32 +hashchar2(uint16 intkey) { - uint32 h; - int len; - char *key = (char *) &intkey; - - h = 0; - len = sizeof(uint16); - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + char *key = (char *) &intkey; + + h = 0; + len = sizeof(uint16); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } -uint32 hashchar4(uint32 intkey) +uint32 +hashchar4(uint32 intkey) { - uint32 h; - int len; - char *key = (char *) &intkey; - - h = 0; - len = sizeof(uint32); - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + char *key = (char *) &intkey; + + h = 0; + len = 
sizeof(uint32); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } -uint32 hashchar8(char *key) +uint32 +hashchar8(char *key) { - uint32 h; - int len; - - h = 0; - len = sizeof(char8); - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + + h = 0; + len = sizeof(char8); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } -uint32 hashname(NameData *n) +uint32 +hashname(NameData * n) { - uint32 h; - int len; - char *key; - - key = n->data; - - h = 0; - len = NAMEDATALEN; - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + char *key; + + key = n->data; + + h = 0; + len = NAMEDATALEN; + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } -uint32 hashchar16(char *key) +uint32 +hashchar16(char *key) { - uint32 h; - int len; - - h = 0; - len = sizeof(char16); - /* Convert string to integer */ - while (len--) - h = h * PRIME1 ^ (*key++ - ' '); - h %= PRIME2; - - return (h); + uint32 h; + int len; + + h = 0; + len = sizeof(char16); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); } @@ -234,45 +251,49 @@ uint32 hashchar16(char *key) * * "OZ's original sdbm hash" */ -uint32 hashtext(struct varlena *key) +uint32 +hashtext(struct varlena * key) { - int keylen; - char *keydata; - uint32 n; - int loop; - - keydata = VARDATA(key); - keylen = VARSIZE(key); - - /* keylen includes the four bytes in which string keylength is stored */ - keylen -= sizeof(VARSIZE(key)); - -#define HASHC n = *keydata++ + 65599 * n - - n = 0; - if (keylen > 0) { - loop = (keylen + 8 - 1) >> 3; - - switch (keylen & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASHC; 
- case 7: - HASHC; - case 6: - HASHC; - case 5: - HASHC; - case 4: - HASHC; - case 3: - HASHC; - case 2: - HASHC; - case 1: - HASHC; - } while (--loop); + int keylen; + char *keydata; + uint32 n; + int loop; + + keydata = VARDATA(key); + keylen = VARSIZE(key); + + /* keylen includes the four bytes in which string keylength is stored */ + keylen -= sizeof(VARSIZE(key)); + +#define HASHC n = *keydata++ + 65599 * n + + n = 0; + if (keylen > 0) + { + loop = (keylen + 8 - 1) >> 3; + + switch (keylen & (8 - 1)) + { + case 0: + do + { /* All fall throughs */ + HASHC; + case 7: + HASHC; + case 6: + HASHC; + case 5: + HASHC; + case 4: + HASHC; + case 3: + HASHC; + case 2: + HASHC; + case 1: + HASHC; + } while (--loop); + } } - } - return (n); -} + return (n); +} diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index f1233c68b2..4829093589 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -1,19 +1,19 @@ /*------------------------------------------------------------------------- * * hashinsert.c-- - * Item insertion in hash tables for Postgres. + * Item insertion in hash tables for Postgres. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.8 1997/08/12 22:51:30 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.9 1997/09/07 04:37:56 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <storage/bufmgr.h> #include <utils/memutils.h> @@ -22,211 +22,221 @@ static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, S static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem); /* - * _hash_doinsert() -- Handle insertion of a single HashItem in the table. 
+ * _hash_doinsert() -- Handle insertion of a single HashItem in the table. * - * This routine is called by the public interface routines, hashbuild - * and hashinsert. By here, hashitem is filled in, and has a unique - * (xid, seqno) pair. The datum to be used as a "key" is in the - * hashitem. + * This routine is called by the public interface routines, hashbuild + * and hashinsert. By here, hashitem is filled in, and has a unique + * (xid, seqno) pair. The datum to be used as a "key" is in the + * hashitem. */ InsertIndexResult _hash_doinsert(Relation rel, HashItem hitem) { - Buffer buf; - Buffer metabuf; - BlockNumber blkno; - HashMetaPage metap; - IndexTuple itup; - InsertIndexResult res; - ScanKey itup_scankey; - int natts; - Page page; - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - /* we need a scan key to do our search, so build one */ - itup = &(hitem->hash_itup); - if ((natts = rel->rd_rel->relnatts) != 1) - elog(WARN, "Hash indices valid for only one index key."); - itup_scankey = _hash_mkscankey(rel, itup, metap); - - /* - * find the first page in the bucket chain containing this key and - * place it in buf. _hash_search obtains a read lock for us. - */ - _hash_search(rel, natts, itup_scankey, &buf, metap); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE); - - /* - * trade in our read lock for a write lock so that we can do the - * insertion. - */ - blkno = BufferGetBlockNumber(buf); - _hash_relbuf(rel, buf, HASH_READ); - buf = _hash_getbuf(rel, blkno, HASH_WRITE); - - - /* - * XXX btree comment (haven't decided what to do in hash): don't - * think the bucket can be split while we're reading the metapage. - * - * If the page was split between the time that we surrendered our - * read lock and acquired our write lock, then this page may no - * longer be the right place for the key we want to insert. 
- */ - - /* do the insertion */ - res = _hash_insertonpg(rel, buf, natts, itup_scankey, - hitem, metabuf); - - /* be tidy */ - _hash_freeskey(itup_scankey); - - return (res); + Buffer buf; + Buffer metabuf; + BlockNumber blkno; + HashMetaPage metap; + IndexTuple itup; + InsertIndexResult res; + ScanKey itup_scankey; + int natts; + Page page; + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* we need a scan key to do our search, so build one */ + itup = &(hitem->hash_itup); + if ((natts = rel->rd_rel->relnatts) != 1) + elog(WARN, "Hash indices valid for only one index key."); + itup_scankey = _hash_mkscankey(rel, itup, metap); + + /* + * find the first page in the bucket chain containing this key and + * place it in buf. _hash_search obtains a read lock for us. + */ + _hash_search(rel, natts, itup_scankey, &buf, metap); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + + /* + * trade in our read lock for a write lock so that we can do the + * insertion. + */ + blkno = BufferGetBlockNumber(buf); + _hash_relbuf(rel, buf, HASH_READ); + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + + + /* + * XXX btree comment (haven't decided what to do in hash): don't think + * the bucket can be split while we're reading the metapage. + * + * If the page was split between the time that we surrendered our read + * lock and acquired our write lock, then this page may no longer be + * the right place for the key we want to insert. + */ + + /* do the insertion */ + res = _hash_insertonpg(rel, buf, natts, itup_scankey, + hitem, metabuf); + + /* be tidy */ + _hash_freeskey(itup_scankey); + + return (res); } /* - * _hash_insertonpg() -- Insert a tuple on a particular page in the table. + * _hash_insertonpg() -- Insert a tuple on a particular page in the table. 
* - * This recursive procedure does the following things: + * This recursive procedure does the following things: * - * + if necessary, splits the target page. - * + inserts the tuple. + * + if necessary, splits the target page. + * + inserts the tuple. * - * On entry, we must have the right buffer on which to do the - * insertion, and the buffer must be pinned and locked. On return, - * we will have dropped both the pin and the write lock on the buffer. + * On entry, we must have the right buffer on which to do the + * insertion, and the buffer must be pinned and locked. On return, + * we will have dropped both the pin and the write lock on the buffer. * */ -static InsertIndexResult +static InsertIndexResult _hash_insertonpg(Relation rel, - Buffer buf, - int keysz, - ScanKey scankey, - HashItem hitem, - Buffer metabuf) + Buffer buf, + int keysz, + ScanKey scankey, + HashItem hitem, + Buffer metabuf) { - InsertIndexResult res; - Page page; - BlockNumber itup_blkno; - OffsetNumber itup_off; - int itemsz; - HashPageOpaque pageopaque; - bool do_expand = false; - Buffer ovflbuf; - HashMetaPage metap; - Bucket bucket; - - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); - bucket = pageopaque->hasho_bucket; - - itemsz = IndexTupleDSize(hitem->hash_itup) - + (sizeof(HashItemData) - sizeof(IndexTupleData)); - itemsz = DOUBLEALIGN(itemsz); - - while (PageGetFreeSpace(page) < itemsz) { - /* - * no space on this page; check for an overflow page - */ - if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) { - /* - * ovfl page exists; go get it. if it doesn't have room, - * we'll find out next pass through the loop test above. 
- */ - ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno, - HASH_WRITE); - _hash_relbuf(rel, buf, HASH_WRITE); - buf = ovflbuf; - page = BufferGetPage(buf); - } else { - /* - * we're at the end of the bucket chain and we haven't - * found a page with enough room. allocate a new overflow - * page. - */ - do_expand = true; - ovflbuf = _hash_addovflpage(rel, &metabuf, buf); - _hash_relbuf(rel, buf, HASH_WRITE); - buf = ovflbuf; - page = BufferGetPage(buf); - - if (PageGetFreeSpace(page) < itemsz) { - /* it doesn't fit on an empty page -- give up */ - elog(WARN, "hash item too large"); - } - } - _hash_checkpage(page, LH_OVERFLOW_PAGE); + InsertIndexResult res; + Page page; + BlockNumber itup_blkno; + OffsetNumber itup_off; + int itemsz; + HashPageOpaque pageopaque; + bool do_expand = false; + Buffer ovflbuf; + HashMetaPage metap; + Bucket bucket; + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(pageopaque->hasho_bucket == bucket); - } - - itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem); - itup_blkno = BufferGetBlockNumber(buf); - - /* by here, the new tuple is inserted */ - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - - ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); - - if (res != NULL) { - /* - * Increment the number of keys in the table. - * We switch lock access type just for a moment - * to allow greater accessibility to the metapage. 
- */ - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, - HASH_READ, HASH_WRITE); - metap->hashm_nkeys += 1; - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, - HASH_WRITE, HASH_READ); - - } - - _hash_wrtbuf(rel, buf); - - if (do_expand || - (metap->hashm_nkeys / (metap->hashm_maxbucket + 1)) - > metap->hashm_ffactor) { - _hash_expandtable(rel, metabuf); - } - _hash_relbuf(rel, metabuf, HASH_READ); - return (res); -} + bucket = pageopaque->hasho_bucket; + + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + + while (PageGetFreeSpace(page) < itemsz) + { + + /* + * no space on this page; check for an overflow page + */ + if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) + { + + /* + * ovfl page exists; go get it. if it doesn't have room, + * we'll find out next pass through the loop test above. + */ + ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno, + HASH_WRITE); + _hash_relbuf(rel, buf, HASH_WRITE); + buf = ovflbuf; + page = BufferGetPage(buf); + } + else + { + + /* + * we're at the end of the bucket chain and we haven't found a + * page with enough room. allocate a new overflow page. 
+ */ + do_expand = true; + ovflbuf = _hash_addovflpage(rel, &metabuf, buf); + _hash_relbuf(rel, buf, HASH_WRITE); + buf = ovflbuf; + page = BufferGetPage(buf); + + if (PageGetFreeSpace(page) < itemsz) + { + /* it doesn't fit on an empty page -- give up */ + elog(WARN, "hash item too large"); + } + } + _hash_checkpage(page, LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(pageopaque->hasho_bucket == bucket); + } + + itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem); + itup_blkno = BufferGetBlockNumber(buf); + + /* by here, the new tuple is inserted */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + + ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); + + if (res != NULL) + { + + /* + * Increment the number of keys in the table. We switch lock + * access type just for a moment to allow greater accessibility to + * the metapage. + */ + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, + HASH_READ, HASH_WRITE); + metap->hashm_nkeys += 1; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, + HASH_WRITE, HASH_READ); + + } + + _hash_wrtbuf(rel, buf); + + if (do_expand || + (metap->hashm_nkeys / (metap->hashm_maxbucket + 1)) + > metap->hashm_ffactor) + { + _hash_expandtable(rel, metabuf); + } + _hash_relbuf(rel, metabuf, HASH_READ); + return (res); +} /* - * _hash_pgaddtup() -- add a tuple to a particular page in the index. + * _hash_pgaddtup() -- add a tuple to a particular page in the index. * - * This routine adds the tuple to the page as requested, and keeps the - * write lock and reference associated with the page's buffer. It is - * an error to call pgaddtup() without a write lock and reference. + * This routine adds the tuple to the page as requested, and keeps the + * write lock and reference associated with the page's buffer. It is + * an error to call pgaddtup() without a write lock and reference. 
*/ -static OffsetNumber +static OffsetNumber _hash_pgaddtup(Relation rel, - Buffer buf, - int keysz, - ScanKey itup_scankey, - Size itemsize, - HashItem hitem) + Buffer buf, + int keysz, + ScanKey itup_scankey, + Size itemsize, + HashItem hitem) { - OffsetNumber itup_off; - Page page; - - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - - itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); - PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED); - - /* write the buffer, but hold our lock */ - _hash_wrtnorelbuf(rel, buf); - - return (itup_off); + OffsetNumber itup_off; + Page page; + + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + + itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED); + + /* write the buffer, but hold our lock */ + _hash_wrtnorelbuf(rel, buf); + + return (itup_off); } diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index d976c4818c..b6882d4d3e 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -1,400 +1,423 @@ /*------------------------------------------------------------------------- * * hashovfl.c-- - * Overflow page management code for the Postgres hash access method + * Overflow page management code for the Postgres hash access method * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.9 1997/08/12 22:51:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.10 1997/09/07 04:37:57 momjian Exp $ * * NOTES - * Overflow pages look like ordinary relation pages. + * Overflow pages look like ordinary relation pages. 
* *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <storage/bufmgr.h> #include <utils/memutils.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer *metabufp); -static uint32 _hash_firstfreebit(uint32 map); +static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer * metabufp); +static uint32 _hash_firstfreebit(uint32 map); /* - * _hash_addovflpage + * _hash_addovflpage + * + * Add an overflow page to the page currently pointed to by the buffer + * argument 'buf'. * - * Add an overflow page to the page currently pointed to by the buffer - * argument 'buf'. + * *Metabufp has a read lock upon entering the function; buf has a + * write lock. * - * *Metabufp has a read lock upon entering the function; buf has a - * write lock. - * */ Buffer -_hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf) +_hash_addovflpage(Relation rel, Buffer * metabufp, Buffer buf) { - - OverflowPageAddress oaddr; - BlockNumber ovflblkno; - Buffer ovflbuf; - HashMetaPage metap; - HashPageOpaque ovflopaque; - HashPageOpaque pageopaque; - Page page; - Page ovflpage; - - /* this had better be the last page in a bucket chain */ - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno)); - - metap = (HashMetaPage) BufferGetPage(*metabufp); - _hash_checkpage((Page) metap, LH_META_PAGE); - - /* allocate an empty overflow page */ - oaddr = _hash_getovfladdr(rel, metabufp); - if (oaddr == InvalidOvflAddress) { - elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr."); - } - ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr))); - Assert(BlockNumberIsValid(ovflblkno)); - ovflbuf = 
_hash_getbuf(rel, ovflblkno, HASH_WRITE); - Assert(BufferIsValid(ovflbuf)); - ovflpage = BufferGetPage(ovflbuf); - - /* initialize the new overflow page */ - _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); - ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); - ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf); - ovflopaque->hasho_nextblkno = InvalidBlockNumber; - ovflopaque->hasho_flag = LH_OVERFLOW_PAGE; - ovflopaque->hasho_oaddr = oaddr; - ovflopaque->hasho_bucket = pageopaque->hasho_bucket; - _hash_wrtnorelbuf(rel, ovflbuf); - - /* logically chain overflow page to previous page */ - pageopaque->hasho_nextblkno = ovflblkno; - _hash_wrtnorelbuf(rel, buf); - return (ovflbuf); + + OverflowPageAddress oaddr; + BlockNumber ovflblkno; + Buffer ovflbuf; + HashMetaPage metap; + HashPageOpaque ovflopaque; + HashPageOpaque pageopaque; + Page page; + Page ovflpage; + + /* this had better be the last page in a bucket chain */ + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno)); + + metap = (HashMetaPage) BufferGetPage(*metabufp); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* allocate an empty overflow page */ + oaddr = _hash_getovfladdr(rel, metabufp); + if (oaddr == InvalidOvflAddress) + { + elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr."); + } + ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr))); + Assert(BlockNumberIsValid(ovflblkno)); + ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE); + Assert(BufferIsValid(ovflbuf)); + ovflpage = BufferGetPage(ovflbuf); + + /* initialize the new overflow page */ + _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); + ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); + ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf); + ovflopaque->hasho_nextblkno = InvalidBlockNumber; + 
ovflopaque->hasho_flag = LH_OVERFLOW_PAGE; + ovflopaque->hasho_oaddr = oaddr; + ovflopaque->hasho_bucket = pageopaque->hasho_bucket; + _hash_wrtnorelbuf(rel, ovflbuf); + + /* logically chain overflow page to previous page */ + pageopaque->hasho_nextblkno = ovflblkno; + _hash_wrtnorelbuf(rel, buf); + return (ovflbuf); } /* - * _hash_getovfladdr() + * _hash_getovfladdr() * - * Find an available overflow page and return its address. + * Find an available overflow page and return its address. * - * When we enter this function, we have a read lock on *metabufp which - * we change to a write lock immediately. Before exiting, the write lock - * is exchanged for a read lock. + * When we enter this function, we have a read lock on *metabufp which + * we change to a write lock immediately. Before exiting, the write lock + * is exchanged for a read lock. * */ -static OverflowPageAddress -_hash_getovfladdr(Relation rel, Buffer *metabufp) +static OverflowPageAddress +_hash_getovfladdr(Relation rel, Buffer * metabufp) { - HashMetaPage metap; - Buffer mapbuf = 0; - BlockNumber blkno; - PageOffset offset; - OverflowPageAddress oaddr; - SplitNumber splitnum; - uint32 *freep = NULL; - uint32 max_free; - uint32 bit; - uint32 first_page; - uint32 free_bit; - uint32 free_page; - uint32 in_use_bits; - uint32 i, j; - - metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE); - - splitnum = metap->OVFL_POINT; - max_free = metap->SPARES[splitnum]; - - free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT); - free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1); - - /* Look through all the free maps to find the first free block */ - first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT); - for ( i = first_page; i <= free_page; i++ ) { - Page mappage; - - blkno = metap->hashm_mapp[i]; - mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); - mappage = BufferGetPage(mapbuf); - _hash_checkpage(mappage, LH_BITMAP_PAGE); - freep = HashPageGetBitmap(mappage); - 
Assert(freep); - - if (i == free_page) - in_use_bits = free_bit; - else - in_use_bits = BMPGSZ_BIT(metap) - 1; - - if (i == first_page) { - bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1); - j = bit / BITS_PER_MAP; - bit = bit & ~(BITS_PER_MAP - 1); - } else { - bit = 0; - j = 0; + HashMetaPage metap; + Buffer mapbuf = 0; + BlockNumber blkno; + PageOffset offset; + OverflowPageAddress oaddr; + SplitNumber splitnum; + uint32 *freep = NULL; + uint32 max_free; + uint32 bit; + uint32 first_page; + uint32 free_bit; + uint32 free_page; + uint32 in_use_bits; + uint32 i, + j; + + metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE); + + splitnum = metap->OVFL_POINT; + max_free = metap->SPARES[splitnum]; + + free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT); + free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1); + + /* Look through all the free maps to find the first free block */ + first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT); + for (i = first_page; i <= free_page; i++) + { + Page mappage; + + blkno = metap->hashm_mapp[i]; + mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); + mappage = BufferGetPage(mapbuf); + _hash_checkpage(mappage, LH_BITMAP_PAGE); + freep = HashPageGetBitmap(mappage); + Assert(freep); + + if (i == free_page) + in_use_bits = free_bit; + else + in_use_bits = BMPGSZ_BIT(metap) - 1; + + if (i == first_page) + { + bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1); + j = bit / BITS_PER_MAP; + bit = bit & ~(BITS_PER_MAP - 1); + } + else + { + bit = 0; + j = 0; + } + for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) + if (freep[j] != ALL_SET) + goto found; + } + + /* No Free Page Found - have to allocate a new page */ + metap->LAST_FREED = metap->SPARES[splitnum]; + metap->SPARES[splitnum]++; + offset = metap->SPARES[splitnum] - + (splitnum ? metap->SPARES[splitnum - 1] : 0); + +#define OVMSG "HASH: Out of overflow pages. 
Out of luck.\n" + + if (offset > SPLITMASK) + { + if (++splitnum >= NCACHED) + { + elog(WARN, OVMSG); + } + metap->OVFL_POINT = splitnum; + metap->SPARES[splitnum] = metap->SPARES[splitnum - 1]; + metap->SPARES[splitnum - 1]--; + offset = 0; } - for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) - if (freep[j] != ALL_SET) - goto found; - } - - /* No Free Page Found - have to allocate a new page */ - metap->LAST_FREED = metap->SPARES[splitnum]; - metap->SPARES[splitnum]++; - offset = metap->SPARES[splitnum] - - (splitnum ? metap->SPARES[splitnum - 1] : 0); - -#define OVMSG "HASH: Out of overflow pages. Out of luck.\n" - - if (offset > SPLITMASK) { - if (++splitnum >= NCACHED) { - elog(WARN, OVMSG); + + /* Check if we need to allocate a new bitmap page */ + if (free_bit == BMPGSZ_BIT(metap) - 1) + { + /* won't be needing old map page */ + + _hash_relbuf(rel, mapbuf, HASH_WRITE); + + free_page++; + if (free_page >= NCACHED) + { + elog(WARN, OVMSG); + } + + /* + * This is tricky. The 1 indicates that you want the new page + * allocated with 1 clear bit. Actually, you are going to + * allocate 2 pages from this map. The first is going to be the + * map page, the second is the overflow page we were looking for. + * The init_bitmap routine automatically, sets the first bit of + * itself to indicate that the bitmap itself is in use. We would + * explicitly set the second bit, but don't have to if we tell + * init_bitmap not to leave it clear in the first place. 
+ */ + if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset), + 1, free_page)) + { + elog(WARN, "overflow_page: problem with _hash_initbitmap."); + } + metap->SPARES[splitnum]++; + offset++; + if (offset > SPLITMASK) + { + if (++splitnum >= NCACHED) + { + elog(WARN, OVMSG); + } + metap->OVFL_POINT = splitnum; + metap->SPARES[splitnum] = metap->SPARES[splitnum - 1]; + metap->SPARES[splitnum - 1]--; + offset = 0; + } } - metap->OVFL_POINT = splitnum; - metap->SPARES[splitnum] = metap->SPARES[splitnum-1]; - metap->SPARES[splitnum-1]--; - offset = 0; - } - - /* Check if we need to allocate a new bitmap page */ - if (free_bit == BMPGSZ_BIT(metap) - 1) { - /* won't be needing old map page */ - - _hash_relbuf(rel, mapbuf, HASH_WRITE); - - free_page++; - if (free_page >= NCACHED) { - elog(WARN, OVMSG); + else + { + + /* + * Free_bit addresses the last used bit. Bump it to address the + * first available bit. + */ + free_bit++; + SETBIT(freep, free_bit); + _hash_wrtbuf(rel, mapbuf); } - + + /* Calculate address of the new overflow page */ + oaddr = OADDR_OF(splitnum, offset); + _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); + return (oaddr); + +found: + bit = bit + _hash_firstfreebit(freep[j]); + SETBIT(freep, bit); + _hash_wrtbuf(rel, mapbuf); + /* - * This is tricky. The 1 indicates that you want the new page - * allocated with 1 clear bit. Actually, you are going to - * allocate 2 pages from this map. The first is going to be - * the map page, the second is the overflow page we were - * looking for. The init_bitmap routine automatically, sets - * the first bit of itself to indicate that the bitmap itself - * is in use. We would explicitly set the second bit, but - * don't have to if we tell init_bitmap not to leave it clear - * in the first place. + * Bits are addressed starting with 0, but overflow pages are + * addressed beginning at 1. Bit is a bit addressnumber, so we need to + * increment it to convert it to a page number. 
*/ - if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset), - 1, free_page)) { - elog(WARN, "overflow_page: problem with _hash_initbitmap."); + + bit = 1 + bit + (i * BMPGSZ_BIT(metap)); + if (bit >= metap->LAST_FREED) + { + metap->LAST_FREED = bit - 1; } - metap->SPARES[splitnum]++; - offset++; - if (offset > SPLITMASK) { - if (++splitnum >= NCACHED) { + + /* Calculate the split number for this page */ + for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++) + ; + offset = (i ? bit - metap->SPARES[i - 1] : bit); + if (offset >= SPLITMASK) + { elog(WARN, OVMSG); - } - metap->OVFL_POINT = splitnum; - metap->SPARES[splitnum] = metap->SPARES[splitnum-1]; - metap->SPARES[splitnum-1]--; - offset = 0; } - } else { - - /* - * Free_bit addresses the last used bit. Bump it to address - * the first available bit. - */ - free_bit++; - SETBIT(freep, free_bit); - _hash_wrtbuf(rel, mapbuf); - } - - /* Calculate address of the new overflow page */ - oaddr = OADDR_OF(splitnum, offset); - _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); - return (oaddr); - - found: - bit = bit + _hash_firstfreebit(freep[j]); - SETBIT(freep, bit); - _hash_wrtbuf(rel, mapbuf); - - /* - * Bits are addressed starting with 0, but overflow pages are addressed - * beginning at 1. Bit is a bit addressnumber, so we need to increment - * it to convert it to a page number. - */ - - bit = 1 + bit + (i * BMPGSZ_BIT(metap)); - if (bit >= metap->LAST_FREED) { - metap->LAST_FREED = bit - 1; - } - - /* Calculate the split number for this page */ - for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++) - ; - offset = (i ? 
bit - metap->SPARES[i - 1] : bit); - if (offset >= SPLITMASK) { - elog(WARN, OVMSG); - } - - /* initialize this page */ - oaddr = OADDR_OF(i, offset); - _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); - return (oaddr); + + /* initialize this page */ + oaddr = OADDR_OF(i, offset); + _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); + return (oaddr); } /* - * _hash_firstfreebit() + * _hash_firstfreebit() + * + * Return the first bit that is not set in the argument 'map'. This + * function is used to find an available overflow page within a + * splitnumber. * - * Return the first bit that is not set in the argument 'map'. This - * function is used to find an available overflow page within a - * splitnumber. - * */ -static uint32 +static uint32 _hash_firstfreebit(uint32 map) { - uint32 i, mask; - - mask = 0x1; - for (i = 0; i < BITS_PER_MAP; i++) { - if (!(mask & map)) - return (i); - mask = mask << 1; - } - return (i); + uint32 i, + mask; + + mask = 0x1; + for (i = 0; i < BITS_PER_MAP; i++) + { + if (!(mask & map)) + return (i); + mask = mask << 1; + } + return (i); } /* - * _hash_freeovflpage() - + * _hash_freeovflpage() - * - * Mark this overflow page as free and return a buffer with - * the page that follows it (which may be defined as - * InvalidBuffer). + * Mark this overflow page as free and return a buffer with + * the page that follows it (which may be defined as + * InvalidBuffer). 
* */ Buffer _hash_freeovflpage(Relation rel, Buffer ovflbuf) { - HashMetaPage metap; - Buffer metabuf; - Buffer mapbuf; - BlockNumber prevblkno; - BlockNumber blkno; - BlockNumber nextblkno; - HashPageOpaque ovflopaque; - Page ovflpage; - Page mappage; - OverflowPageAddress addr; - SplitNumber splitnum; - uint32 *freep; - uint32 ovflpgno; - int32 bitmappage, bitmapbit; - Bucket bucket; - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - ovflpage = BufferGetPage(ovflbuf); - _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE); - ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); - addr = ovflopaque->hasho_oaddr; - nextblkno = ovflopaque->hasho_nextblkno; - prevblkno = ovflopaque->hasho_prevblkno; - bucket = ovflopaque->hasho_bucket; - memset(ovflpage, 0, BufferGetPageSize(ovflbuf)); - _hash_wrtbuf(rel, ovflbuf); - - /* - * fix up the bucket chain. this is a doubly-linked list, so we - * must fix up the bucket chain members behind and ahead of the - * overflow page being deleted. - * - * XXX this should look like: - * - lock prev/next - * - modify/write prev/next (how to do write ordering with a - * doubly-linked list?) 
- * - unlock prev/next - */ - if (BlockNumberIsValid(prevblkno)) { - Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE); - Page prevpage = BufferGetPage(prevbuf); - HashPageOpaque prevopaque = - (HashPageOpaque) PageGetSpecialPointer(prevpage); - - _hash_checkpage(prevpage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - Assert(prevopaque->hasho_bucket == bucket); - prevopaque->hasho_nextblkno = nextblkno; - _hash_wrtbuf(rel, prevbuf); - } - if (BlockNumberIsValid(nextblkno)) { - Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE); - Page nextpage = BufferGetPage(nextbuf); - HashPageOpaque nextopaque = - (HashPageOpaque) PageGetSpecialPointer(nextpage); - - _hash_checkpage(nextpage, LH_OVERFLOW_PAGE); - Assert(nextopaque->hasho_bucket == bucket); - nextopaque->hasho_prevblkno = prevblkno; - _hash_wrtbuf(rel, nextbuf); - } - - /* - * Fix up the overflow page bitmap that tracks this particular - * overflow page. The bitmap can be found in the MetaPageData - * array element hashm_mapp[bitmappage]. - */ - splitnum = (addr >> SPLITSHIFT); - ovflpgno = - (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1; - - if (ovflpgno < metap->LAST_FREED) { - metap->LAST_FREED = ovflpgno; - } - - bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT)); - bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1); - - blkno = metap->hashm_mapp[bitmappage]; - mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); - mappage = BufferGetPage(mapbuf); - _hash_checkpage(mappage, LH_BITMAP_PAGE); - freep = HashPageGetBitmap(mappage); - CLRBIT(freep, bitmapbit); - _hash_wrtbuf(rel, mapbuf); - - _hash_relbuf(rel, metabuf, HASH_WRITE); - - /* - * now instantiate the page that replaced this one, - * if it exists, and return that buffer with a write lock. 
- */ - if (BlockNumberIsValid(nextblkno)) { - return (_hash_getbuf(rel, nextblkno, HASH_WRITE)); - } else { - return (InvalidBuffer); - } + HashMetaPage metap; + Buffer metabuf; + Buffer mapbuf; + BlockNumber prevblkno; + BlockNumber blkno; + BlockNumber nextblkno; + HashPageOpaque ovflopaque; + Page ovflpage; + Page mappage; + OverflowPageAddress addr; + SplitNumber splitnum; + uint32 *freep; + uint32 ovflpgno; + int32 bitmappage, + bitmapbit; + Bucket bucket; + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + ovflpage = BufferGetPage(ovflbuf); + _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE); + ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); + addr = ovflopaque->hasho_oaddr; + nextblkno = ovflopaque->hasho_nextblkno; + prevblkno = ovflopaque->hasho_prevblkno; + bucket = ovflopaque->hasho_bucket; + memset(ovflpage, 0, BufferGetPageSize(ovflbuf)); + _hash_wrtbuf(rel, ovflbuf); + + /* + * fix up the bucket chain. this is a doubly-linked list, so we must + * fix up the bucket chain members behind and ahead of the overflow + * page being deleted. + * + * XXX this should look like: - lock prev/next - modify/write prev/next + * (how to do write ordering with a doubly-linked list?) 
- unlock + * prev/next + */ + if (BlockNumberIsValid(prevblkno)) + { + Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE); + Page prevpage = BufferGetPage(prevbuf); + HashPageOpaque prevopaque = + (HashPageOpaque) PageGetSpecialPointer(prevpage); + + _hash_checkpage(prevpage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + Assert(prevopaque->hasho_bucket == bucket); + prevopaque->hasho_nextblkno = nextblkno; + _hash_wrtbuf(rel, prevbuf); + } + if (BlockNumberIsValid(nextblkno)) + { + Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE); + Page nextpage = BufferGetPage(nextbuf); + HashPageOpaque nextopaque = + (HashPageOpaque) PageGetSpecialPointer(nextpage); + + _hash_checkpage(nextpage, LH_OVERFLOW_PAGE); + Assert(nextopaque->hasho_bucket == bucket); + nextopaque->hasho_prevblkno = prevblkno; + _hash_wrtbuf(rel, nextbuf); + } + + /* + * Fix up the overflow page bitmap that tracks this particular + * overflow page. The bitmap can be found in the MetaPageData array + * element hashm_mapp[bitmappage]. + */ + splitnum = (addr >> SPLITSHIFT); + ovflpgno = + (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1; + + if (ovflpgno < metap->LAST_FREED) + { + metap->LAST_FREED = ovflpgno; + } + + bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT)); + bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1); + + blkno = metap->hashm_mapp[bitmappage]; + mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); + mappage = BufferGetPage(mapbuf); + _hash_checkpage(mappage, LH_BITMAP_PAGE); + freep = HashPageGetBitmap(mappage); + CLRBIT(freep, bitmapbit); + _hash_wrtbuf(rel, mapbuf); + + _hash_relbuf(rel, metabuf, HASH_WRITE); + + /* + * now instantiate the page that replaced this one, if it exists, and + * return that buffer with a write lock. + */ + if (BlockNumberIsValid(nextblkno)) + { + return (_hash_getbuf(rel, nextblkno, HASH_WRITE)); + } + else + { + return (InvalidBuffer); + } } /* - * _hash_initbitmap() - * - * Initialize a new bitmap page. 
The metapage has a write-lock upon - * entering the function. + * _hash_initbitmap() + * + * Initialize a new bitmap page. The metapage has a write-lock upon + * entering the function. * * 'pnum' is the OverflowPageAddress of the new bitmap page. * 'nbits' is how many bits to clear (i.e., make available) in the new @@ -404,211 +427,219 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf) * metapage's array of bitmap page OverflowPageAddresses. */ -#define INT_MASK ((1 << INT_TO_BIT) -1) +#define INT_MASK ((1 << INT_TO_BIT) -1) int32 _hash_initbitmap(Relation rel, - HashMetaPage metap, - int32 pnum, - int32 nbits, - int32 ndx) + HashMetaPage metap, + int32 pnum, + int32 nbits, + int32 ndx) { - Buffer buf; - BlockNumber blkno; - Page pg; - HashPageOpaque op; - uint32 *freep; - int clearbytes, clearints; - - blkno = OADDR_TO_BLKNO(pnum); - buf = _hash_getbuf(rel, blkno, HASH_WRITE); - pg = BufferGetPage(buf); - _hash_pageinit(pg, BufferGetPageSize(buf)); - op = (HashPageOpaque) PageGetSpecialPointer(pg); - op->hasho_oaddr = InvalidOvflAddress; - op->hasho_prevblkno = InvalidBlockNumber; - op->hasho_nextblkno = InvalidBlockNumber; - op->hasho_flag = LH_BITMAP_PAGE; - op->hasho_bucket = -1; - - freep = HashPageGetBitmap(pg); - - /* set all of the bits above 'nbits' to 1 */ - clearints = ((nbits - 1) >> INT_TO_BIT) + 1; - clearbytes = clearints << INT_TO_BYTE; - memset((char *) freep, 0, clearbytes); - memset(((char *) freep) + clearbytes, 0xFF, - BMPGSZ_BYTE(metap) - clearbytes); - freep[clearints - 1] = ALL_SET << (nbits & INT_MASK); - - /* bit 0 represents the new bitmap page */ - SETBIT(freep, 0); - - /* metapage already has a write lock */ - metap->hashm_nmaps++; - metap->hashm_mapp[ndx] = blkno; - - /* write out the new bitmap page (releasing its locks) */ - _hash_wrtbuf(rel, buf); - - return (0); + Buffer buf; + BlockNumber blkno; + Page pg; + HashPageOpaque op; + uint32 *freep; + int clearbytes, + clearints; + + blkno = OADDR_TO_BLKNO(pnum); + buf = 
_hash_getbuf(rel, blkno, HASH_WRITE); + pg = BufferGetPage(buf); + _hash_pageinit(pg, BufferGetPageSize(buf)); + op = (HashPageOpaque) PageGetSpecialPointer(pg); + op->hasho_oaddr = InvalidOvflAddress; + op->hasho_prevblkno = InvalidBlockNumber; + op->hasho_nextblkno = InvalidBlockNumber; + op->hasho_flag = LH_BITMAP_PAGE; + op->hasho_bucket = -1; + + freep = HashPageGetBitmap(pg); + + /* set all of the bits above 'nbits' to 1 */ + clearints = ((nbits - 1) >> INT_TO_BIT) + 1; + clearbytes = clearints << INT_TO_BYTE; + memset((char *) freep, 0, clearbytes); + memset(((char *) freep) + clearbytes, 0xFF, + BMPGSZ_BYTE(metap) - clearbytes); + freep[clearints - 1] = ALL_SET << (nbits & INT_MASK); + + /* bit 0 represents the new bitmap page */ + SETBIT(freep, 0); + + /* metapage already has a write lock */ + metap->hashm_nmaps++; + metap->hashm_mapp[ndx] = blkno; + + /* write out the new bitmap page (releasing its locks) */ + _hash_wrtbuf(rel, buf); + + return (0); } /* - * _hash_squeezebucket(rel, bucket) + * _hash_squeezebucket(rel, bucket) * - * Try to squeeze the tuples onto pages occuring earlier in the - * bucket chain in an attempt to free overflow pages. When we start - * the "squeezing", the page from which we start taking tuples (the - * "read" page) is the last bucket in the bucket chain and the page - * onto which we start squeezing tuples (the "write" page) is the - * first page in the bucket chain. The read page works backward and - * the write page works forward; the procedure terminates when the - * read page and write page are the same page. + * Try to squeeze the tuples onto pages occuring earlier in the + * bucket chain in an attempt to free overflow pages. When we start + * the "squeezing", the page from which we start taking tuples (the + * "read" page) is the last bucket in the bucket chain and the page + * onto which we start squeezing tuples (the "write" page) is the + * first page in the bucket chain. 
The read page works backward and + * the write page works forward; the procedure terminates when the + * read page and write page are the same page. */ void _hash_squeezebucket(Relation rel, - HashMetaPage metap, - Bucket bucket) + HashMetaPage metap, + Bucket bucket) { - Buffer wbuf; - Buffer rbuf = 0; - BlockNumber wblkno; - BlockNumber rblkno; - Page wpage; - Page rpage; - HashPageOpaque wopaque; - HashPageOpaque ropaque; - OffsetNumber woffnum; - OffsetNumber roffnum; - HashItem hitem; - int itemsz; - -/* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */ - - /* - * start squeezing into the base bucket page. - */ - wblkno = BUCKET_TO_BLKNO(bucket); - wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); - wpage = BufferGetPage(wbuf); - _hash_checkpage(wpage, LH_BUCKET_PAGE); - wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); - - /* - * if there aren't any overflow pages, there's nothing to squeeze. - */ - if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) { - _hash_relbuf(rel, wbuf, HASH_WRITE); - return; - } - - /* - * find the last page in the bucket chain by starting at the base - * bucket page and working forward. - * - * XXX if chains tend to be long, we should probably move forward - * using HASH_READ and then _hash_chgbufaccess to HASH_WRITE when - * we reach the end. if they are short we probably don't care - * very much. if the hash function is working at all, they had - * better be short.. - */ - ropaque = wopaque; - do { - rblkno = ropaque->hasho_nextblkno; - if (ropaque != wopaque) { - _hash_relbuf(rel, rbuf, HASH_WRITE); - } - rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); - rpage = BufferGetPage(rbuf); - _hash_checkpage(rpage, LH_OVERFLOW_PAGE); - Assert(!PageIsEmpty(rpage)); - ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); - Assert(ropaque->hasho_bucket == bucket); - } while (BlockNumberIsValid(ropaque->hasho_nextblkno)); - - /* - * squeeze the tuples. 
- */ - roffnum = FirstOffsetNumber; - for(;;) { - hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum)); - itemsz = IndexTupleDSize(hitem->hash_itup) - + (sizeof(HashItemData) - sizeof(IndexTupleData)); - itemsz = DOUBLEALIGN(itemsz); - + Buffer wbuf; + Buffer rbuf = 0; + BlockNumber wblkno; + BlockNumber rblkno; + Page wpage; + Page rpage; + HashPageOpaque wopaque; + HashPageOpaque ropaque; + OffsetNumber woffnum; + OffsetNumber roffnum; + HashItem hitem; + int itemsz; + +/* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */ + /* - * walk up the bucket chain, looking for a page big enough for - * this item. + * start squeezing into the base bucket page. */ - while (PageGetFreeSpace(wpage) < itemsz) { - wblkno = wopaque->hasho_nextblkno; + wblkno = BUCKET_TO_BLKNO(bucket); + wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); + wpage = BufferGetPage(wbuf); + _hash_checkpage(wpage, LH_BUCKET_PAGE); + wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); - _hash_wrtbuf(rel, wbuf); - - if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) { - _hash_wrtbuf(rel, rbuf); - /* wbuf is already released */ + /* + * if there aren't any overflow pages, there's nothing to squeeze. + */ + if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) + { + _hash_relbuf(rel, wbuf, HASH_WRITE); return; - } - - wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); - wpage = BufferGetPage(wbuf); - _hash_checkpage(wpage, LH_OVERFLOW_PAGE); - Assert(!PageIsEmpty(wpage)); - wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); - Assert(wopaque->hasho_bucket == bucket); } - - /* - * if we're here, we have found room so insert on the "write" - * page. - */ - woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage)); - PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED); - - /* - * delete the tuple from the "read" page. - * PageIndexTupleDelete repacks the ItemId array, so 'roffnum' - * will be "advanced" to the "next" ItemId. 
+ + /* + * find the last page in the bucket chain by starting at the base + * bucket page and working forward. + * + * XXX if chains tend to be long, we should probably move forward using + * HASH_READ and then _hash_chgbufaccess to HASH_WRITE when we reach + * the end. if they are short we probably don't care very much. if + * the hash function is working at all, they had better be short.. */ - PageIndexTupleDelete(rpage, roffnum); - _hash_wrtnorelbuf(rel, rbuf); - + ropaque = wopaque; + do + { + rblkno = ropaque->hasho_nextblkno; + if (ropaque != wopaque) + { + _hash_relbuf(rel, rbuf, HASH_WRITE); + } + rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); + rpage = BufferGetPage(rbuf); + _hash_checkpage(rpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(rpage)); + ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); + Assert(ropaque->hasho_bucket == bucket); + } while (BlockNumberIsValid(ropaque->hasho_nextblkno)); + /* - * if the "read" page is now empty because of the deletion, - * free it. + * squeeze the tuples. */ - if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) { - rblkno = ropaque->hasho_prevblkno; - Assert(BlockNumberIsValid(rblkno)); - - /* - * free this overflow page. the extra _hash_relbuf is - * because _hash_freeovflpage gratuitously returns the - * next page (we want the previous page and will get it - * ourselves later). 
- */ - rbuf = _hash_freeovflpage(rel, rbuf); - if (BufferIsValid(rbuf)) { - _hash_relbuf(rel, rbuf, HASH_WRITE); - } - - if (rblkno == wblkno) { - /* rbuf is already released */ - _hash_wrtbuf(rel, wbuf); - return; - } - - rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); - rpage = BufferGetPage(rbuf); - _hash_checkpage(rpage, LH_OVERFLOW_PAGE); - Assert(!PageIsEmpty(rpage)); - ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); - Assert(ropaque->hasho_bucket == bucket); - - roffnum = FirstOffsetNumber; + roffnum = FirstOffsetNumber; + for (;;) + { + hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum)); + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + + /* + * walk up the bucket chain, looking for a page big enough for + * this item. + */ + while (PageGetFreeSpace(wpage) < itemsz) + { + wblkno = wopaque->hasho_nextblkno; + + _hash_wrtbuf(rel, wbuf); + + if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) + { + _hash_wrtbuf(rel, rbuf); + /* wbuf is already released */ + return; + } + + wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); + wpage = BufferGetPage(wbuf); + _hash_checkpage(wpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(wpage)); + wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); + Assert(wopaque->hasho_bucket == bucket); + } + + /* + * if we're here, we have found room so insert on the "write" + * page. + */ + woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage)); + PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED); + + /* + * delete the tuple from the "read" page. PageIndexTupleDelete + * repacks the ItemId array, so 'roffnum' will be "advanced" to + * the "next" ItemId. + */ + PageIndexTupleDelete(rpage, roffnum); + _hash_wrtnorelbuf(rel, rbuf); + + /* + * if the "read" page is now empty because of the deletion, free + * it. 
+ */ + if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) + { + rblkno = ropaque->hasho_prevblkno; + Assert(BlockNumberIsValid(rblkno)); + + /* + * free this overflow page. the extra _hash_relbuf is because + * _hash_freeovflpage gratuitously returns the next page (we + * want the previous page and will get it ourselves later). + */ + rbuf = _hash_freeovflpage(rel, rbuf); + if (BufferIsValid(rbuf)) + { + _hash_relbuf(rel, rbuf, HASH_WRITE); + } + + if (rblkno == wblkno) + { + /* rbuf is already released */ + _hash_wrtbuf(rel, wbuf); + return; + } + + rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); + rpage = BufferGetPage(rbuf); + _hash_checkpage(rpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(rpage)); + ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); + Assert(ropaque->hasho_bucket == bucket); + + roffnum = FirstOffsetNumber; + } } - } } diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 49c8f03f52..6c819b652d 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -1,30 +1,30 @@ /*------------------------------------------------------------------------- * * hashpage.c-- - * Hash table page management code for the Postgres hash access method + * Hash table page management code for the Postgres hash access method * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.9 1997/08/18 20:51:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.10 1997/09/07 04:38:00 momjian Exp $ * * NOTES - * Postgres hash pages look like ordinary relation pages. The opaque - * data at high addresses includes information about the page including - * whether a page is an overflow page or a true bucket, the block - * numbers of the preceding and following pages, and the overflow - * address of the page if it is an overflow page. 
+ * Postgres hash pages look like ordinary relation pages. The opaque + * data at high addresses includes information about the page including + * whether a page is an overflow page or a true bucket, the block + * numbers of the preceding and following pages, and the overflow + * address of the page if it is an overflow page. * - * The first page in a hash relation, page zero, is special -- it stores - * information describing the hash table; it is referred to as teh - * "meta page." Pages one and higher store the actual data. + * The first page in a hash relation, page zero, is special -- it stores + * information describing the hash table; it is referred to as teh + * "meta page." Pages one and higher store the actual data. * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <storage/bufmgr.h> #include <miscadmin.h> @@ -33,411 +33,429 @@ #include <access/genam.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access); -static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access); -static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket); - -/* - * We use high-concurrency locking on hash indices. There are two cases in - * which we don't do locking. One is when we're building the index. - * Since the creating transaction has not committed, no one can see - * the index, and there's no reason to share locks. The second case - * is when we're just starting up the database system. We use some - * special-purpose initialization code in the relation cache manager - * (see utils/cache/relcache.c) to allow us to do indexed scans on - * the system catalogs before we'd normally be able to. This happens - * before the lock table is fully initialized, so we can't use it. 
- * Strictly speaking, this violates 2pl, but we don't do 2pl on the - * system catalogs anyway. +static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access); +static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access); +static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket); + +/* + * We use high-concurrency locking on hash indices. There are two cases in + * which we don't do locking. One is when we're building the index. + * Since the creating transaction has not committed, no one can see + * the index, and there's no reason to share locks. The second case + * is when we're just starting up the database system. We use some + * special-purpose initialization code in the relation cache manager + * (see utils/cache/relcache.c) to allow us to do indexed scans on + * the system catalogs before we'd normally be able to. This happens + * before the lock table is fully initialized, so we can't use it. + * Strictly speaking, this violates 2pl, but we don't do 2pl on the + * system catalogs anyway. */ -#define USELOCKING (!BuildingHash && !IsInitProcessingMode()) +#define USELOCKING (!BuildingHash && !IsInitProcessingMode()) /* - * _hash_metapinit() -- Initialize the metadata page of a hash index, - * the two buckets that we begin with and the initial - * bitmap page. + * _hash_metapinit() -- Initialize the metadata page of a hash index, + * the two buckets that we begin with and the initial + * bitmap page. */ void _hash_metapinit(Relation rel) { - HashMetaPage metap; - HashPageOpaque pageopaque; - Buffer metabuf; - Buffer buf; - Page pg; - int nbuckets; - uint32 nelem; /* number elements */ - uint32 lg2nelem; /* _hash_log2(nelem) */ - uint32 nblocks; - uint16 i; - - /* can't be sharing this with anyone, now... 
*/ - if (USELOCKING) - RelationSetLockForWrite(rel); - - if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) { - elog(WARN, "Cannot initialize non-empty hash table %s", - RelationGetRelationName(rel)); - } - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); - pg = BufferGetPage(metabuf); - metap = (HashMetaPage) pg; - _hash_pageinit(pg, BufferGetPageSize(metabuf)); - - metap->hashm_magic = HASH_MAGIC; - metap->hashm_version = HASH_VERSION; - metap->hashm_nkeys = 0; - metap->hashm_nmaps = 0; - metap->hashm_ffactor = DEFAULT_FFACTOR; - metap->hashm_bsize = BufferGetPageSize(metabuf); - metap->hashm_bshift = _hash_log2(metap->hashm_bsize); - for (i = metap->hashm_bshift; i > 0; --i) { - if ((1 << i) < (metap->hashm_bsize - - (DOUBLEALIGN(sizeof(PageHeaderData)) + - DOUBLEALIGN(sizeof(HashPageOpaqueData))))) { - break; + HashMetaPage metap; + HashPageOpaque pageopaque; + Buffer metabuf; + Buffer buf; + Page pg; + int nbuckets; + uint32 nelem; /* number elements */ + uint32 lg2nelem; /* _hash_log2(nelem) */ + uint32 nblocks; + uint16 i; + + /* can't be sharing this with anyone, now... 
*/ + if (USELOCKING) + RelationSetLockForWrite(rel); + + if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) + { + elog(WARN, "Cannot initialize non-empty hash table %s", + RelationGetRelationName(rel)); + } + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + pg = BufferGetPage(metabuf); + metap = (HashMetaPage) pg; + _hash_pageinit(pg, BufferGetPageSize(metabuf)); + + metap->hashm_magic = HASH_MAGIC; + metap->hashm_version = HASH_VERSION; + metap->hashm_nkeys = 0; + metap->hashm_nmaps = 0; + metap->hashm_ffactor = DEFAULT_FFACTOR; + metap->hashm_bsize = BufferGetPageSize(metabuf); + metap->hashm_bshift = _hash_log2(metap->hashm_bsize); + for (i = metap->hashm_bshift; i > 0; --i) + { + if ((1 << i) < (metap->hashm_bsize - + (DOUBLEALIGN(sizeof(PageHeaderData)) + + DOUBLEALIGN(sizeof(HashPageOpaqueData))))) + { + break; + } } - } - Assert(i); - metap->hashm_bmsize = 1 << i; - metap->hashm_procid = index_getprocid(rel, 1, HASHPROC); - - /* - * Make nelem = 2 rather than 0 so that we end up allocating space - * for the next greater power of two number of buckets. 
- */ - nelem = 2; - lg2nelem = 1; /*_hash_log2(MAX(nelem, 2)) */ - nbuckets = 2; /*1 << lg2nelem */ - - memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares)); - memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp)); - - metap->hashm_spares[lg2nelem] = 2; /* lg2nelem + 1 */ - metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */ - metap->hashm_ovflpoint = 1; /* lg2nelem */ - metap->hashm_lastfreed = 2; - - metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */ - metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */ - - pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); - pageopaque->hasho_oaddr = InvalidOvflAddress; - pageopaque->hasho_prevblkno = InvalidBlockNumber; - pageopaque->hasho_nextblkno = InvalidBlockNumber; - pageopaque->hasho_flag = LH_META_PAGE; - pageopaque->hasho_bucket = -1; - - /* - * First bitmap page is at: splitpoint lg2nelem page offset 1 which - * turns out to be page 3. Couldn't initialize page 3 until we created - * the first two buckets above. - */ - if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0)) - elog(WARN, "Problem with _hash_initbitmap."); - - /* all done */ - _hash_wrtnorelbuf(rel, metabuf); - - /* - * initialize the first two buckets - */ - for (i = 0; i <= 1; i++) { - buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE); - pg = BufferGetPage(buf); - _hash_pageinit(pg, BufferGetPageSize(buf)); + Assert(i); + metap->hashm_bmsize = 1 << i; + metap->hashm_procid = index_getprocid(rel, 1, HASHPROC); + + /* + * Make nelem = 2 rather than 0 so that we end up allocating space for + * the next greater power of two number of buckets. 
+ */ + nelem = 2; + lg2nelem = 1; /* _hash_log2(MAX(nelem, 2)) */ + nbuckets = 2; /* 1 << lg2nelem */ + + memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares)); + memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp)); + + metap->hashm_spares[lg2nelem] = 2; /* lg2nelem + 1 */ + metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */ + metap->hashm_ovflpoint = 1; /* lg2nelem */ + metap->hashm_lastfreed = 2; + + metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */ + metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */ + pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); pageopaque->hasho_oaddr = InvalidOvflAddress; pageopaque->hasho_prevblkno = InvalidBlockNumber; pageopaque->hasho_nextblkno = InvalidBlockNumber; - pageopaque->hasho_flag = LH_BUCKET_PAGE; - pageopaque->hasho_bucket = i; - _hash_wrtbuf(rel, buf); - } - - _hash_relbuf(rel, metabuf, HASH_WRITE); - - if (USELOCKING) - RelationUnsetLockForWrite(rel); + pageopaque->hasho_flag = LH_META_PAGE; + pageopaque->hasho_bucket = -1; + + /* + * First bitmap page is at: splitpoint lg2nelem page offset 1 which + * turns out to be page 3. Couldn't initialize page 3 until we + * created the first two buckets above. 
+ */ + if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0)) + elog(WARN, "Problem with _hash_initbitmap."); + + /* all done */ + _hash_wrtnorelbuf(rel, metabuf); + + /* + * initialize the first two buckets + */ + for (i = 0; i <= 1; i++) + { + buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE); + pg = BufferGetPage(buf); + _hash_pageinit(pg, BufferGetPageSize(buf)); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); + pageopaque->hasho_oaddr = InvalidOvflAddress; + pageopaque->hasho_prevblkno = InvalidBlockNumber; + pageopaque->hasho_nextblkno = InvalidBlockNumber; + pageopaque->hasho_flag = LH_BUCKET_PAGE; + pageopaque->hasho_bucket = i; + _hash_wrtbuf(rel, buf); + } + + _hash_relbuf(rel, metabuf, HASH_WRITE); + + if (USELOCKING) + RelationUnsetLockForWrite(rel); } /* - * _hash_getbuf() -- Get a buffer by block number for read or write. + * _hash_getbuf() -- Get a buffer by block number for read or write. * - * When this routine returns, the appropriate lock is set on the - * requested buffer its reference count is correct. + * When this routine returns, the appropriate lock is set on the + * requested buffer its reference count is correct. * - * XXX P_NEW is not used because, unlike the tree structures, we - * need the bucket blocks to be at certain block numbers. we must - * depend on the caller to call _hash_pageinit on the block if it - * knows that this is a new block. + * XXX P_NEW is not used because, unlike the tree structures, we + * need the bucket blocks to be at certain block numbers. we must + * depend on the caller to call _hash_pageinit on the block if it + * knows that this is a new block. 
*/ Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access) { - Buffer buf; - - if (blkno == P_NEW) { - elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW"); - } - switch (access) { - case HASH_WRITE: - case HASH_READ: - _hash_setpagelock(rel, blkno, access); - break; - default: - elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %s", - access, RelationGetRelationName(rel)); - break; - } - buf = ReadBuffer(rel, blkno); - - /* ref count and lock type are correct */ - return (buf); + Buffer buf; + + if (blkno == P_NEW) + { + elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW"); + } + switch (access) + { + case HASH_WRITE: + case HASH_READ: + _hash_setpagelock(rel, blkno, access); + break; + default: + elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %s", + access, RelationGetRelationName(rel)); + break; + } + buf = ReadBuffer(rel, blkno); + + /* ref count and lock type are correct */ + return (buf); } /* - * _hash_relbuf() -- release a locked buffer. + * _hash_relbuf() -- release a locked buffer. */ void _hash_relbuf(Relation rel, Buffer buf, int access) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - - switch (access) { - case HASH_WRITE: - case HASH_READ: - _hash_unsetpagelock(rel, blkno, access); - break; - default: - elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %s", - access, blkno, RelationGetRelationName(rel)); - } - - ReleaseBuffer(buf); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + + switch (access) + { + case HASH_WRITE: + case HASH_READ: + _hash_unsetpagelock(rel, blkno, access); + break; + default: + elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %s", + access, blkno, RelationGetRelationName(rel)); + } + + ReleaseBuffer(buf); } /* - * _hash_wrtbuf() -- write a hash page to disk. + * _hash_wrtbuf() -- write a hash page to disk. * - * This routine releases the lock held on the buffer and our reference - * to it. 
It is an error to call _hash_wrtbuf() without a write lock - * or a reference to the buffer. + * This routine releases the lock held on the buffer and our reference + * to it. It is an error to call _hash_wrtbuf() without a write lock + * or a reference to the buffer. */ void _hash_wrtbuf(Relation rel, Buffer buf) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - WriteBuffer(buf); - _hash_unsetpagelock(rel, blkno, HASH_WRITE); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteBuffer(buf); + _hash_unsetpagelock(rel, blkno, HASH_WRITE); } /* - * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release - * our reference or lock. + * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release + * our reference or lock. * - * It is an error to call _hash_wrtnorelbuf() without a write lock - * or a reference to the buffer. + * It is an error to call _hash_wrtnorelbuf() without a write lock + * or a reference to the buffer. */ void _hash_wrtnorelbuf(Relation rel, Buffer buf) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - WriteNoReleaseBuffer(buf); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteNoReleaseBuffer(buf); } Page _hash_chgbufaccess(Relation rel, - Buffer *bufp, - int from_access, - int to_access) + Buffer * bufp, + int from_access, + int to_access) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(*bufp); - - switch (from_access) { - case HASH_WRITE: - _hash_wrtbuf(rel, *bufp); - break; - case HASH_READ: - _hash_relbuf(rel, *bufp, from_access); - break; - default: - elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %s", - from_access, blkno, RelationGetRelationName(rel)); - break; - } - *bufp = _hash_getbuf(rel, blkno, to_access); - return (BufferGetPage(*bufp)); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(*bufp); + + switch (from_access) + { + case HASH_WRITE: + _hash_wrtbuf(rel, *bufp); + break; + case HASH_READ: + _hash_relbuf(rel, *bufp, 
from_access); + break; + default: + elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %s", + from_access, blkno, RelationGetRelationName(rel)); + break; + } + *bufp = _hash_getbuf(rel, blkno, to_access); + return (BufferGetPage(*bufp)); } /* - * _hash_pageinit() -- Initialize a new page. + * _hash_pageinit() -- Initialize a new page. */ void _hash_pageinit(Page page, Size size) { - Assert(((PageHeader) page)->pd_lower == 0); - Assert(((PageHeader) page)->pd_upper == 0); - Assert(((PageHeader) page)->pd_special == 0); - - /* - * Cargo-cult programming -- don't really need this to be zero, but - * creating new pages is an infrequent occurrence and it makes me feel - * good when I know they're empty. - */ - memset(page, 0, size); - - PageInit(page, size, sizeof(HashPageOpaqueData)); + Assert(((PageHeader) page)->pd_lower == 0); + Assert(((PageHeader) page)->pd_upper == 0); + Assert(((PageHeader) page)->pd_special == 0); + + /* + * Cargo-cult programming -- don't really need this to be zero, but + * creating new pages is an infrequent occurrence and it makes me feel + * good when I know they're empty. 
+ */ + memset(page, 0, size); + + PageInit(page, size, sizeof(HashPageOpaqueData)); } static void _hash_setpagelock(Relation rel, - BlockNumber blkno, - int access) + BlockNumber blkno, + int access) { - ItemPointerData iptr; - - if (USELOCKING) { - ItemPointerSet(&iptr, blkno, 1); - - switch (access) { - case HASH_WRITE: - RelationSetSingleWLockPage(rel, &iptr); - break; - case HASH_READ: - RelationSetSingleRLockPage(rel, &iptr); - break; - default: - elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %s", - access, blkno, RelationGetRelationName(rel)); - break; + ItemPointerData iptr; + + if (USELOCKING) + { + ItemPointerSet(&iptr, blkno, 1); + + switch (access) + { + case HASH_WRITE: + RelationSetSingleWLockPage(rel, &iptr); + break; + case HASH_READ: + RelationSetSingleRLockPage(rel, &iptr); + break; + default: + elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %s", + access, blkno, RelationGetRelationName(rel)); + break; + } } - } } static void _hash_unsetpagelock(Relation rel, - BlockNumber blkno, - int access) + BlockNumber blkno, + int access) { - ItemPointerData iptr; - - if (USELOCKING) { - ItemPointerSet(&iptr, blkno, 1); - - switch (access) { - case HASH_WRITE: - RelationUnsetSingleWLockPage(rel, &iptr); - break; - case HASH_READ: - RelationUnsetSingleRLockPage(rel, &iptr); - break; - default: - elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %s", - access, blkno, RelationGetRelationName(rel)); - break; + ItemPointerData iptr; + + if (USELOCKING) + { + ItemPointerSet(&iptr, blkno, 1); + + switch (access) + { + case HASH_WRITE: + RelationUnsetSingleWLockPage(rel, &iptr); + break; + case HASH_READ: + RelationUnsetSingleRLockPage(rel, &iptr); + break; + default: + elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %s", + access, blkno, RelationGetRelationName(rel)); + break; + } } - } } void _hash_pagedel(Relation rel, ItemPointer tid) { - Buffer buf; - Buffer metabuf; - Page page; - BlockNumber blkno; - 
OffsetNumber offno; - HashMetaPage metap; - HashPageOpaque opaque; - - blkno = ItemPointerGetBlockNumber(tid); - offno = ItemPointerGetOffsetNumber(tid); - - buf = _hash_getbuf(rel, blkno, HASH_WRITE); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - - PageIndexTupleDelete(page, offno); - _hash_wrtnorelbuf(rel, buf); - - if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) { - buf = _hash_freeovflpage(rel, buf); - if (BufferIsValid(buf)) { - _hash_relbuf(rel, buf, HASH_WRITE); + Buffer buf; + Buffer metabuf; + Page page; + BlockNumber blkno; + OffsetNumber offno; + HashMetaPage metap; + HashPageOpaque opaque; + + blkno = ItemPointerGetBlockNumber(tid); + offno = ItemPointerGetOffsetNumber(tid); + + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + PageIndexTupleDelete(page, offno); + _hash_wrtnorelbuf(rel, buf); + + if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) + { + buf = _hash_freeovflpage(rel, buf); + if (BufferIsValid(buf)) + { + _hash_relbuf(rel, buf, HASH_WRITE); + } } - } else { - _hash_relbuf(rel, buf, HASH_WRITE); - } - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - ++metap->hashm_nkeys; - _hash_wrtbuf(rel, metabuf); + else + { + _hash_relbuf(rel, buf, HASH_WRITE); + } + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + ++metap->hashm_nkeys; + _hash_wrtbuf(rel, metabuf); } void _hash_expandtable(Relation rel, Buffer metabuf) { - HashMetaPage metap; - Bucket old_bucket; - Bucket new_bucket; - uint32 spare_ndx; - -/* elog(DEBUG, "_hash_expandtable: expanding..."); */ - - metap = 
(HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - new_bucket = ++metap->MAX_BUCKET; - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); - old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK); - - /* - * If the split point is increasing (MAX_BUCKET's log base 2 - * * increases), we need to copy the current contents of the spare - * split bucket to the next bucket. - */ - spare_ndx = _hash_log2(metap->MAX_BUCKET + 1); - if (spare_ndx > metap->OVFL_POINT) { - - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT]; - metap->OVFL_POINT = spare_ndx; - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); - } - - if (new_bucket > metap->HIGH_MASK) { - - /* Starting a new doubling */ - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); - metap->LOW_MASK = metap->HIGH_MASK; - metap->HIGH_MASK = new_bucket | metap->LOW_MASK; - metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); - - } - /* Relocate records to the new bucket */ - _hash_splitpage(rel, metabuf, old_bucket, new_bucket); + HashMetaPage metap; + Bucket old_bucket; + Bucket new_bucket; + uint32 spare_ndx; + +/* elog(DEBUG, "_hash_expandtable: expanding..."); */ + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + new_bucket = ++metap->MAX_BUCKET; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK); + + /* + * If the split point is increasing (MAX_BUCKET's log base 2 * + * increases), we need to copy the current contents of the spare split + * bucket to the next bucket. 
+ */ + spare_ndx = _hash_log2(metap->MAX_BUCKET + 1); + if (spare_ndx > metap->OVFL_POINT) + { + + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT]; + metap->OVFL_POINT = spare_ndx; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + } + + if (new_bucket > metap->HIGH_MASK) + { + + /* Starting a new doubling */ + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + metap->LOW_MASK = metap->HIGH_MASK; + metap->HIGH_MASK = new_bucket | metap->LOW_MASK; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + + } + /* Relocate records to the new bucket */ + _hash_splitpage(rel, metabuf, old_bucket, new_bucket); } @@ -450,224 +468,243 @@ _hash_expandtable(Relation rel, Buffer metabuf) */ static void _hash_splitpage(Relation rel, - Buffer metabuf, - Bucket obucket, - Bucket nbucket) + Buffer metabuf, + Bucket obucket, + Bucket nbucket) { - Bucket bucket; - Buffer obuf; - Buffer nbuf; - Buffer ovflbuf; - BlockNumber oblkno; - BlockNumber nblkno; - bool null; - Datum datum; - HashItem hitem; - HashPageOpaque oopaque; - HashPageOpaque nopaque; - HashMetaPage metap; - IndexTuple itup; - int itemsz; - OffsetNumber ooffnum; - OffsetNumber noffnum; - OffsetNumber omaxoffnum; - Page opage; - Page npage; - TupleDesc itupdesc; - -/* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d", - obucket, obucket, nbucket); + Bucket bucket; + Buffer obuf; + Buffer nbuf; + Buffer ovflbuf; + BlockNumber oblkno; + BlockNumber nblkno; + bool null; + Datum datum; + HashItem hitem; + HashPageOpaque oopaque; + HashPageOpaque nopaque; + HashMetaPage metap; + IndexTuple itup; + int itemsz; + OffsetNumber ooffnum; + OffsetNumber noffnum; + OffsetNumber omaxoffnum; + Page opage; + Page npage; + TupleDesc itupdesc; + +/* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d", + obucket, obucket, nbucket); */ - metap 
= (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - /* get the buffers & pages */ - oblkno = BUCKET_TO_BLKNO(obucket); - nblkno = BUCKET_TO_BLKNO(nbucket); - obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); - nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE); - opage = BufferGetPage(obuf); - npage = BufferGetPage(nbuf); - - /* initialize the new bucket */ - _hash_pageinit(npage, BufferGetPageSize(nbuf)); - nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); - nopaque->hasho_prevblkno = InvalidBlockNumber; - nopaque->hasho_nextblkno = InvalidBlockNumber; - nopaque->hasho_flag = LH_BUCKET_PAGE; - nopaque->hasho_oaddr = InvalidOvflAddress; - nopaque->hasho_bucket = nbucket; - _hash_wrtnorelbuf(rel, nbuf); - - /* - * make sure the old bucket isn't empty. advance 'opage' and - * friends through the overflow bucket chain until we find a - * non-empty page. - * - * XXX we should only need this once, if we are careful to - * preserve the invariant that overflow pages are never empty. - */ - _hash_checkpage(opage, LH_BUCKET_PAGE); - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - if (PageIsEmpty(opage)) { - oblkno = oopaque->hasho_nextblkno; - _hash_relbuf(rel, obuf, HASH_WRITE); - if (!BlockNumberIsValid(oblkno)) { - /* - * the old bucket is completely empty; of course, the new - * bucket will be as well, but since it's a base bucket - * page we don't care. 
- */ - _hash_relbuf(rel, nbuf, HASH_WRITE); - return; - } + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* get the buffers & pages */ + oblkno = BUCKET_TO_BLKNO(obucket); + nblkno = BUCKET_TO_BLKNO(nbucket); obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); + nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE); opage = BufferGetPage(obuf); - _hash_checkpage(opage, LH_OVERFLOW_PAGE); - if (PageIsEmpty(opage)) { - elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno); - } - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - } - - /* - * we are now guaranteed that 'opage' is not empty. partition the - * tuples in the old bucket between the old bucket and the new - * bucket, advancing along their respective overflow bucket chains - * and adding overflow pages as needed. - */ - ooffnum = FirstOffsetNumber; - omaxoffnum = PageGetMaxOffsetNumber(opage); - for (;;) { + npage = BufferGetPage(nbuf); + + /* initialize the new bucket */ + _hash_pageinit(npage, BufferGetPageSize(nbuf)); + nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); + nopaque->hasho_prevblkno = InvalidBlockNumber; + nopaque->hasho_nextblkno = InvalidBlockNumber; + nopaque->hasho_flag = LH_BUCKET_PAGE; + nopaque->hasho_oaddr = InvalidOvflAddress; + nopaque->hasho_bucket = nbucket; + _hash_wrtnorelbuf(rel, nbuf); + /* - * at each iteration through this loop, each of these variables - * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum + * make sure the old bucket isn't empty. advance 'opage' and friends + * through the overflow bucket chain until we find a non-empty page. + * + * XXX we should only need this once, if we are careful to preserve the + * invariant that overflow pages are never empty. 
*/ + _hash_checkpage(opage, LH_BUCKET_PAGE); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + if (PageIsEmpty(opage)) + { + oblkno = oopaque->hasho_nextblkno; + _hash_relbuf(rel, obuf, HASH_WRITE); + if (!BlockNumberIsValid(oblkno)) + { - /* check if we're at the end of the page */ - if (ooffnum > omaxoffnum) { - /* at end of page, but check for overflow page */ - oblkno = oopaque->hasho_nextblkno; - if (BlockNumberIsValid(oblkno)) { - /* - * we ran out of tuples on this particular page, but - * we have more overflow pages; re-init values. - */ - _hash_wrtbuf(rel, obuf); + /* + * the old bucket is completely empty; of course, the new + * bucket will be as well, but since it's a base bucket page + * we don't care. + */ + _hash_relbuf(rel, nbuf, HASH_WRITE); + return; + } obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); opage = BufferGetPage(obuf); _hash_checkpage(opage, LH_OVERFLOW_PAGE); - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - - /* we're guaranteed that an ovfl page has at least 1 tuple */ - if (PageIsEmpty(opage)) { - elog(WARN, "_hash_splitpage: empty ovfl page %d!", - oblkno); + if (PageIsEmpty(opage)) + { + elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno); } - ooffnum = FirstOffsetNumber; - omaxoffnum = PageGetMaxOffsetNumber(opage); - } else { + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + } + + /* + * we are now guaranteed that 'opage' is not empty. partition the + * tuples in the old bucket between the old bucket and the new bucket, + * advancing along their respective overflow bucket chains and adding + * overflow pages as needed. + */ + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + for (;;) + { + /* - * we're at the end of the bucket chain, so now we're - * really done with everything. before quitting, call - * _hash_squeezebucket to ensure the tuples in the - * bucket (including the overflow pages) are packed as - * tightly as possible. 
+ * at each iteration through this loop, each of these variables + * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum */ - _hash_wrtbuf(rel, obuf); - _hash_wrtbuf(rel, nbuf); - _hash_squeezebucket(rel, metap, obucket); - return; - } - } - - /* hash on the tuple */ - hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum)); - itup = &(hitem->hash_itup); - itupdesc = RelationGetTupleDescriptor(rel); - datum = index_getattr(itup, 1, itupdesc, &null); - bucket = _hash_call(rel, metap, datum); - - if (bucket == nbucket) { - /* - * insert the tuple into the new bucket. if it doesn't - * fit on the current page in the new bucket, we must - * allocate a new overflow page and place the tuple on - * that page instead. - */ - itemsz = IndexTupleDSize(hitem->hash_itup) - + (sizeof(HashItemData) - sizeof(IndexTupleData)); - - itemsz = DOUBLEALIGN(itemsz); - - if (PageGetFreeSpace(npage) < itemsz) { - ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf); - _hash_wrtbuf(rel, nbuf); - nbuf = ovflbuf; - npage = BufferGetPage(nbuf); - _hash_checkpage(npage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - } - - noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage)); - PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED); - _hash_wrtnorelbuf(rel, nbuf); - - /* - * now delete the tuple from the old bucket. after this - * section of code, 'ooffnum' will actually point to the - * ItemId to which we would point if we had advanced it - * before the deletion (PageIndexTupleDelete repacks the - * ItemId array). this also means that 'omaxoffnum' is - * exactly one less than it used to be, so we really can - * just decrement it instead of calling - * PageGetMaxOffsetNumber. - */ - PageIndexTupleDelete(opage, ooffnum); - _hash_wrtnorelbuf(rel, obuf); - omaxoffnum = OffsetNumberPrev(omaxoffnum); - - /* - * tidy up. if the old page was an overflow page and it - * is now empty, we must free it (we want to preserve the - * invariant that overflow pages cannot be empty). 
- */ - if (PageIsEmpty(opage) && - (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) { - obuf = _hash_freeovflpage(rel, obuf); - - /* check that we're not through the bucket chain */ - if (BufferIsInvalid(obuf)) { - _hash_wrtbuf(rel, nbuf); - _hash_squeezebucket(rel, metap, obucket); - return; + + /* check if we're at the end of the page */ + if (ooffnum > omaxoffnum) + { + /* at end of page, but check for overflow page */ + oblkno = oopaque->hasho_nextblkno; + if (BlockNumberIsValid(oblkno)) + { + + /* + * we ran out of tuples on this particular page, but we + * have more overflow pages; re-init values. + */ + _hash_wrtbuf(rel, obuf); + obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); + opage = BufferGetPage(obuf); + _hash_checkpage(opage, LH_OVERFLOW_PAGE); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + + /* we're guaranteed that an ovfl page has at least 1 tuple */ + if (PageIsEmpty(opage)) + { + elog(WARN, "_hash_splitpage: empty ovfl page %d!", + oblkno); + } + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + } + else + { + + /* + * we're at the end of the bucket chain, so now we're + * really done with everything. before quitting, call + * _hash_squeezebucket to ensure the tuples in the bucket + * (including the overflow pages) are packed as tightly as + * possible. + */ + _hash_wrtbuf(rel, obuf); + _hash_wrtbuf(rel, nbuf); + _hash_squeezebucket(rel, metap, obucket); + return; + } } - - /* - * re-init. again, we're guaranteed that an ovfl page - * has at least one tuple. 
- */ - opage = BufferGetPage(obuf); - _hash_checkpage(opage, LH_OVERFLOW_PAGE); - oblkno = BufferGetBlockNumber(obuf); - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - if (PageIsEmpty(opage)) { - elog(WARN, "_hash_splitpage: empty overflow page %d", - oblkno); + + /* hash on the tuple */ + hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum)); + itup = &(hitem->hash_itup); + itupdesc = RelationGetTupleDescriptor(rel); + datum = index_getattr(itup, 1, itupdesc, &null); + bucket = _hash_call(rel, metap, datum); + + if (bucket == nbucket) + { + + /* + * insert the tuple into the new bucket. if it doesn't fit on + * the current page in the new bucket, we must allocate a new + * overflow page and place the tuple on that page instead. + */ + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + + itemsz = DOUBLEALIGN(itemsz); + + if (PageGetFreeSpace(npage) < itemsz) + { + ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf); + _hash_wrtbuf(rel, nbuf); + nbuf = ovflbuf; + npage = BufferGetPage(nbuf); + _hash_checkpage(npage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + } + + noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage)); + PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED); + _hash_wrtnorelbuf(rel, nbuf); + + /* + * now delete the tuple from the old bucket. after this + * section of code, 'ooffnum' will actually point to the + * ItemId to which we would point if we had advanced it before + * the deletion (PageIndexTupleDelete repacks the ItemId + * array). this also means that 'omaxoffnum' is exactly one + * less than it used to be, so we really can just decrement it + * instead of calling PageGetMaxOffsetNumber. + */ + PageIndexTupleDelete(opage, ooffnum); + _hash_wrtnorelbuf(rel, obuf); + omaxoffnum = OffsetNumberPrev(omaxoffnum); + + /* + * tidy up. 
if the old page was an overflow page and it is + * now empty, we must free it (we want to preserve the + * invariant that overflow pages cannot be empty). + */ + if (PageIsEmpty(opage) && + (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) + { + obuf = _hash_freeovflpage(rel, obuf); + + /* check that we're not through the bucket chain */ + if (BufferIsInvalid(obuf)) + { + _hash_wrtbuf(rel, nbuf); + _hash_squeezebucket(rel, metap, obucket); + return; + } + + /* + * re-init. again, we're guaranteed that an ovfl page has + * at least one tuple. + */ + opage = BufferGetPage(obuf); + _hash_checkpage(opage, LH_OVERFLOW_PAGE); + oblkno = BufferGetBlockNumber(obuf); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + if (PageIsEmpty(opage)) + { + elog(WARN, "_hash_splitpage: empty overflow page %d", + oblkno); + } + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + } + } + else + { + + /* + * the tuple stays on this page. we didn't move anything, so + * we didn't delete anything and therefore we don't have to + * change 'omaxoffnum'. + * + * XXX any hash value from [0, nbucket-1] will map to this + * bucket, which doesn't make sense to me. + */ + ooffnum = OffsetNumberNext(ooffnum); } - ooffnum = FirstOffsetNumber; - omaxoffnum = PageGetMaxOffsetNumber(opage); - } - } else { - /* - * the tuple stays on this page. we didn't move anything, - * so we didn't delete anything and therefore we don't - * have to change 'omaxoffnum'. - * - * XXX any hash value from [0, nbucket-1] will map to this - * bucket, which doesn't make sense to me. 
- */ - ooffnum = OffsetNumberNext(ooffnum); } - } - /*NOTREACHED*/ + /* NOTREACHED */ } diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c index bd776d68c0..79fa33f747 100644 --- a/src/backend/access/hash/hashscan.c +++ b/src/backend/access/hash/hashscan.c @@ -1,160 +1,167 @@ /*------------------------------------------------------------------------- * * hashscan.c-- - * manage scans on hash tables + * manage scans on hash tables * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.8 1996/11/15 18:36:31 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.9 1997/09/07 04:38:01 momjian Exp $ * * NOTES - * Because we can be doing an index scan on a relation while we - * update it, we need to avoid missing data that moves around in - * the index. The routines and global variables in this file - * guarantee that all scans in the local address space stay - * correctly positioned. This is all we need to worry about, since - * write locking guarantees that no one else will be on the same - * page at the same time as we are. + * Because we can be doing an index scan on a relation while we + * update it, we need to avoid missing data that moves around in + * the index. The routines and global variables in this file + * guarantee that all scans in the local address space stay + * correctly positioned. This is all we need to worry about, since + * write locking guarantees that no one else will be on the same + * page at the same time as we are. * - * The scheme is to manage a list of active scans in the current - * backend. Whenever we add or remove records from an index, we - * check the list of active scans to see if any has been affected. - * A scan is affected only if it is on the same relation, and the - * same page, as the update. + * The scheme is to manage a list of active scans in the current + * backend. 
Whenever we add or remove records from an index, we + * check the list of active scans to see if any has been affected. + * A scan is affected only if it is on the same relation, and the + * same page, as the update. * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> -static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); -static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); +static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); +static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); -typedef struct HashScanListData { - IndexScanDesc hashsl_scan; - struct HashScanListData *hashsl_next; -} HashScanListData; +typedef struct HashScanListData +{ + IndexScanDesc hashsl_scan; + struct HashScanListData *hashsl_next; +} HashScanListData; -typedef HashScanListData *HashScanList; +typedef HashScanListData *HashScanList; -static HashScanList HashScans = (HashScanList) NULL; +static HashScanList HashScans = (HashScanList) NULL; /* - * _Hash_regscan() -- register a new scan. + * _Hash_regscan() -- register a new scan. 
*/ void _hash_regscan(IndexScanDesc scan) { - HashScanList new_el; - - new_el = (HashScanList) palloc(sizeof(HashScanListData)); - new_el->hashsl_scan = scan; - new_el->hashsl_next = HashScans; - HashScans = new_el; + HashScanList new_el; + + new_el = (HashScanList) palloc(sizeof(HashScanListData)); + new_el->hashsl_scan = scan; + new_el->hashsl_next = HashScans; + HashScans = new_el; } /* - * _hash_dropscan() -- drop a scan from the scan list + * _hash_dropscan() -- drop a scan from the scan list */ void _hash_dropscan(IndexScanDesc scan) { - HashScanList chk, last; - - last = (HashScanList) NULL; - for (chk = HashScans; - chk != (HashScanList) NULL && chk->hashsl_scan != scan; - chk = chk->hashsl_next) { - last = chk; - } - - if (chk == (HashScanList) NULL) - elog(WARN, "hash scan list trashed; can't find 0x%lx", scan); - - if (last == (HashScanList) NULL) - HashScans = chk->hashsl_next; - else - last->hashsl_next = chk->hashsl_next; - - pfree (chk); + HashScanList chk, + last; + + last = (HashScanList) NULL; + for (chk = HashScans; + chk != (HashScanList) NULL && chk->hashsl_scan != scan; + chk = chk->hashsl_next) + { + last = chk; + } + + if (chk == (HashScanList) NULL) + elog(WARN, "hash scan list trashed; can't find 0x%lx", scan); + + if (last == (HashScanList) NULL) + HashScans = chk->hashsl_next; + else + last->hashsl_next = chk->hashsl_next; + + pfree(chk); } void _hash_adjscans(Relation rel, ItemPointer tid) { - HashScanList l; - Oid relid; - - relid = rel->rd_id; - for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) { - if (relid == l->hashsl_scan->relation->rd_id) - _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid), - ItemPointerGetOffsetNumber(tid)); - } + HashScanList l; + Oid relid; + + relid = rel->rd_id; + for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) + { + if (relid == l->hashsl_scan->relation->rd_id) + _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid), + ItemPointerGetOffsetNumber(tid)); + } 
} static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno) { - ItemPointer current; - Buffer buf; - Buffer metabuf; - HashScanOpaque so; - - if (!_hash_scantouched(scan, blkno, offno)) - return; - - metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ); - - so = (HashScanOpaque) scan->opaque; - buf = so->hashso_curbuf; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) { - _hash_step(scan, &buf, BackwardScanDirection, metabuf); - so->hashso_curbuf = buf; - } - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) { - ItemPointerData tmp; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; - _hash_step(scan, &buf, BackwardScanDirection, metabuf); - so->hashso_mrkbuf = buf; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; - } + ItemPointer current; + Buffer buf; + Buffer metabuf; + HashScanOpaque so; + + if (!_hash_scantouched(scan, blkno, offno)) + return; + + metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ); + + so = (HashScanOpaque) scan->opaque; + buf = so->hashso_curbuf; + + current = &(scan->currentItemData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + { + _hash_step(scan, &buf, BackwardScanDirection, metabuf); + so->hashso_curbuf = buf; + } + + current = &(scan->currentMarkData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + { + ItemPointerData tmp; + + tmp = *current; + *current = scan->currentItemData; + scan->currentItemData = tmp; + _hash_step(scan, &buf, BackwardScanDirection, metabuf); + so->hashso_mrkbuf = buf; + tmp 
= *current; + *current = scan->currentItemData; + scan->currentItemData = tmp; + } } -static bool +static bool _hash_scantouched(IndexScanDesc scan, - BlockNumber blkno, - OffsetNumber offno) + BlockNumber blkno, + OffsetNumber offno) { - ItemPointer current; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return (true); - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return (true); - - return (false); + ItemPointer current; + + current = &(scan->currentItemData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + return (true); + + current = &(scan->currentMarkData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + return (true); + + return (false); } diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index bc67b7f5aa..0a42ad0506 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -1,423 +1,467 @@ /*------------------------------------------------------------------------- * * hashsearch.c-- - * search code for postgres hash tables + * search code for postgres hash tables * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.10 1997/06/28 05:45:40 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.11 1997/09/07 04:38:02 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <storage/bufmgr.h> #ifndef HAVE_MEMMOVE -# include "regex/utils.h" 
+#include "regex/utils.h" #else -# include <string.h> -#endif +#include <string.h> +#endif /* - * _hash_search() -- Finds the page/bucket that the contains the - * scankey and loads it into *bufP. the buffer has a read lock. + * _hash_search() -- Finds the page/bucket that the contains the + * scankey and loads it into *bufP. the buffer has a read lock. */ void _hash_search(Relation rel, - int keysz, - ScanKey scankey, - Buffer *bufP, - HashMetaPage metap) + int keysz, + ScanKey scankey, + Buffer * bufP, + HashMetaPage metap) { - BlockNumber blkno; - Datum keyDatum; - Bucket bucket; - - if (scankey == (ScanKey) NULL || - (keyDatum = scankey[0].sk_argument) == (Datum) NULL) { - /* - * If the scankey argument is NULL, all tuples will satisfy - * the scan so we start the scan at the first bucket (bucket - * 0). - */ - bucket = 0; - } else { - bucket = _hash_call(rel, metap, keyDatum); - } - - blkno = BUCKET_TO_BLKNO(bucket); - - *bufP = _hash_getbuf(rel, blkno, HASH_READ); + BlockNumber blkno; + Datum keyDatum; + Bucket bucket; + + if (scankey == (ScanKey) NULL || + (keyDatum = scankey[0].sk_argument) == (Datum) NULL) + { + + /* + * If the scankey argument is NULL, all tuples will satisfy the + * scan so we start the scan at the first bucket (bucket 0). + */ + bucket = 0; + } + else + { + bucket = _hash_call(rel, metap, keyDatum); + } + + blkno = BUCKET_TO_BLKNO(bucket); + + *bufP = _hash_getbuf(rel, blkno, HASH_READ); } /* - * _hash_next() -- Get the next item in a scan. + * _hash_next() -- Get the next item in a scan. * - * On entry, we have a valid currentItemData in the scan, and a - * read lock on the page that contains that item. We do not have - * the page pinned. We return the next item in the scan. On - * exit, we have the page containing the next item locked but not - * pinned. + * On entry, we have a valid currentItemData in the scan, and a + * read lock on the page that contains that item. We do not have + * the page pinned. 
We return the next item in the scan. On + * exit, we have the page containing the next item locked but not + * pinned. */ RetrieveIndexResult _hash_next(IndexScanDesc scan, ScanDirection dir) { - Relation rel; - Buffer buf; - Buffer metabuf; - Page page; - OffsetNumber offnum; - RetrieveIndexResult res; - ItemPointer current; - HashItem hitem; - IndexTuple itup; - HashScanOpaque so; - - rel = scan->relation; - so = (HashScanOpaque) scan->opaque; - current = &(scan->currentItemData); - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); - - /* - * XXX 10 may 91: somewhere there's a bug in our management of the - * cached buffer for this scan. wei discovered it. the following - * is a workaround so he can work until i figure out what's going on. - */ - - if (!BufferIsValid(so->hashso_curbuf)) { - so->hashso_curbuf = _hash_getbuf(rel, - ItemPointerGetBlockNumber(current), - HASH_READ); - } - - /* we still have the buffer pinned and locked */ - buf = so->hashso_curbuf; - - /* - * step to next valid tuple. note that _hash_step releases our - * lock on 'metabuf'; if we switch to a new 'buf' while looking - * for the next tuple, we come back with a lock on that buffer. 
- */ - if (!_hash_step(scan, &buf, dir, metabuf)) { - return ((RetrieveIndexResult) NULL); - } - - /* if we're here, _hash_step found a valid tuple */ - current = &(scan->currentItemData); - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &hitem->hash_itup; - res = FormRetrieveIndexResult(current, &(itup->t_tid)); - - return (res); + Relation rel; + Buffer buf; + Buffer metabuf; + Page page; + OffsetNumber offnum; + RetrieveIndexResult res; + ItemPointer current; + HashItem hitem; + IndexTuple itup; + HashScanOpaque so; + + rel = scan->relation; + so = (HashScanOpaque) scan->opaque; + current = &(scan->currentItemData); + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + + /* + * XXX 10 may 91: somewhere there's a bug in our management of the + * cached buffer for this scan. wei discovered it. the following is + * a workaround so he can work until i figure out what's going on. + */ + + if (!BufferIsValid(so->hashso_curbuf)) + { + so->hashso_curbuf = _hash_getbuf(rel, + ItemPointerGetBlockNumber(current), + HASH_READ); + } + + /* we still have the buffer pinned and locked */ + buf = so->hashso_curbuf; + + /* + * step to next valid tuple. note that _hash_step releases our lock + * on 'metabuf'; if we switch to a new 'buf' while looking for the + * next tuple, we come back with a lock on that buffer. 
+ */ + if (!_hash_step(scan, &buf, dir, metabuf)) + { + return ((RetrieveIndexResult) NULL); + } + + /* if we're here, _hash_step found a valid tuple */ + current = &(scan->currentItemData); + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &hitem->hash_itup; + res = FormRetrieveIndexResult(current, &(itup->t_tid)); + + return (res); } static void _hash_readnext(Relation rel, - Buffer *bufp, Page *pagep, HashPageOpaque *opaquep) + Buffer * bufp, Page * pagep, HashPageOpaque * opaquep) { - BlockNumber blkno; - - blkno = (*opaquep)->hasho_nextblkno; - _hash_relbuf(rel, *bufp, HASH_READ); - *bufp = InvalidBuffer; - if (BlockNumberIsValid(blkno)) { - *bufp = _hash_getbuf(rel, blkno, HASH_READ); - *pagep = BufferGetPage(*bufp); - _hash_checkpage(*pagep, LH_OVERFLOW_PAGE); - *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); - Assert(!PageIsEmpty(*pagep)); - } + BlockNumber blkno; + + blkno = (*opaquep)->hasho_nextblkno; + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + if (BlockNumberIsValid(blkno)) + { + *bufp = _hash_getbuf(rel, blkno, HASH_READ); + *pagep = BufferGetPage(*bufp); + _hash_checkpage(*pagep, LH_OVERFLOW_PAGE); + *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); + Assert(!PageIsEmpty(*pagep)); + } } static void _hash_readprev(Relation rel, - Buffer *bufp, Page *pagep, HashPageOpaque *opaquep) + Buffer * bufp, Page * pagep, HashPageOpaque * opaquep) { - BlockNumber blkno; - - blkno = (*opaquep)->hasho_prevblkno; - _hash_relbuf(rel, *bufp, HASH_READ); - *bufp = InvalidBuffer; - if (BlockNumberIsValid(blkno)) { - *bufp = _hash_getbuf(rel, blkno, HASH_READ); - *pagep = BufferGetPage(*bufp); - _hash_checkpage(*pagep, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); - if (PageIsEmpty(*pagep)) { - 
Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE); - _hash_relbuf(rel, *bufp, HASH_READ); - *bufp = InvalidBuffer; + BlockNumber blkno; + + blkno = (*opaquep)->hasho_prevblkno; + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + if (BlockNumberIsValid(blkno)) + { + *bufp = _hash_getbuf(rel, blkno, HASH_READ); + *pagep = BufferGetPage(*bufp); + _hash_checkpage(*pagep, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); + if (PageIsEmpty(*pagep)) + { + Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE); + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + } } - } } /* - * _hash_first() -- Find the first item in a scan. + * _hash_first() -- Find the first item in a scan. * - * Return the RetrieveIndexResult of the first item in the tree that - * satisfies the qualificatin associated with the scan descriptor. On - * exit, the page containing the current index tuple is read locked - * and pinned, and the scan's opaque data entry is updated to - * include the buffer. + * Return the RetrieveIndexResult of the first item in the tree that + * satisfies the qualificatin associated with the scan descriptor. On + * exit, the page containing the current index tuple is read locked + * and pinned, and the scan's opaque data entry is updated to + * include the buffer. 
*/ RetrieveIndexResult _hash_first(IndexScanDesc scan, ScanDirection dir) { - Relation rel; - Buffer buf; - Buffer metabuf; - Page page; - HashPageOpaque opaque; - HashMetaPage metap; - HashItem hitem; - IndexTuple itup; - ItemPointer current; - OffsetNumber offnum; - RetrieveIndexResult res; - HashScanOpaque so; - - rel = scan->relation; - so = (HashScanOpaque) scan->opaque; - current = &(scan->currentItemData); - - metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - /* - * XXX -- The attribute number stored in the scan key is the attno - * in the heap relation. We need to transmogrify this into - * the index relation attno here. For the moment, we have - * hardwired attno == 1. - */ - - /* find the correct bucket page and load it into buf */ - _hash_search(rel, 1, scan->keyData, &buf, metap); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - - /* - * if we are scanning forward, we need to find the first non-empty - * page (if any) in the bucket chain. since overflow pages are - * never empty, this had better be either the bucket page or the - * first overflow page. - * - * if we are scanning backward, we always go all the way to the - * end of the bucket chain. - */ - if (PageIsEmpty(page)) { - if (BlockNumberIsValid(opaque->hasho_nextblkno)) { - _hash_readnext(rel, &buf, &page, &opaque); - } else { - ItemPointerSetInvalid(current); - so->hashso_curbuf = InvalidBuffer; - /* - * If there is no scankeys, all tuples will satisfy - * the scan - so we continue in _hash_step to get - * tuples from all buckets. 
- vadim 04/29/97 - */ - if ( scan->numberOfKeys >= 1 ) - { - _hash_relbuf(rel, buf, HASH_READ); - _hash_relbuf(rel, metabuf, HASH_READ); - return ((RetrieveIndexResult) NULL); - } + Relation rel; + Buffer buf; + Buffer metabuf; + Page page; + HashPageOpaque opaque; + HashMetaPage metap; + HashItem hitem; + IndexTuple itup; + ItemPointer current; + OffsetNumber offnum; + RetrieveIndexResult res; + HashScanOpaque so; + + rel = scan->relation; + so = (HashScanOpaque) scan->opaque; + current = &(scan->currentItemData); + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* + * XXX -- The attribute number stored in the scan key is the attno in + * the heap relation. We need to transmogrify this into the index + * relation attno here. For the moment, we have hardwired attno == 1. + */ + + /* find the correct bucket page and load it into buf */ + _hash_search(rel, 1, scan->keyData, &buf, metap); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + /* + * if we are scanning forward, we need to find the first non-empty + * page (if any) in the bucket chain. since overflow pages are never + * empty, this had better be either the bucket page or the first + * overflow page. + * + * if we are scanning backward, we always go all the way to the end of + * the bucket chain. + */ + if (PageIsEmpty(page)) + { + if (BlockNumberIsValid(opaque->hasho_nextblkno)) + { + _hash_readnext(rel, &buf, &page, &opaque); + } + else + { + ItemPointerSetInvalid(current); + so->hashso_curbuf = InvalidBuffer; + + /* + * If there is no scankeys, all tuples will satisfy the scan - + * so we continue in _hash_step to get tuples from all + * buckets. 
- vadim 04/29/97 + */ + if (scan->numberOfKeys >= 1) + { + _hash_relbuf(rel, buf, HASH_READ); + _hash_relbuf(rel, metabuf, HASH_READ); + return ((RetrieveIndexResult) NULL); + } + } } - } - if (ScanDirectionIsBackward(dir)) { - while (BlockNumberIsValid(opaque->hasho_nextblkno)) { - _hash_readnext(rel, &buf, &page, &opaque); + if (ScanDirectionIsBackward(dir)) + { + while (BlockNumberIsValid(opaque->hasho_nextblkno)) + { + _hash_readnext(rel, &buf, &page, &opaque); + } + } + + if (!_hash_step(scan, &buf, dir, metabuf)) + { + return ((RetrieveIndexResult) NULL); } - } - - if (!_hash_step(scan, &buf, dir, metabuf)) { - return ((RetrieveIndexResult) NULL); - } - - /* if we're here, _hash_step found a valid tuple */ - current = &(scan->currentItemData); - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &hitem->hash_itup; - res = FormRetrieveIndexResult(current, &(itup->t_tid)); - - return (res); + + /* if we're here, _hash_step found a valid tuple */ + current = &(scan->currentItemData); + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &hitem->hash_itup; + res = FormRetrieveIndexResult(current, &(itup->t_tid)); + + return (res); } /* - * _hash_step() -- step to the next valid item in a scan in the bucket. + * _hash_step() -- step to the next valid item in a scan in the bucket. * - * If no valid record exists in the requested direction, return - * false. Else, return true and set the CurrentItemData for the - * scan to the right thing. - * - * 'bufP' points to the buffer which contains the current page - * that we'll step through. + * If no valid record exists in the requested direction, return + * false. 
Else, return true and set the CurrentItemData for the + * scan to the right thing. * - * 'metabuf' is released when this returns. + * 'bufP' points to the buffer which contains the current page + * that we'll step through. + * + * 'metabuf' is released when this returns. */ bool -_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf) +_hash_step(IndexScanDesc scan, Buffer * bufP, ScanDirection dir, Buffer metabuf) { - Relation rel; - ItemPointer current; - HashScanOpaque so; - int allbuckets; - HashMetaPage metap; - Buffer buf; - Page page; - HashPageOpaque opaque; - OffsetNumber maxoff; - OffsetNumber offnum; - Bucket bucket; - BlockNumber blkno; - HashItem hitem; - IndexTuple itup; - - rel = scan->relation; - current = &(scan->currentItemData); - so = (HashScanOpaque) scan->opaque; - allbuckets = (scan->numberOfKeys < 1); - - metap = (HashMetaPage) BufferGetPage(metabuf); - _hash_checkpage((Page) metap, LH_META_PAGE); - - buf = *bufP; - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - - /* - * If _hash_step is called from _hash_first, current will not be - * valid, so we can't dereference it. However, in that case, we - * presumably want to start at the beginning/end of the page... - */ - maxoff = PageGetMaxOffsetNumber(page); - if (ItemPointerIsValid(current)) { - offnum = ItemPointerGetOffsetNumber(current); - } else { - offnum = InvalidOffsetNumber; - } - - /* - * 'offnum' now points to the last tuple we have seen (if any). - * - * continue to step through tuples until: - * 1) we get to the end of the bucket chain or - * 2) we find a valid tuple. 
- */ - do { - bucket = opaque->hasho_bucket; - - switch (dir) { - case ForwardScanDirection: - if (offnum != InvalidOffsetNumber) { - offnum = OffsetNumberNext(offnum); /* move forward */ - } else { - offnum = FirstOffsetNumber; /* new page */ - } - while (offnum > maxoff) { - /* - * either this page is empty (maxoff == - * InvalidOffsetNumber) or we ran off the end. - */ - _hash_readnext(rel, &buf, &page, &opaque); - if (BufferIsInvalid(buf)) { /* end of chain */ - if (allbuckets && bucket < metap->hashm_maxbucket) { - ++bucket; - blkno = BUCKET_TO_BLKNO(bucket); - buf = _hash_getbuf(rel, blkno, HASH_READ); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(opaque->hasho_bucket == bucket); - while (PageIsEmpty(page) && - BlockNumberIsValid(opaque->hasho_nextblkno)) { - _hash_readnext(rel, &buf, &page, &opaque); + Relation rel; + ItemPointer current; + HashScanOpaque so; + int allbuckets; + HashMetaPage metap; + Buffer buf; + Page page; + HashPageOpaque opaque; + OffsetNumber maxoff; + OffsetNumber offnum; + Bucket bucket; + BlockNumber blkno; + HashItem hitem; + IndexTuple itup; + + rel = scan->relation; + current = &(scan->currentItemData); + so = (HashScanOpaque) scan->opaque; + allbuckets = (scan->numberOfKeys < 1); + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + buf = *bufP; + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + /* + * If _hash_step is called from _hash_first, current will not be + * valid, so we can't dereference it. However, in that case, we + * presumably want to start at the beginning/end of the page... 
+ */ + maxoff = PageGetMaxOffsetNumber(page); + if (ItemPointerIsValid(current)) + { + offnum = ItemPointerGetOffsetNumber(current); + } + else + { + offnum = InvalidOffsetNumber; + } + + /* + * 'offnum' now points to the last tuple we have seen (if any). + * + * continue to step through tuples until: 1) we get to the end of the + * bucket chain or 2) we find a valid tuple. + */ + do + { + bucket = opaque->hasho_bucket; + + switch (dir) + { + case ForwardScanDirection: + if (offnum != InvalidOffsetNumber) + { + offnum = OffsetNumberNext(offnum); /* move forward */ } - maxoff = PageGetMaxOffsetNumber(page); - offnum = FirstOffsetNumber; - } else { - maxoff = offnum = InvalidOffsetNumber; - break; /* while */ - } - } else { - /* _hash_readnext never returns an empty page */ - maxoff = PageGetMaxOffsetNumber(page); - offnum = FirstOffsetNumber; - } - } - break; - case BackwardScanDirection: - if (offnum != InvalidOffsetNumber) { - offnum = OffsetNumberPrev(offnum); /* move back */ - } else { - offnum = maxoff; /* new page */ - } - while (offnum < FirstOffsetNumber) { - /* - * either this page is empty (offnum == - * InvalidOffsetNumber) or we ran off the end. - */ - _hash_readprev(rel, &buf, &page, &opaque); - if (BufferIsInvalid(buf)) { /* end of chain */ - if (allbuckets && bucket > 0) { - --bucket; - blkno = BUCKET_TO_BLKNO(bucket); - buf = _hash_getbuf(rel, blkno, HASH_READ); - page = BufferGetPage(buf); - _hash_checkpage(page, LH_BUCKET_PAGE); - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(opaque->hasho_bucket == bucket); - while (BlockNumberIsValid(opaque->hasho_nextblkno)) { - _hash_readnext(rel, &buf, &page, &opaque); + else + { + offnum = FirstOffsetNumber; /* new page */ + } + while (offnum > maxoff) + { + + /* + * either this page is empty (maxoff == + * InvalidOffsetNumber) or we ran off the end. 
+ */ + _hash_readnext(rel, &buf, &page, &opaque); + if (BufferIsInvalid(buf)) + { /* end of chain */ + if (allbuckets && bucket < metap->hashm_maxbucket) + { + ++bucket; + blkno = BUCKET_TO_BLKNO(bucket); + buf = _hash_getbuf(rel, blkno, HASH_READ); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(opaque->hasho_bucket == bucket); + while (PageIsEmpty(page) && + BlockNumberIsValid(opaque->hasho_nextblkno)) + { + _hash_readnext(rel, &buf, &page, &opaque); + } + maxoff = PageGetMaxOffsetNumber(page); + offnum = FirstOffsetNumber; + } + else + { + maxoff = offnum = InvalidOffsetNumber; + break; /* while */ + } + } + else + { + /* _hash_readnext never returns an empty page */ + maxoff = PageGetMaxOffsetNumber(page); + offnum = FirstOffsetNumber; + } + } + break; + case BackwardScanDirection: + if (offnum != InvalidOffsetNumber) + { + offnum = OffsetNumberPrev(offnum); /* move back */ + } + else + { + offnum = maxoff;/* new page */ } - maxoff = offnum = PageGetMaxOffsetNumber(page); - } else { - maxoff = offnum = InvalidOffsetNumber; - break; /* while */ - } - } else { - /* _hash_readprev never returns an empty page */ - maxoff = offnum = PageGetMaxOffsetNumber(page); + while (offnum < FirstOffsetNumber) + { + + /* + * either this page is empty (offnum == + * InvalidOffsetNumber) or we ran off the end. 
+ */ + _hash_readprev(rel, &buf, &page, &opaque); + if (BufferIsInvalid(buf)) + { /* end of chain */ + if (allbuckets && bucket > 0) + { + --bucket; + blkno = BUCKET_TO_BLKNO(bucket); + buf = _hash_getbuf(rel, blkno, HASH_READ); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(opaque->hasho_bucket == bucket); + while (BlockNumberIsValid(opaque->hasho_nextblkno)) + { + _hash_readnext(rel, &buf, &page, &opaque); + } + maxoff = offnum = PageGetMaxOffsetNumber(page); + } + else + { + maxoff = offnum = InvalidOffsetNumber; + break; /* while */ + } + } + else + { + /* _hash_readprev never returns an empty page */ + maxoff = offnum = PageGetMaxOffsetNumber(page); + } + } + break; + default: + /* NoMovementScanDirection */ + /* this should not be reached */ + break; } - } - break; - default: - /* NoMovementScanDirection */ - /* this should not be reached */ - break; - } - /* we ran off the end of the world without finding a match */ - if (offnum == InvalidOffsetNumber) { - _hash_relbuf(rel, metabuf, HASH_READ); - *bufP = so->hashso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(current); - return(false); - } - - /* get ready to check this tuple */ - hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &hitem->hash_itup; - } while (!_hash_checkqual(scan, itup)); - - /* if we made it to here, we've found a valid tuple */ - _hash_relbuf(rel, metabuf, HASH_READ); - blkno = BufferGetBlockNumber(buf); - *bufP = so->hashso_curbuf = buf; - ItemPointerSet(current, blkno, offnum); - return(true); + /* we ran off the end of the world without finding a match */ + if (offnum == InvalidOffsetNumber) + { + _hash_relbuf(rel, metabuf, HASH_READ); + *bufP = so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(current); + return (false); + } + + /* get ready to check this tuple */ + hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &hitem->hash_itup; 
+ } while (!_hash_checkqual(scan, itup)); + + /* if we made it to here, we've found a valid tuple */ + _hash_relbuf(rel, metabuf, HASH_READ); + blkno = BufferGetBlockNumber(buf); + *bufP = so->hashso_curbuf = buf; + ItemPointerSet(current, blkno, offnum); + return (true); } diff --git a/src/backend/access/hash/hashstrat.c b/src/backend/access/hash/hashstrat.c index d2f1e513c3..f1bdbdb8a3 100644 --- a/src/backend/access/hash/hashstrat.c +++ b/src/backend/access/hash/hashstrat.c @@ -1,80 +1,83 @@ /*------------------------------------------------------------------------- * * btstrat.c-- - * Srategy map entries for the btree indexed access method + * Srategy map entries for the btree indexed access method * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.9 1997/08/20 02:01:42 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/Attic/hashstrat.c,v 1.10 1997/09/07 04:38:03 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <access/istrat.h> -/* - * only one valid strategy for hash tables: equality. +/* + * only one valid strategy for hash tables: equality. 
*/ #ifdef NOT_USED -static StrategyNumber HTNegate[1] = { - InvalidStrategy +static StrategyNumber HTNegate[1] = { + InvalidStrategy }; -static StrategyNumber HTCommute[1] = { - HTEqualStrategyNumber +static StrategyNumber HTCommute[1] = { + HTEqualStrategyNumber }; -static StrategyNumber HTNegateCommute[1] = { - InvalidStrategy +static StrategyNumber HTNegateCommute[1] = { + InvalidStrategy }; -static StrategyEvaluationData HTEvaluationData = { - /* XXX static for simplicity */ +static StrategyEvaluationData HTEvaluationData = { + /* XXX static for simplicity */ - HTMaxStrategyNumber, - (StrategyTransformMap)HTNegate, - (StrategyTransformMap)HTCommute, - (StrategyTransformMap)HTNegateCommute, - {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL} + HTMaxStrategyNumber, + (StrategyTransformMap) HTNegate, + (StrategyTransformMap) HTCommute, + (StrategyTransformMap) HTNegateCommute, + {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} }; + #endif /* ---------------------------------------------------------------- - * RelationGetHashStrategy + * RelationGetHashStrategy * ---------------------------------------------------------------- */ #ifdef NOT_USED -static StrategyNumber +static StrategyNumber _hash_getstrat(Relation rel, - AttrNumber attno, - RegProcedure proc) + AttrNumber attno, + RegProcedure proc) { - StrategyNumber strat; + StrategyNumber strat; - strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc); + strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc); - Assert(StrategyNumberIsValid(strat)); + Assert(StrategyNumberIsValid(strat)); - return (strat); + return (strat); } + #endif #ifdef NOT_USED -static bool +static bool _hash_invokestrat(Relation rel, - AttrNumber attno, - StrategyNumber strat, - Datum left, - Datum right) + AttrNumber attno, + StrategyNumber strat, + Datum left, + Datum right) { - return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat, - left, right)); + return 
(RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat, + left, right)); } + #endif diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c index dd0b473745..f9fbe0e2d1 100644 --- a/src/backend/access/hash/hashutil.c +++ b/src/backend/access/hash/hashutil.c @@ -1,109 +1,110 @@ /*------------------------------------------------------------------------- * * btutils.c-- - * Utility code for Postgres btree implementation. + * Utility code for Postgres btree implementation. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.9 1997/08/14 05:01:32 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.10 1997/09/07 04:38:04 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <access/hash.h> #include <fmgr.h> #include <utils/memutils.h> #include <access/iqual.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif ScanKey _hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap) { - ScanKey skey; - TupleDesc itupdesc; - int natts; - AttrNumber i; - Datum arg; - RegProcedure proc; - bool null; - - natts = rel->rd_rel->relnatts; - itupdesc = RelationGetTupleDescriptor(rel); - - skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); - - for (i = 0; i < natts; i++) { - arg = index_getattr(itup, i + 1, itupdesc, &null); - proc = metap->hashm_procid; - ScanKeyEntryInitialize(&skey[i], - 0x0, (AttrNumber) (i + 1), proc, arg); - } - - return (skey); -} + ScanKey skey; + TupleDesc itupdesc; + int natts; + AttrNumber i; + Datum arg; + RegProcedure proc; + bool null; + + natts = rel->rd_rel->relnatts; + itupdesc = RelationGetTupleDescriptor(rel); + + skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); + + for (i = 0; i < natts; i++) + { + arg = index_getattr(itup, i 
+ 1, itupdesc, &null); + proc = metap->hashm_procid; + ScanKeyEntryInitialize(&skey[i], + 0x0, (AttrNumber) (i + 1), proc, arg); + } + + return (skey); +} void _hash_freeskey(ScanKey skey) { - pfree(skey); + pfree(skey); } bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup) { - if (scan->numberOfKeys > 0) - return (index_keytest(itup, - RelationGetTupleDescriptor(scan->relation), - scan->numberOfKeys, scan->keyData)); - else - return (true); + if (scan->numberOfKeys > 0) + return (index_keytest(itup, + RelationGetTupleDescriptor(scan->relation), + scan->numberOfKeys, scan->keyData)); + else + return (true); } HashItem _hash_formitem(IndexTuple itup) { - int nbytes_hitem; - HashItem hitem; - Size tuplen; - - /* disallow nulls in hash keys */ - if (itup->t_info & INDEX_NULL_MASK) - elog(WARN, "hash indices cannot include null keys"); - - /* make a copy of the index tuple with room for the sequence number */ - tuplen = IndexTupleSize(itup); - nbytes_hitem = tuplen + - (sizeof(HashItemData) - sizeof(IndexTupleData)); - - hitem = (HashItem) palloc(nbytes_hitem); - memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen); - - return (hitem); + int nbytes_hitem; + HashItem hitem; + Size tuplen; + + /* disallow nulls in hash keys */ + if (itup->t_info & INDEX_NULL_MASK) + elog(WARN, "hash indices cannot include null keys"); + + /* make a copy of the index tuple with room for the sequence number */ + tuplen = IndexTupleSize(itup); + nbytes_hitem = tuplen + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + + hitem = (HashItem) palloc(nbytes_hitem); + memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen); + + return (hitem); } Bucket _hash_call(Relation rel, HashMetaPage metap, Datum key) { - uint32 n; - Bucket bucket; - RegProcedure proc; - - proc = metap->hashm_procid; - n = (uint32) fmgr(proc, key); - bucket = n & metap->hashm_highmask; - if (bucket > metap->hashm_maxbucket) - bucket = bucket & metap->hashm_lowmask; - return (bucket); + uint32 n; + 
Bucket bucket; + RegProcedure proc; + + proc = metap->hashm_procid; + n = (uint32) fmgr(proc, key); + bucket = n & metap->hashm_highmask; + if (bucket > metap->hashm_maxbucket) + bucket = bucket & metap->hashm_lowmask; + return (bucket); } /* @@ -112,12 +113,13 @@ _hash_call(Relation rel, HashMetaPage metap, Datum key) uint32 _hash_log2(uint32 num) { - uint32 i, limit; - - limit = 1; - for (i = 0; limit < num; limit = limit << 1, i++) - ; - return (i); + uint32 i, + limit; + + limit = 1; + for (i = 0; limit < num; limit = limit << 1, i++) + ; + return (i); } /* @@ -126,19 +128,20 @@ _hash_log2(uint32 num) void _hash_checkpage(Page page, int flags) { - HashPageOpaque opaque; + HashPageOpaque opaque; - Assert(page); - Assert(((PageHeader)(page))->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData))); + Assert(page); + Assert(((PageHeader) (page))->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData))); #if 1 - Assert(((PageHeader)(page))->pd_upper <= - (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData)))); - Assert(((PageHeader)(page))->pd_special == - (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData)))); - Assert(((PageHeader)(page))->pd_opaque.od_pagesize == BLCKSZ); + Assert(((PageHeader) (page))->pd_upper <= + (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData)))); + Assert(((PageHeader) (page))->pd_special == + (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData)))); + Assert(((PageHeader) (page))->pd_opaque.od_pagesize == BLCKSZ); #endif - if (flags) { - opaque = (HashPageOpaque) PageGetSpecialPointer(page); - Assert(opaque->hasho_flag & flags); - } + if (flags) + { + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(opaque->hasho_flag & flags); + } } diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index f199803a71..b7ab862514 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1,74 +1,74 @@ /*------------------------------------------------------------------------- * * 
heapam.c-- - * heap access method code + * heap access method code * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.15 1997/08/27 09:00:20 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.16 1997/09/07 04:38:09 momjian Exp $ * * * INTERFACE ROUTINES - * heapgettup - fetch next heap tuple from a scan - * heap_open - open a heap relation by relationId - * heap_openr - open a heap relation by name - * heap_close - close a heap relation - * heap_beginscan - begin relation scan - * heap_rescan - restart a relation scan - * heap_endscan - end relation scan - * heap_getnext - retrieve next tuple in scan - * heap_fetch - retrive tuple with tid - * heap_insert - insert tuple into a relation - * heap_delete - delete a tuple from a relation - * heap_replace - replace a tuple in a relation with another tuple - * heap_markpos - mark scan position - * heap_restrpos - restore position to marked location - * + * heapgettup - fetch next heap tuple from a scan + * heap_open - open a heap relation by relationId + * heap_openr - open a heap relation by name + * heap_close - close a heap relation + * heap_beginscan - begin relation scan + * heap_rescan - restart a relation scan + * heap_endscan - end relation scan + * heap_getnext - retrieve next tuple in scan + * heap_fetch - retrive tuple with tid + * heap_insert - insert tuple into a relation + * heap_delete - delete a tuple from a relation + * heap_replace - replace a tuple in a relation with another tuple + * heap_markpos - mark scan position + * heap_restrpos - restore position to marked location + * * NOTES - * This file contains the heap_ routines which implement - * the POSTGRES heap access method used for all POSTGRES - * relations. + * This file contains the heap_ routines which implement + * the POSTGRES heap access method used for all POSTGRES + * relations. 
* * OLD COMMENTS - * struct relscan hints: (struct should be made AM independent?) + * struct relscan hints: (struct should be made AM independent?) * - * rs_ctid is the tid of the last tuple returned by getnext. - * rs_ptid and rs_ntid are the tids of the previous and next tuples - * returned by getnext, respectively. NULL indicates an end of - * scan (either direction); NON indicates an unknow value. + * rs_ctid is the tid of the last tuple returned by getnext. + * rs_ptid and rs_ntid are the tids of the previous and next tuples + * returned by getnext, respectively. NULL indicates an end of + * scan (either direction); NON indicates an unknow value. * - * possible combinations: - * rs_p rs_c rs_n interpretation - * NULL NULL NULL empty scan - * NULL NULL NON at begining of scan - * NULL NULL t1 at begining of scan (with cached tid) - * NON NULL NULL at end of scan - * t1 NULL NULL at end of scan (with cached tid) - * NULL t1 NULL just returned only tuple - * NULL t1 NON just returned first tuple - * NULL t1 t2 returned first tuple (with cached tid) - * NON t1 NULL just returned last tuple - * t2 t1 NULL returned last tuple (with cached tid) - * t1 t2 NON in the middle of a forward scan - * NON t2 t1 in the middle of a reverse scan - * ti tj tk in the middle of a scan (w cached tid) + * possible combinations: + * rs_p rs_c rs_n interpretation + * NULL NULL NULL empty scan + * NULL NULL NON at begining of scan + * NULL NULL t1 at begining of scan (with cached tid) + * NON NULL NULL at end of scan + * t1 NULL NULL at end of scan (with cached tid) + * NULL t1 NULL just returned only tuple + * NULL t1 NON just returned first tuple + * NULL t1 t2 returned first tuple (with cached tid) + * NON t1 NULL just returned last tuple + * t2 t1 NULL returned last tuple (with cached tid) + * t1 t2 NON in the middle of a forward scan + * NON t2 t1 in the middle of a reverse scan + * ti tj tk in the middle of a scan (w cached tid) * - * Here NULL is ...tup == NULL && ...buf == 
InvalidBuffer, - * and NON is ...tup == NULL && ...buf == UnknownBuffer. + * Here NULL is ...tup == NULL && ...buf == InvalidBuffer, + * and NON is ...tup == NULL && ...buf == UnknownBuffer. * - * Currently, the NONTID values are not cached with their actual - * values by getnext. Values may be cached by markpos since it stores - * all three tids. + * Currently, the NONTID values are not cached with their actual + * values by getnext. Values may be cached by markpos since it stores + * all three tids. * - * NOTE: the calls to elog() must stop. Should decide on an interface - * between the general and specific AM calls. + * NOTE: the calls to elog() must stop. Should decide on an interface + * between the general and specific AM calls. * - * XXX probably do not need a free tuple routine for heaps. - * Huh? Free tuple is not necessary for tuples returned by scans, but - * is necessary for tuples which are returned by - * RelationGetTupleByItemPointer. -hirohama + * XXX probably do not need a free tuple routine for heaps. + * Huh? Free tuple is not necessary for tuples returned by scans, but + * is necessary for tuples which are returned by + * RelationGetTupleByItemPointer. 
-hirohama * *------------------------------------------------------------------------- */ @@ -91,644 +91,706 @@ #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static bool ImmediateInvalidation; +static bool ImmediateInvalidation; /* ---------------------------------------------------------------- - * heap support routines + * heap support routines * ---------------------------------------------------------------- */ /* ---------------- - * initsdesc - sdesc code common to heap_beginscan and heap_rescan + * initsdesc - sdesc code common to heap_beginscan and heap_rescan * ---------------- */ static void initsdesc(HeapScanDesc sdesc, - Relation relation, - int atend, - unsigned nkeys, - ScanKey key) + Relation relation, + int atend, + unsigned nkeys, + ScanKey key) { - if (!RelationGetNumberOfBlocks(relation)) { - /* ---------------- - * relation is empty - * ---------------- - */ - sdesc->rs_ntup = sdesc->rs_ctup = sdesc->rs_ptup = NULL; - sdesc->rs_nbuf = sdesc->rs_cbuf = sdesc->rs_pbuf = InvalidBuffer; - } else if (atend) { - /* ---------------- - * reverse scan - * ---------------- - */ - sdesc->rs_ntup = sdesc->rs_ctup = NULL; - sdesc->rs_nbuf = sdesc->rs_cbuf = InvalidBuffer; - sdesc->rs_ptup = NULL; - sdesc->rs_pbuf = UnknownBuffer; - } else { + if (!RelationGetNumberOfBlocks(relation)) + { + /* ---------------- + * relation is empty + * ---------------- + */ + sdesc->rs_ntup = sdesc->rs_ctup = sdesc->rs_ptup = NULL; + sdesc->rs_nbuf = sdesc->rs_cbuf = sdesc->rs_pbuf = InvalidBuffer; + } + else if (atend) + { + /* ---------------- + * reverse scan + * ---------------- + */ + sdesc->rs_ntup = sdesc->rs_ctup = NULL; + sdesc->rs_nbuf = sdesc->rs_cbuf = InvalidBuffer; + sdesc->rs_ptup = NULL; + sdesc->rs_pbuf = UnknownBuffer; + } + else + { + /* ---------------- + * forward scan + * ---------------- + */ + sdesc->rs_ctup = sdesc->rs_ptup = NULL; + sdesc->rs_cbuf = sdesc->rs_pbuf = 
InvalidBuffer; + sdesc->rs_ntup = NULL; + sdesc->rs_nbuf = UnknownBuffer; + } /* invalid too */ + + /* we don't have a marked position... */ + ItemPointerSetInvalid(&(sdesc->rs_mptid)); + ItemPointerSetInvalid(&(sdesc->rs_mctid)); + ItemPointerSetInvalid(&(sdesc->rs_mntid)); + ItemPointerSetInvalid(&(sdesc->rs_mcd)); + /* ---------------- - * forward scan + * copy the scan key, if appropriate * ---------------- */ - sdesc->rs_ctup = sdesc->rs_ptup = NULL; - sdesc->rs_cbuf = sdesc->rs_pbuf = InvalidBuffer; - sdesc->rs_ntup = NULL; - sdesc->rs_nbuf = UnknownBuffer; - } /* invalid too */ - - /* we don't have a marked position... */ - ItemPointerSetInvalid(&(sdesc->rs_mptid)); - ItemPointerSetInvalid(&(sdesc->rs_mctid)); - ItemPointerSetInvalid(&(sdesc->rs_mntid)); - ItemPointerSetInvalid(&(sdesc->rs_mcd)); - - /* ---------------- - * copy the scan key, if appropriate - * ---------------- - */ - if (key != NULL) - memmove(sdesc->rs_key, key, nkeys * sizeof(ScanKeyData)); + if (key != NULL) + memmove(sdesc->rs_key, key, nkeys * sizeof(ScanKeyData)); } /* ---------------- - * unpinsdesc - code common to heap_rescan and heap_endscan + * unpinsdesc - code common to heap_rescan and heap_endscan * ---------------- */ static void unpinsdesc(HeapScanDesc sdesc) { - if (BufferIsValid(sdesc->rs_pbuf)) { - ReleaseBuffer(sdesc->rs_pbuf); - } - - /* ------------------------------------ - * Scan will pin buffer one for each non-NULL tuple pointer - * (ptup, ctup, ntup), so they have to be unpinned multiple - * times. - * ------------------------------------ - */ - if (BufferIsValid(sdesc->rs_cbuf)) { - ReleaseBuffer(sdesc->rs_cbuf); - } - - if (BufferIsValid(sdesc->rs_nbuf)) { - ReleaseBuffer(sdesc->rs_nbuf); - } + if (BufferIsValid(sdesc->rs_pbuf)) + { + ReleaseBuffer(sdesc->rs_pbuf); + } + + /* ------------------------------------ + * Scan will pin buffer one for each non-NULL tuple pointer + * (ptup, ctup, ntup), so they have to be unpinned multiple + * times. 
+ * ------------------------------------ + */ + if (BufferIsValid(sdesc->rs_cbuf)) + { + ReleaseBuffer(sdesc->rs_cbuf); + } + + if (BufferIsValid(sdesc->rs_nbuf)) + { + ReleaseBuffer(sdesc->rs_nbuf); + } } /* ------------------------------------------ - * nextpage + * nextpage * - * figure out the next page to scan after the current page - * taking into account of possible adjustment of degrees of - * parallelism + * figure out the next page to scan after the current page + * taking into account of possible adjustment of degrees of + * parallelism * ------------------------------------------ */ static int nextpage(int page, int dir) { - return((dir<0)?page-1:page+1); + return ((dir < 0) ? page - 1 : page + 1); } /* ---------------- - * heapgettup - fetch next heap tuple + * heapgettup - fetch next heap tuple * - * routine used by heap_getnext() which does most of the - * real work in scanning tuples. + * routine used by heap_getnext() which does most of the + * real work in scanning tuples. 
* ---------------- */ -static HeapTuple +static HeapTuple heapgettup(Relation relation, - ItemPointer tid, - int dir, - Buffer *b, - TimeQual timeQual, - int nkeys, - ScanKey key) + ItemPointer tid, + int dir, + Buffer * b, + TimeQual timeQual, + int nkeys, + ScanKey key) { - ItemId lpp; - Page dp; - int page; - int pages; - int lines; - HeapTuple rtup; - OffsetNumber lineoff; - int linesleft; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_heapgettup); - IncrHeapAccessStat(global_heapgettup); - - /* ---------------- - * debugging stuff - * - * check validity of arguments, here and for other functions too - * Note: no locking manipulations needed--this is a local function - * ---------------- - */ -#ifdef HEAPDEBUGALL - if (ItemPointerIsValid(tid)) { - elog(DEBUG, "heapgettup(%.16s, tid=0x%x[%d,%d], dir=%d, ...)", - RelationGetRelationName(relation), tid, tid->ip_blkid, - tid->ip_posid, dir); - } else { - elog(DEBUG, "heapgettup(%.16s, tid=0x%x, dir=%d, ...)", - RelationGetRelationName(relation), tid, dir); - } - elog(DEBUG, "heapgettup(..., b=0x%x, timeQ=0x%x, nkeys=%d, key=0x%x", - b, timeQual, nkeys, key); - if (timeQual == SelfTimeQual) { - elog(DEBUG, "heapgettup: relation(%c)=`%.16s', SelfTimeQual", - relation->rd_rel->relkind, &relation->rd_rel->relname); - } else { - elog(DEBUG, "heapgettup: relation(%c)=`%.16s', timeQual=%d", - relation->rd_rel->relkind, &relation->rd_rel->relname, - timeQual); - } -#endif /* !defined(HEAPDEBUGALL) */ - - if (!ItemPointerIsValid(tid)) { - Assert(!PointerIsValid(tid)); - } - - /* ---------------- - * return null immediately if relation is empty - * ---------------- - */ - if (!(pages = relation->rd_nblocks)) - return (NULL); - - /* ---------------- - * calculate next starting lineoff, given scan direction - * ---------------- - */ - if (!dir) { + ItemId lpp; + Page dp; + int page; + int pages; + int lines; + HeapTuple rtup; + OffsetNumber lineoff; + int linesleft; 
+ /* ---------------- - * ``no movement'' scan direction + * increment access statistics * ---------------- */ - /* assume it is a valid TID XXX */ - if (ItemPointerIsValid(tid) == false) { - *b = InvalidBuffer; - return (NULL); - } - *b = RelationGetBufferWithBuffer(relation, - ItemPointerGetBlockNumber(tid), - *b); - -#ifndef NO_BUFFERISVALID - if (!BufferIsValid(*b)) { - elog(WARN, "heapgettup: failed ReadBuffer"); - } -#endif - - dp = (Page) BufferGetPage(*b); - lineoff = ItemPointerGetOffsetNumber(tid); - lpp = PageGetItemId(dp, lineoff); - - rtup = (HeapTuple)PageGetItem((Page) dp, lpp); - return (rtup); - - } else if (dir < 0) { + IncrHeapAccessStat(local_heapgettup); + IncrHeapAccessStat(global_heapgettup); + /* ---------------- - * reverse scan direction + * debugging stuff + * + * check validity of arguments, here and for other functions too + * Note: no locking manipulations needed--this is a local function * ---------------- */ - if (ItemPointerIsValid(tid) == false) { - tid = NULL; +#ifdef HEAPDEBUGALL + if (ItemPointerIsValid(tid)) + { + elog(DEBUG, "heapgettup(%.16s, tid=0x%x[%d,%d], dir=%d, ...)", + RelationGetRelationName(relation), tid, tid->ip_blkid, + tid->ip_posid, dir); } - if (tid == NULL) { - page = pages - 1; /* final page */ - } else { - page = ItemPointerGetBlockNumber(tid); /* current page */ + else + { + elog(DEBUG, "heapgettup(%.16s, tid=0x%x, dir=%d, ...)", + RelationGetRelationName(relation), tid, dir); } - if (page < 0) { - *b = InvalidBuffer; - return (NULL); + elog(DEBUG, "heapgettup(..., b=0x%x, timeQ=0x%x, nkeys=%d, key=0x%x", + b, timeQual, nkeys, key); + if (timeQual == SelfTimeQual) + { + elog(DEBUG, "heapgettup: relation(%c)=`%.16s', SelfTimeQual", + relation->rd_rel->relkind, &relation->rd_rel->relname); } - - *b = RelationGetBufferWithBuffer(relation, page, *b); -#ifndef NO_BUFFERISVALID - if (!BufferIsValid(*b)) { - elog(WARN, "heapgettup: failed ReadBuffer"); + else + { + elog(DEBUG, "heapgettup: relation(%c)=`%.16s', 
timeQual=%d", + relation->rd_rel->relkind, &relation->rd_rel->relname, + timeQual); } -#endif - - dp = (Page) BufferGetPage(*b); - lines = PageGetMaxOffsetNumber(dp); - if (tid == NULL) { - lineoff = lines; /* final offnum */ - } else { - lineoff = /* previous offnum */ - OffsetNumberPrev(ItemPointerGetOffsetNumber(tid)); +#endif /* !defined(HEAPDEBUGALL) */ + + if (!ItemPointerIsValid(tid)) + { + Assert(!PointerIsValid(tid)); } - /* page and lineoff now reference the physically previous tid */ - } else { /* ---------------- - * forward scan direction + * return null immediately if relation is empty * ---------------- */ - if (ItemPointerIsValid(tid) == false) { - page = 0; /* first page */ - lineoff = FirstOffsetNumber; /* first offnum */ - } else { - page = ItemPointerGetBlockNumber(tid); /* current page */ - lineoff = /* next offnum */ - OffsetNumberNext(ItemPointerGetOffsetNumber(tid)); - } - - if (page >= pages) { - *b = InvalidBuffer; - return (NULL); - } - /* page and lineoff now reference the physically next tid */ + if (!(pages = relation->rd_nblocks)) + return (NULL); + + /* ---------------- + * calculate next starting lineoff, given scan direction + * ---------------- + */ + if (!dir) + { + /* ---------------- + * ``no movement'' scan direction + * ---------------- + */ + /* assume it is a valid TID XXX */ + if (ItemPointerIsValid(tid) == false) + { + *b = InvalidBuffer; + return (NULL); + } + *b = RelationGetBufferWithBuffer(relation, + ItemPointerGetBlockNumber(tid), + *b); - *b = RelationGetBufferWithBuffer(relation, page, *b); #ifndef NO_BUFFERISVALID - if (!BufferIsValid(*b)) { - elog(WARN, "heapgettup: failed ReadBuffer"); + if (!BufferIsValid(*b)) + { + elog(WARN, "heapgettup: failed ReadBuffer"); + } +#endif + + dp = (Page) BufferGetPage(*b); + lineoff = ItemPointerGetOffsetNumber(tid); + lpp = PageGetItemId(dp, lineoff); + + rtup = (HeapTuple) PageGetItem((Page) dp, lpp); + return (rtup); + } + else if (dir < 0) + { + /* ---------------- + * 
reverse scan direction + * ---------------- + */ + if (ItemPointerIsValid(tid) == false) + { + tid = NULL; + } + if (tid == NULL) + { + page = pages - 1; /* final page */ + } + else + { + page = ItemPointerGetBlockNumber(tid); /* current page */ + } + if (page < 0) + { + *b = InvalidBuffer; + return (NULL); + } + + *b = RelationGetBufferWithBuffer(relation, page, *b); +#ifndef NO_BUFFERISVALID + if (!BufferIsValid(*b)) + { + elog(WARN, "heapgettup: failed ReadBuffer"); + } #endif - - dp = (Page) BufferGetPage(*b); - lines = PageGetMaxOffsetNumber(dp); - } - - /* 'dir' is now non-zero */ - - /* ---------------- - * calculate line pointer and number of remaining items - * to check on this page. - * ---------------- - */ - lpp = PageGetItemId(dp, lineoff); - if (dir < 0) { - linesleft = lineoff - 1; - } else { - linesleft = lines - lineoff; - } - - /* ---------------- - * advance the scan until we find a qualifying tuple or - * run out of stuff to scan - * ---------------- - */ - for (;;) { - while (linesleft >= 0) { - /* ---------------- - * if current tuple qualifies, return it. 
- * ---------------- - */ - if ((rtup = heap_tuple_satisfies(lpp, relation, *b, (PageHeader) dp, - timeQual, nkeys, key)) != NULL) { - ItemPointer iptr = &(rtup->t_ctid); - if (ItemPointerGetBlockNumber(iptr) != page) { - /* - * set block id to the correct page number - * --- this is a hack to support the virtual fragment - * concept - */ - ItemPointerSetBlockNumber(iptr, page); + + dp = (Page) BufferGetPage(*b); + lines = PageGetMaxOffsetNumber(dp); + if (tid == NULL) + { + lineoff = lines; /* final offnum */ } - return (rtup); - } - - /* ---------------- - * otherwise move to the next item on the page - * ---------------- - */ - --linesleft; - if (dir < 0) { - --lpp; /* move back in this page's ItemId array */ - } else { - ++lpp; /* move forward in this page's ItemId array */ - } + else + { + lineoff = /* previous offnum */ + OffsetNumberPrev(ItemPointerGetOffsetNumber(tid)); + } + /* page and lineoff now reference the physically previous tid */ + + } + else + { + /* ---------------- + * forward scan direction + * ---------------- + */ + if (ItemPointerIsValid(tid) == false) + { + page = 0; /* first page */ + lineoff = FirstOffsetNumber; /* first offnum */ + } + else + { + page = ItemPointerGetBlockNumber(tid); /* current page */ + lineoff = /* next offnum */ + OffsetNumberNext(ItemPointerGetOffsetNumber(tid)); + } + + if (page >= pages) + { + *b = InvalidBuffer; + return (NULL); + } + /* page and lineoff now reference the physically next tid */ + + *b = RelationGetBufferWithBuffer(relation, page, *b); +#ifndef NO_BUFFERISVALID + if (!BufferIsValid(*b)) + { + elog(WARN, "heapgettup: failed ReadBuffer"); + } +#endif + + dp = (Page) BufferGetPage(*b); + lines = PageGetMaxOffsetNumber(dp); } - + + /* 'dir' is now non-zero */ + /* ---------------- - * if we get here, it means we've exhausted the items on - * this page and it's time to move to the next.. + * calculate line pointer and number of remaining items + * to check on this page. 
* ---------------- */ - page = nextpage(page, dir); - + lpp = PageGetItemId(dp, lineoff); + if (dir < 0) + { + linesleft = lineoff - 1; + } + else + { + linesleft = lines - lineoff; + } + /* ---------------- - * return NULL if we've exhausted all the pages.. + * advance the scan until we find a qualifying tuple or + * run out of stuff to scan * ---------------- */ - if (page < 0 || page >= pages) { - if (BufferIsValid(*b)) - ReleaseBuffer(*b); - *b = InvalidBuffer; - return (NULL); - } - - *b = ReleaseAndReadBuffer(*b, relation, page); - + for (;;) + { + while (linesleft >= 0) + { + /* ---------------- + * if current tuple qualifies, return it. + * ---------------- + */ + if ((rtup = heap_tuple_satisfies(lpp, relation, *b, (PageHeader) dp, + timeQual, nkeys, key)) != NULL) + { + ItemPointer iptr = &(rtup->t_ctid); + + if (ItemPointerGetBlockNumber(iptr) != page) + { + + /* + * set block id to the correct page number --- this is + * a hack to support the virtual fragment concept + */ + ItemPointerSetBlockNumber(iptr, page); + } + return (rtup); + } + + /* ---------------- + * otherwise move to the next item on the page + * ---------------- + */ + --linesleft; + if (dir < 0) + { + --lpp; /* move back in this page's ItemId array */ + } + else + { + ++lpp; /* move forward in this page's ItemId + * array */ + } + } + + /* ---------------- + * if we get here, it means we've exhausted the items on + * this page and it's time to move to the next.. + * ---------------- + */ + page = nextpage(page, dir); + + /* ---------------- + * return NULL if we've exhausted all the pages.. 
+ * ---------------- + */ + if (page < 0 || page >= pages) + { + if (BufferIsValid(*b)) + ReleaseBuffer(*b); + *b = InvalidBuffer; + return (NULL); + } + + *b = ReleaseAndReadBuffer(*b, relation, page); + #ifndef NO_BUFFERISVALID - if (!BufferIsValid(*b)) { - elog(WARN, "heapgettup: failed ReadBuffer"); - } + if (!BufferIsValid(*b)) + { + elog(WARN, "heapgettup: failed ReadBuffer"); + } #endif - dp = (Page) BufferGetPage(*b); - lines = lineoff = PageGetMaxOffsetNumber((Page) dp); - linesleft = lines - 1; - if (dir < 0) { - lpp = PageGetItemId(dp, lineoff); - } else { - lpp = PageGetItemId(dp, FirstOffsetNumber); + dp = (Page) BufferGetPage(*b); + lines = lineoff = PageGetMaxOffsetNumber((Page) dp); + linesleft = lines - 1; + if (dir < 0) + { + lpp = PageGetItemId(dp, lineoff); + } + else + { + lpp = PageGetItemId(dp, FirstOffsetNumber); + } } - } } void doinsert(Relation relation, HeapTuple tup) { - RelationPutHeapTupleAtEnd(relation, tup); - return; + RelationPutHeapTupleAtEnd(relation, tup); + return; } -/* - * HeapScanIsValid is now a macro in relscan.h -cim 4/27/91 +/* + * HeapScanIsValid is now a macro in relscan.h -cim 4/27/91 */ #ifdef NOT_USED /* ---------------- - * SetHeapAccessMethodImmediateInvalidation + * SetHeapAccessMethodImmediateInvalidation * ---------------- */ void SetHeapAccessMethodImmediateInvalidation(bool on) { - ImmediateInvalidation = on; + ImmediateInvalidation = on; } + #endif /* ---------------------------------------------------------------- - * heap access method interface + * heap access method interface * ---------------------------------------------------------------- */ /* ---------------- - * heap_open - open a heap relation by relationId + * heap_open - open a heap relation by relationId * - * presently the relcache routines do all the work we need - * to open/close heap relations. + * presently the relcache routines do all the work we need + * to open/close heap relations. 
* ---------------- */ Relation heap_open(Oid relationId) { - Relation r; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_open); - IncrHeapAccessStat(global_open); - - r = (Relation) RelationIdGetRelation(relationId); - - if (RelationIsValid(r) && r->rd_rel->relkind == RELKIND_INDEX) { - elog(WARN, "%s is an index relation", r->rd_rel->relname.data); - } - - return (r); + Relation r; + + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_open); + IncrHeapAccessStat(global_open); + + r = (Relation) RelationIdGetRelation(relationId); + + if (RelationIsValid(r) && r->rd_rel->relkind == RELKIND_INDEX) + { + elog(WARN, "%s is an index relation", r->rd_rel->relname.data); + } + + return (r); } /* ---------------- - * heap_openr - open a heap relation by name + * heap_openr - open a heap relation by name * - * presently the relcache routines do all the work we need - * to open/close heap relations. + * presently the relcache routines do all the work we need + * to open/close heap relations. 
* ---------------- */ Relation heap_openr(char *relationName) { - Relation r; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_openr); - IncrHeapAccessStat(global_openr); - - r = RelationNameGetRelation(relationName); - - if (RelationIsValid(r) && r->rd_rel->relkind == RELKIND_INDEX) { - elog(WARN, "%s is an index relation", r->rd_rel->relname.data); - } - - return (r); + Relation r; + + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_openr); + IncrHeapAccessStat(global_openr); + + r = RelationNameGetRelation(relationName); + + if (RelationIsValid(r) && r->rd_rel->relkind == RELKIND_INDEX) + { + elog(WARN, "%s is an index relation", r->rd_rel->relname.data); + } + + return (r); } /* ---------------- - * heap_close - close a heap relation + * heap_close - close a heap relation * - * presently the relcache routines do all the work we need - * to open/close heap relations. + * presently the relcache routines do all the work we need + * to open/close heap relations. 
* ---------------- */ void heap_close(Relation relation) { - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_close); - IncrHeapAccessStat(global_close); - - RelationClose(relation); + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_close); + IncrHeapAccessStat(global_close); + + RelationClose(relation); } /* ---------------- - * heap_beginscan - begin relation scan + * heap_beginscan - begin relation scan * ---------------- */ HeapScanDesc heap_beginscan(Relation relation, - int atend, - TimeQual timeQual, - unsigned nkeys, - ScanKey key) + int atend, + TimeQual timeQual, + unsigned nkeys, + ScanKey key) { - HeapScanDesc sdesc; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_beginscan); - IncrHeapAccessStat(global_beginscan); - - /* ---------------- - * sanity checks - * ---------------- - */ - if (RelationIsValid(relation) == false) - elog(WARN, "heap_beginscan: !RelationIsValid(relation)"); - - /* ---------------- - * set relation level read lock - * ---------------- - */ - RelationSetLockForRead(relation); - - /* XXX someday assert SelfTimeQual if relkind == RELKIND_UNCATALOGED */ - if (relation->rd_rel->relkind == RELKIND_UNCATALOGED) { - timeQual = SelfTimeQual; - } - - /* ---------------- - * increment relation ref count while scanning relation - * ---------------- - */ - RelationIncrementReferenceCount(relation); - - /* ---------------- - * allocate and initialize scan descriptor - * ---------------- - */ - sdesc = (HeapScanDesc) palloc(sizeof(HeapScanDescData)); - - relation->rd_nblocks = smgrnblocks(relation->rd_rel->relsmgr, relation); - sdesc->rs_rd = relation; - - if (nkeys) { - /* - * we do this here instead of in initsdesc() because heap_rescan also - * calls initsdesc() and we don't want to allocate memory again + HeapScanDesc sdesc; + + /* ---------------- + * increment access 
statistics + * ---------------- + */ + IncrHeapAccessStat(local_beginscan); + IncrHeapAccessStat(global_beginscan); + + /* ---------------- + * sanity checks + * ---------------- */ - sdesc->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); - } else { - sdesc->rs_key = NULL; - } - - initsdesc(sdesc, relation, atend, nkeys, key); - - sdesc->rs_atend = atend; - sdesc->rs_tr = timeQual; - sdesc->rs_nkeys = (short)nkeys; - - return (sdesc); + if (RelationIsValid(relation) == false) + elog(WARN, "heap_beginscan: !RelationIsValid(relation)"); + + /* ---------------- + * set relation level read lock + * ---------------- + */ + RelationSetLockForRead(relation); + + /* XXX someday assert SelfTimeQual if relkind == RELKIND_UNCATALOGED */ + if (relation->rd_rel->relkind == RELKIND_UNCATALOGED) + { + timeQual = SelfTimeQual; + } + + /* ---------------- + * increment relation ref count while scanning relation + * ---------------- + */ + RelationIncrementReferenceCount(relation); + + /* ---------------- + * allocate and initialize scan descriptor + * ---------------- + */ + sdesc = (HeapScanDesc) palloc(sizeof(HeapScanDescData)); + + relation->rd_nblocks = smgrnblocks(relation->rd_rel->relsmgr, relation); + sdesc->rs_rd = relation; + + if (nkeys) + { + + /* + * we do this here instead of in initsdesc() because heap_rescan + * also calls initsdesc() and we don't want to allocate memory + * again + */ + sdesc->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); + } + else + { + sdesc->rs_key = NULL; + } + + initsdesc(sdesc, relation, atend, nkeys, key); + + sdesc->rs_atend = atend; + sdesc->rs_tr = timeQual; + sdesc->rs_nkeys = (short) nkeys; + + return (sdesc); } /* ---------------- - * heap_rescan - restart a relation scan + * heap_rescan - restart a relation scan * ---------------- */ void heap_rescan(HeapScanDesc sdesc, - bool scanFromEnd, - ScanKey key) + bool scanFromEnd, + ScanKey key) { - /* ---------------- - * increment access statistics - * ---------------- - 
*/ - IncrHeapAccessStat(local_rescan); - IncrHeapAccessStat(global_rescan); - - /* Note: set relation level read lock is still set */ - - /* ---------------- - * unpin scan buffers - * ---------------- - */ - unpinsdesc(sdesc); - - /* ---------------- - * reinitialize scan descriptor - * ---------------- - */ - initsdesc(sdesc, sdesc->rs_rd, scanFromEnd, sdesc->rs_nkeys, key); - sdesc->rs_atend = (bool) scanFromEnd; + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_rescan); + IncrHeapAccessStat(global_rescan); + + /* Note: set relation level read lock is still set */ + + /* ---------------- + * unpin scan buffers + * ---------------- + */ + unpinsdesc(sdesc); + + /* ---------------- + * reinitialize scan descriptor + * ---------------- + */ + initsdesc(sdesc, sdesc->rs_rd, scanFromEnd, sdesc->rs_nkeys, key); + sdesc->rs_atend = (bool) scanFromEnd; } /* ---------------- - * heap_endscan - end relation scan + * heap_endscan - end relation scan * - * See how to integrate with index scans. - * Check handling if reldesc caching. + * See how to integrate with index scans. + * Check handling if reldesc caching. 
* ---------------- */ void heap_endscan(HeapScanDesc sdesc) { - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_endscan); - IncrHeapAccessStat(global_endscan); - - /* Note: no locking manipulations needed */ - - /* ---------------- - * unpin scan buffers - * ---------------- - */ - unpinsdesc(sdesc); - - /* ---------------- - * decrement relation reference count and free scan descriptor storage - * ---------------- - */ - RelationDecrementReferenceCount(sdesc->rs_rd); - - /* ---------------- - * Non 2-phase read locks on catalog relations - * ---------------- - */ - if ( IsSystemRelationName(RelationGetRelationName(sdesc->rs_rd)->data) ) - - RelationUnsetLockForRead(sdesc->rs_rd); - - pfree(sdesc); /* XXX */ + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_endscan); + IncrHeapAccessStat(global_endscan); + + /* Note: no locking manipulations needed */ + + /* ---------------- + * unpin scan buffers + * ---------------- + */ + unpinsdesc(sdesc); + + /* ---------------- + * decrement relation reference count and free scan descriptor storage + * ---------------- + */ + RelationDecrementReferenceCount(sdesc->rs_rd); + + /* ---------------- + * Non 2-phase read locks on catalog relations + * ---------------- + */ + if (IsSystemRelationName(RelationGetRelationName(sdesc->rs_rd)->data)) + + RelationUnsetLockForRead(sdesc->rs_rd); + + pfree(sdesc); /* XXX */ } /* ---------------- - * heap_getnext - retrieve next tuple in scan + * heap_getnext - retrieve next tuple in scan * - * Fix to work with index relations. + * Fix to work with index relations. 
* ---------------- */ #ifdef HEAPDEBUGALL #define HEAPDEBUG_1 \ elog(DEBUG, "heap_getnext([%s,nkeys=%d],backw=%d,0x%x) called", \ - sdesc->rs_rd->rd_rel->relname.data, sdesc->rs_nkeys, backw, b) - + sdesc->rs_rd->rd_rel->relname.data, sdesc->rs_nkeys, backw, b) + #define HEAPDEBUG_2 \ - elog(DEBUG, "heap_getnext called with backw (no tracing yet)") - + elog(DEBUG, "heap_getnext called with backw (no tracing yet)") + #define HEAPDEBUG_3 \ - elog(DEBUG, "heap_getnext returns NULL at end") - + elog(DEBUG, "heap_getnext returns NULL at end") + #define HEAPDEBUG_4 \ - elog(DEBUG, "heap_getnext valid buffer UNPIN'd") - + elog(DEBUG, "heap_getnext valid buffer UNPIN'd") + #define HEAPDEBUG_5 \ - elog(DEBUG, "heap_getnext next tuple was cached") - + elog(DEBUG, "heap_getnext next tuple was cached") + #define HEAPDEBUG_6 \ - elog(DEBUG, "heap_getnext returning EOS") - + elog(DEBUG, "heap_getnext returning EOS") + #define HEAPDEBUG_7 \ - elog(DEBUG, "heap_getnext returning tuple"); + elog(DEBUG, "heap_getnext returning tuple"); #else #define HEAPDEBUG_1 #define HEAPDEBUG_2 @@ -737,715 +799,759 @@ elog(DEBUG, "heap_getnext([%s,nkeys=%d],backw=%d,0x%x) called", \ #define HEAPDEBUG_5 #define HEAPDEBUG_6 #define HEAPDEBUG_7 -#endif /* !defined(HEAPDEBUGALL) */ - - +#endif /* !defined(HEAPDEBUGALL) */ + + HeapTuple heap_getnext(HeapScanDesc scandesc, - int backw, - Buffer *b) + int backw, + Buffer * b) { - register HeapScanDesc sdesc = scandesc; - Buffer localb; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_getnext); - IncrHeapAccessStat(global_getnext); - - /* Note: no locking manipulations needed */ - - /* ---------------- - * argument checks - * ---------------- - */ - if (sdesc == NULL) - elog(WARN, "heap_getnext: NULL relscan"); - - /* ---------------- - * initialize return buffer to InvalidBuffer - * ---------------- - */ - if (! 
PointerIsValid(b)) b = &localb; - (*b) = InvalidBuffer; - - HEAPDEBUG_1; /* heap_getnext( info ) */ - - if (backw) { + register HeapScanDesc sdesc = scandesc; + Buffer localb; + /* ---------------- - * handle reverse scan + * increment access statistics * ---------------- */ - HEAPDEBUG_2; /* heap_getnext called with backw */ - - if (sdesc->rs_ptup == sdesc->rs_ctup && - BufferIsInvalid(sdesc->rs_pbuf)) - { - if (BufferIsValid(sdesc->rs_nbuf)) - ReleaseBuffer(sdesc->rs_nbuf); - return (NULL); - } - - /* - * Copy the "current" tuple/buffer - * to "next". Pin/unpin the buffers - * accordingly + IncrHeapAccessStat(local_getnext); + IncrHeapAccessStat(global_getnext); + + /* Note: no locking manipulations needed */ + + /* ---------------- + * argument checks + * ---------------- */ - if (sdesc->rs_nbuf != sdesc->rs_cbuf) { - if (BufferIsValid(sdesc->rs_nbuf)) - ReleaseBuffer(sdesc->rs_nbuf); - if (BufferIsValid(sdesc->rs_cbuf)) - IncrBufferRefCount(sdesc->rs_cbuf); - } - sdesc->rs_ntup = sdesc->rs_ctup; - sdesc->rs_nbuf = sdesc->rs_cbuf; - - if (sdesc->rs_ptup != NULL) { - if (sdesc->rs_cbuf != sdesc->rs_pbuf) { - if (BufferIsValid(sdesc->rs_cbuf)) - ReleaseBuffer(sdesc->rs_cbuf); - if (BufferIsValid(sdesc->rs_pbuf)) - IncrBufferRefCount(sdesc->rs_pbuf); - } - sdesc->rs_ctup = sdesc->rs_ptup; - sdesc->rs_cbuf = sdesc->rs_pbuf; - } else { /* NONTUP */ - ItemPointer iptr; - - iptr = (sdesc->rs_ctup != NULL) ? - &(sdesc->rs_ctup->t_ctid) : (ItemPointer) NULL; - - /* Don't release sdesc->rs_cbuf at this point, because - heapgettup doesn't increase PrivateRefCount if it - is already set. On a backward scan, both rs_ctup and rs_ntup - usually point to the same buffer page, so - PrivateRefCount[rs_cbuf] should be 2 (or more, if for instance - ctup is stored in a TupleTableSlot). 
- 01/09/94 */ - - sdesc->rs_ctup = (HeapTuple) - heapgettup(sdesc->rs_rd, - iptr, - -1, - &(sdesc->rs_cbuf), - sdesc->rs_tr, - sdesc->rs_nkeys, - sdesc->rs_key); - } - - if (sdesc->rs_ctup == NULL && !BufferIsValid(sdesc->rs_cbuf)) - { + if (sdesc == NULL) + elog(WARN, "heap_getnext: NULL relscan"); + + /* ---------------- + * initialize return buffer to InvalidBuffer + * ---------------- + */ + if (!PointerIsValid(b)) + b = &localb; + (*b) = InvalidBuffer; + + HEAPDEBUG_1; /* heap_getnext( info ) */ + + if (backw) + { + /* ---------------- + * handle reverse scan + * ---------------- + */ + HEAPDEBUG_2; /* heap_getnext called with backw */ + + if (sdesc->rs_ptup == sdesc->rs_ctup && + BufferIsInvalid(sdesc->rs_pbuf)) + { + if (BufferIsValid(sdesc->rs_nbuf)) + ReleaseBuffer(sdesc->rs_nbuf); + return (NULL); + } + + /* + * Copy the "current" tuple/buffer to "next". Pin/unpin the + * buffers accordingly + */ + if (sdesc->rs_nbuf != sdesc->rs_cbuf) + { + if (BufferIsValid(sdesc->rs_nbuf)) + ReleaseBuffer(sdesc->rs_nbuf); + if (BufferIsValid(sdesc->rs_cbuf)) + IncrBufferRefCount(sdesc->rs_cbuf); + } + sdesc->rs_ntup = sdesc->rs_ctup; + sdesc->rs_nbuf = sdesc->rs_cbuf; + + if (sdesc->rs_ptup != NULL) + { + if (sdesc->rs_cbuf != sdesc->rs_pbuf) + { + if (BufferIsValid(sdesc->rs_cbuf)) + ReleaseBuffer(sdesc->rs_cbuf); + if (BufferIsValid(sdesc->rs_pbuf)) + IncrBufferRefCount(sdesc->rs_pbuf); + } + sdesc->rs_ctup = sdesc->rs_ptup; + sdesc->rs_cbuf = sdesc->rs_pbuf; + } + else + { /* NONTUP */ + ItemPointer iptr; + + iptr = (sdesc->rs_ctup != NULL) ? + &(sdesc->rs_ctup->t_ctid) : (ItemPointer) NULL; + + /* + * Don't release sdesc->rs_cbuf at this point, because + * heapgettup doesn't increase PrivateRefCount if it is + * already set. On a backward scan, both rs_ctup and rs_ntup + * usually point to the same buffer page, so + * PrivateRefCount[rs_cbuf] should be 2 (or more, if for + * instance ctup is stored in a TupleTableSlot). 
- 01/09/94 + */ + + sdesc->rs_ctup = (HeapTuple) + heapgettup(sdesc->rs_rd, + iptr, + -1, + &(sdesc->rs_cbuf), + sdesc->rs_tr, + sdesc->rs_nkeys, + sdesc->rs_key); + } + + if (sdesc->rs_ctup == NULL && !BufferIsValid(sdesc->rs_cbuf)) + { + if (BufferIsValid(sdesc->rs_pbuf)) + ReleaseBuffer(sdesc->rs_pbuf); + sdesc->rs_ptup = NULL; + sdesc->rs_pbuf = InvalidBuffer; + if (BufferIsValid(sdesc->rs_nbuf)) + ReleaseBuffer(sdesc->rs_nbuf); + sdesc->rs_ntup = NULL; + sdesc->rs_nbuf = InvalidBuffer; + return (NULL); + } + if (BufferIsValid(sdesc->rs_pbuf)) - ReleaseBuffer(sdesc->rs_pbuf); + ReleaseBuffer(sdesc->rs_pbuf); sdesc->rs_ptup = NULL; - sdesc->rs_pbuf = InvalidBuffer; + sdesc->rs_pbuf = UnknownBuffer; + + } + else + { + /* ---------------- + * handle forward scan + * ---------------- + */ + if (sdesc->rs_ctup == sdesc->rs_ntup && + BufferIsInvalid(sdesc->rs_nbuf)) + { + if (BufferIsValid(sdesc->rs_pbuf)) + ReleaseBuffer(sdesc->rs_pbuf); + HEAPDEBUG_3; /* heap_getnext returns NULL at end */ + return (NULL); + } + + /* + * Copy the "current" tuple/buffer to "previous". Pin/unpin the + * buffers accordingly + */ + if (sdesc->rs_pbuf != sdesc->rs_cbuf) + { + if (BufferIsValid(sdesc->rs_pbuf)) + ReleaseBuffer(sdesc->rs_pbuf); + if (BufferIsValid(sdesc->rs_cbuf)) + IncrBufferRefCount(sdesc->rs_cbuf); + } + sdesc->rs_ptup = sdesc->rs_ctup; + sdesc->rs_pbuf = sdesc->rs_cbuf; + + if (sdesc->rs_ntup != NULL) + { + if (sdesc->rs_cbuf != sdesc->rs_nbuf) + { + if (BufferIsValid(sdesc->rs_cbuf)) + ReleaseBuffer(sdesc->rs_cbuf); + if (BufferIsValid(sdesc->rs_nbuf)) + IncrBufferRefCount(sdesc->rs_nbuf); + } + sdesc->rs_ctup = sdesc->rs_ntup; + sdesc->rs_cbuf = sdesc->rs_nbuf; + HEAPDEBUG_5; /* heap_getnext next tuple was cached */ + } + else + { /* NONTUP */ + ItemPointer iptr; + + iptr = (sdesc->rs_ctup != NULL) ? 
+ &sdesc->rs_ctup->t_ctid : (ItemPointer) NULL; + + /* + * Don't release sdesc->rs_cbuf at this point, because + * heapgettup doesn't increase PrivateRefCount if it is + * already set. On a forward scan, both rs_ctup and rs_ptup + * usually point to the same buffer page, so + * PrivateRefCount[rs_cbuf] should be 2 (or more, if for + * instance ctup is stored in a TupleTableSlot). - 01/09/93 + */ + + sdesc->rs_ctup = (HeapTuple) + heapgettup(sdesc->rs_rd, + iptr, + 1, + &sdesc->rs_cbuf, + sdesc->rs_tr, + sdesc->rs_nkeys, + sdesc->rs_key); + } + + if (sdesc->rs_ctup == NULL && !BufferIsValid(sdesc->rs_cbuf)) + { + if (BufferIsValid(sdesc->rs_nbuf)) + ReleaseBuffer(sdesc->rs_nbuf); + sdesc->rs_ntup = NULL; + sdesc->rs_nbuf = InvalidBuffer; + if (BufferIsValid(sdesc->rs_pbuf)) + ReleaseBuffer(sdesc->rs_pbuf); + sdesc->rs_ptup = NULL; + sdesc->rs_pbuf = InvalidBuffer; + HEAPDEBUG_6; /* heap_getnext returning EOS */ + return (NULL); + } + if (BufferIsValid(sdesc->rs_nbuf)) - ReleaseBuffer(sdesc->rs_nbuf); + ReleaseBuffer(sdesc->rs_nbuf); sdesc->rs_ntup = NULL; - sdesc->rs_nbuf = InvalidBuffer; - return (NULL); - } - - if (BufferIsValid(sdesc->rs_pbuf)) - ReleaseBuffer(sdesc->rs_pbuf); - sdesc->rs_ptup = NULL; - sdesc->rs_pbuf = UnknownBuffer; - - } else { + sdesc->rs_nbuf = UnknownBuffer; + } + /* ---------------- - * handle forward scan + * if we get here it means we have a new current scan tuple, so + * point to the proper return buffer and return the tuple. * ---------------- */ - if (sdesc->rs_ctup == sdesc->rs_ntup && - BufferIsInvalid(sdesc->rs_nbuf)) { - if (BufferIsValid(sdesc->rs_pbuf)) - ReleaseBuffer(sdesc->rs_pbuf); - HEAPDEBUG_3; /* heap_getnext returns NULL at end */ - return (NULL); - } - - /* - * Copy the "current" tuple/buffer - * to "previous". 
Pin/unpin the buffers - * accordingly - */ - if (sdesc->rs_pbuf != sdesc->rs_cbuf) { - if (BufferIsValid(sdesc->rs_pbuf)) - ReleaseBuffer(sdesc->rs_pbuf); - if (BufferIsValid(sdesc->rs_cbuf)) - IncrBufferRefCount(sdesc->rs_cbuf); - } - sdesc->rs_ptup = sdesc->rs_ctup; - sdesc->rs_pbuf = sdesc->rs_cbuf; - - if (sdesc->rs_ntup != NULL) { - if (sdesc->rs_cbuf != sdesc->rs_nbuf) { - if (BufferIsValid(sdesc->rs_cbuf)) - ReleaseBuffer(sdesc->rs_cbuf); - if (BufferIsValid(sdesc->rs_nbuf)) - IncrBufferRefCount(sdesc->rs_nbuf); - } - sdesc->rs_ctup = sdesc->rs_ntup; - sdesc->rs_cbuf = sdesc->rs_nbuf; - HEAPDEBUG_5; /* heap_getnext next tuple was cached */ - } else { /* NONTUP */ - ItemPointer iptr; - - iptr = (sdesc->rs_ctup != NULL) ? - &sdesc->rs_ctup->t_ctid : (ItemPointer) NULL; - - /* Don't release sdesc->rs_cbuf at this point, because - heapgettup doesn't increase PrivateRefCount if it - is already set. On a forward scan, both rs_ctup and rs_ptup - usually point to the same buffer page, so - PrivateRefCount[rs_cbuf] should be 2 (or more, if for instance - ctup is stored in a TupleTableSlot). - 01/09/93 */ - - sdesc->rs_ctup = (HeapTuple) - heapgettup(sdesc->rs_rd, - iptr, - 1, - &sdesc->rs_cbuf, - sdesc->rs_tr, - sdesc->rs_nkeys, - sdesc->rs_key); - } - - if (sdesc->rs_ctup == NULL && !BufferIsValid(sdesc->rs_cbuf)) { - if (BufferIsValid(sdesc->rs_nbuf)) - ReleaseBuffer(sdesc->rs_nbuf); - sdesc->rs_ntup = NULL; - sdesc->rs_nbuf = InvalidBuffer; - if (BufferIsValid(sdesc->rs_pbuf)) - ReleaseBuffer(sdesc->rs_pbuf); - sdesc->rs_ptup = NULL; - sdesc->rs_pbuf = InvalidBuffer; - HEAPDEBUG_6; /* heap_getnext returning EOS */ - return (NULL); - } - - if (BufferIsValid(sdesc->rs_nbuf)) - ReleaseBuffer(sdesc->rs_nbuf); - sdesc->rs_ntup = NULL; - sdesc->rs_nbuf = UnknownBuffer; - } - - /* ---------------- - * if we get here it means we have a new current scan tuple, so - * point to the proper return buffer and return the tuple. 
- * ---------------- - */ - (*b) = sdesc->rs_cbuf; - - HEAPDEBUG_7; /* heap_getnext returning tuple */ - - return (sdesc->rs_ctup); + (*b) = sdesc->rs_cbuf; + + HEAPDEBUG_7; /* heap_getnext returning tuple */ + + return (sdesc->rs_ctup); } /* ---------------- - * heap_fetch - retrive tuple with tid + * heap_fetch - retrive tuple with tid * - * Currently ignores LP_IVALID during processing! + * Currently ignores LP_IVALID during processing! * ---------------- */ HeapTuple heap_fetch(Relation relation, - TimeQual timeQual, - ItemPointer tid, - Buffer *b) + TimeQual timeQual, + ItemPointer tid, + Buffer * b) { - ItemId lp; - Buffer buffer; - PageHeader dp; - HeapTuple tuple; - OffsetNumber offnum; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_fetch); - IncrHeapAccessStat(global_fetch); - - /* - * Note: This is collosally expensive - does two system calls per - * indexscan tuple fetch. Not good, and since we should be doing - * page level locking by the scanner anyway, it is commented out. - */ - - /* RelationSetLockForTupleRead(relation, tid); */ - - /* ---------------- - * get the buffer from the relation descriptor - * Note that this does a buffer pin. - * ---------------- - */ - - buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); - + ItemId lp; + Buffer buffer; + PageHeader dp; + HeapTuple tuple; + OffsetNumber offnum; + + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_fetch); + IncrHeapAccessStat(global_fetch); + + /* + * Note: This is collosally expensive - does two system calls per + * indexscan tuple fetch. Not good, and since we should be doing page + * level locking by the scanner anyway, it is commented out. + */ + + /* RelationSetLockForTupleRead(relation, tid); */ + + /* ---------------- + * get the buffer from the relation descriptor + * Note that this does a buffer pin. 
+ * ---------------- + */ + + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); + #ifndef NO_BUFFERISVALID - if (!BufferIsValid(buffer)) { - elog(WARN, "heap_fetch: %s relation: ReadBuffer(%lx) failed", - &relation->rd_rel->relname, (long)tid); - } + if (!BufferIsValid(buffer)) + { + elog(WARN, "heap_fetch: %s relation: ReadBuffer(%lx) failed", + &relation->rd_rel->relname, (long) tid); + } #endif - - /* ---------------- - * get the item line pointer corresponding to the requested tid - * ---------------- - */ - dp = (PageHeader) BufferGetPage(buffer); - offnum = ItemPointerGetOffsetNumber(tid); - lp = PageGetItemId(dp, offnum); - - /* ---------------- - * more sanity checks - * ---------------- - */ - - Assert(ItemIdIsUsed(lp)); - - /* ---------------- - * check time qualification of tid - * ---------------- - */ - - tuple = heap_tuple_satisfies(lp, relation, buffer, dp, - timeQual, 0,(ScanKey)NULL); - - if (tuple == NULL) + + /* ---------------- + * get the item line pointer corresponding to the requested tid + * ---------------- + */ + dp = (PageHeader) BufferGetPage(buffer); + offnum = ItemPointerGetOffsetNumber(tid); + lp = PageGetItemId(dp, offnum); + + /* ---------------- + * more sanity checks + * ---------------- + */ + + Assert(ItemIdIsUsed(lp)); + + /* ---------------- + * check time qualification of tid + * ---------------- + */ + + tuple = heap_tuple_satisfies(lp, relation, buffer, dp, + timeQual, 0, (ScanKey) NULL); + + if (tuple == NULL) { - ReleaseBuffer(buffer); - return (NULL); + ReleaseBuffer(buffer); + return (NULL); } - - /* ---------------- - * all checks passed, now either return a copy of the tuple - * or pin the buffer page and return a pointer, depending on - * whether caller gave us a valid b. 
- * ---------------- - */ - - if (PointerIsValid(b)) { - *b = buffer; - } else { - tuple = heap_copytuple(tuple); - ReleaseBuffer(buffer); - } - return (tuple); + + /* ---------------- + * all checks passed, now either return a copy of the tuple + * or pin the buffer page and return a pointer, depending on + * whether caller gave us a valid b. + * ---------------- + */ + + if (PointerIsValid(b)) + { + *b = buffer; + } + else + { + tuple = heap_copytuple(tuple); + ReleaseBuffer(buffer); + } + return (tuple); } /* ---------------- - * heap_insert - insert tuple + * heap_insert - insert tuple * - * The assignment of t_min (and thus the others) should be - * removed eventually. + * The assignment of t_min (and thus the others) should be + * removed eventually. * - * Currently places the tuple onto the last page. If there is no room, - * it is placed on new pages. (Heap relations) - * Note that concurrent inserts during a scan will probably have - * unexpected results, though this will be fixed eventually. + * Currently places the tuple onto the last page. If there is no room, + * it is placed on new pages. (Heap relations) + * Note that concurrent inserts during a scan will probably have + * unexpected results, though this will be fixed eventually. * - * Fix to work with indexes. + * Fix to work with indexes. * ---------------- */ Oid heap_insert(Relation relation, HeapTuple tup) { - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_insert); - IncrHeapAccessStat(global_insert); - - /* ---------------- - * set relation level write lock. If this is a "local" relation (not - * visible to others), we don't need to set a write lock. 
- * ---------------- - */ - if (!relation->rd_islocal) - RelationSetLockForWrite(relation); + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_insert); + IncrHeapAccessStat(global_insert); - /* ---------------- - * If the object id of this tuple has already been assigned, trust - * the caller. There are a couple of ways this can happen. At initial - * db creation, the backend program sets oids for tuples. When we - * define an index, we set the oid. Finally, in the future, we may - * allow users to set their own object ids in order to support a - * persistent object store (objects need to contain pointers to one - * another). - * ---------------- - */ - if (!OidIsValid(tup->t_oid)) { - tup->t_oid = newoid(); - LastOidProcessed = tup->t_oid; - } - else - CheckMaxObjectId(tup->t_oid); - - TransactionIdStore(GetCurrentTransactionId(), &(tup->t_xmin)); - tup->t_cmin = GetCurrentCommandId(); - StoreInvalidTransactionId(&(tup->t_xmax)); - tup->t_tmin = INVALID_ABSTIME; - tup->t_tmax = CURRENT_ABSTIME; - - doinsert(relation, tup); - - if ( IsSystemRelationName(RelationGetRelationName(relation)->data)) { - RelationUnsetLockForWrite(relation); - /* ---------------- - * invalidate caches (only works for system relations) + * set relation level write lock. If this is a "local" relation (not + * visible to others), we don't need to set a write lock. * ---------------- */ - SetRefreshWhenInvalidate(ImmediateInvalidation); - RelationInvalidateHeapTuple(relation, tup); - SetRefreshWhenInvalidate((bool)!ImmediateInvalidation); - } - - return(tup->t_oid); + if (!relation->rd_islocal) + RelationSetLockForWrite(relation); + + /* ---------------- + * If the object id of this tuple has already been assigned, trust + * the caller. There are a couple of ways this can happen. At initial + * db creation, the backend program sets oids for tuples. When we + * define an index, we set the oid. 
Finally, in the future, we may + * allow users to set their own object ids in order to support a + * persistent object store (objects need to contain pointers to one + * another). + * ---------------- + */ + if (!OidIsValid(tup->t_oid)) + { + tup->t_oid = newoid(); + LastOidProcessed = tup->t_oid; + } + else + CheckMaxObjectId(tup->t_oid); + + TransactionIdStore(GetCurrentTransactionId(), &(tup->t_xmin)); + tup->t_cmin = GetCurrentCommandId(); + StoreInvalidTransactionId(&(tup->t_xmax)); + tup->t_tmin = INVALID_ABSTIME; + tup->t_tmax = CURRENT_ABSTIME; + + doinsert(relation, tup); + + if (IsSystemRelationName(RelationGetRelationName(relation)->data)) + { + RelationUnsetLockForWrite(relation); + + /* ---------------- + * invalidate caches (only works for system relations) + * ---------------- + */ + SetRefreshWhenInvalidate(ImmediateInvalidation); + RelationInvalidateHeapTuple(relation, tup); + SetRefreshWhenInvalidate((bool) ! ImmediateInvalidation); + } + + return (tup->t_oid); } /* ---------------- - * heap_delete - delete a tuple + * heap_delete - delete a tuple * - * Must decide how to handle errors. + * Must decide how to handle errors. 
* ---------------- */ int heap_delete(Relation relation, ItemPointer tid) { - ItemId lp; - HeapTuple tp; - PageHeader dp; - Buffer b; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_delete); - IncrHeapAccessStat(global_delete); - - /* ---------------- - * sanity check - * ---------------- - */ - Assert(ItemPointerIsValid(tid)); - - /* ---------------- - * set relation level write lock - * ---------------- - */ - RelationSetLockForWrite(relation); - - b = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); - + ItemId lp; + HeapTuple tp; + PageHeader dp; + Buffer b; + + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_delete); + IncrHeapAccessStat(global_delete); + + /* ---------------- + * sanity check + * ---------------- + */ + Assert(ItemPointerIsValid(tid)); + + /* ---------------- + * set relation level write lock + * ---------------- + */ + RelationSetLockForWrite(relation); + + b = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); + #ifndef NO_BUFFERISVALID - if (!BufferIsValid(b)) { /* XXX L_SH better ??? */ - elog(WARN, "heap_delete: failed ReadBuffer"); - } -#endif /* NO_BUFFERISVALID */ - - dp = (PageHeader) BufferGetPage(b); - lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid)); - - /* - * Just like test against non-functional updates we try to catch - * non-functional delete attempts. 
- vadim 05/05/97 - */ - tp = (HeapTuple) PageGetItem((Page)dp, lp); - Assert(HeapTupleIsValid(tp)); - if (TupleUpdatedByCurXactAndCmd(tp)) { - elog(NOTICE, "Non-functional delete, tuple already deleted"); - if ( IsSystemRelationName(RelationGetRelationName(relation)->data) ) - RelationUnsetLockForWrite(relation); - ReleaseBuffer(b); - return (1); - } - /* ---------------- - * check that we're deleteing a valid item - * ---------------- - */ - if (!(tp = heap_tuple_satisfies(lp, relation, b, dp, - NowTimeQual, 0, (ScanKey) NULL))) { - - /* XXX call something else */ - ReleaseBuffer(b); - - elog(WARN, "heap_delete: (am)invalid tid"); - } - - /* ---------------- - * get the tuple and lock tell the buffer manager we want - * exclusive access to the page - * ---------------- - */ - - /* ---------------- - * store transaction information of xact deleting the tuple - * ---------------- - */ - TransactionIdStore(GetCurrentTransactionId(), &(tp->t_xmax)); - tp->t_cmax = GetCurrentCommandId(); - ItemPointerSetInvalid(&tp->t_chain); - - /* ---------------- - * invalidate caches - * ---------------- - */ - SetRefreshWhenInvalidate(ImmediateInvalidation); - RelationInvalidateHeapTuple(relation, tp); - SetRefreshWhenInvalidate((bool)!ImmediateInvalidation); - - WriteBuffer(b); - if ( IsSystemRelationName(RelationGetRelationName(relation)->data) ) - RelationUnsetLockForWrite(relation); - - return(0); + if (!BufferIsValid(b)) + { /* XXX L_SH better ??? */ + elog(WARN, "heap_delete: failed ReadBuffer"); + } +#endif /* NO_BUFFERISVALID */ + + dp = (PageHeader) BufferGetPage(b); + lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid)); + + /* + * Just like test against non-functional updates we try to catch + * non-functional delete attempts. 
- vadim 05/05/97 + */ + tp = (HeapTuple) PageGetItem((Page) dp, lp); + Assert(HeapTupleIsValid(tp)); + if (TupleUpdatedByCurXactAndCmd(tp)) + { + elog(NOTICE, "Non-functional delete, tuple already deleted"); + if (IsSystemRelationName(RelationGetRelationName(relation)->data)) + RelationUnsetLockForWrite(relation); + ReleaseBuffer(b); + return (1); + } + /* ---------------- + * check that we're deleteing a valid item + * ---------------- + */ + if (!(tp = heap_tuple_satisfies(lp, relation, b, dp, + NowTimeQual, 0, (ScanKey) NULL))) + { + + /* XXX call something else */ + ReleaseBuffer(b); + + elog(WARN, "heap_delete: (am)invalid tid"); + } + + /* ---------------- + * get the tuple and lock tell the buffer manager we want + * exclusive access to the page + * ---------------- + */ + + /* ---------------- + * store transaction information of xact deleting the tuple + * ---------------- + */ + TransactionIdStore(GetCurrentTransactionId(), &(tp->t_xmax)); + tp->t_cmax = GetCurrentCommandId(); + ItemPointerSetInvalid(&tp->t_chain); + + /* ---------------- + * invalidate caches + * ---------------- + */ + SetRefreshWhenInvalidate(ImmediateInvalidation); + RelationInvalidateHeapTuple(relation, tp); + SetRefreshWhenInvalidate((bool) ! ImmediateInvalidation); + + WriteBuffer(b); + if (IsSystemRelationName(RelationGetRelationName(relation)->data)) + RelationUnsetLockForWrite(relation); + + return (0); } /* ---------------- - * heap_replace - replace a tuple + * heap_replace - replace a tuple + * + * Must decide how to handle errors. * - * Must decide how to handle errors. + * Fix arguments, work with indexes. * - * Fix arguments, work with indexes. - * - * 12/30/93 - modified the return value to be 1 when - * a non-functional update is detected. This - * prevents the calling routine from updating - * indices unnecessarily. -kw + * 12/30/93 - modified the return value to be 1 when + * a non-functional update is detected. 
This + * prevents the calling routine from updating + * indices unnecessarily. -kw * * ---------------- */ int heap_replace(Relation relation, ItemPointer otid, HeapTuple tup) { - ItemId lp; - HeapTuple tp; - Page dp; - Buffer buffer; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_replace); - IncrHeapAccessStat(global_replace); - - /* ---------------- - * sanity checks - * ---------------- - */ - Assert(ItemPointerIsValid(otid)); - - /* ---------------- - * set relation level write lock - * ---------------- - */ - if (!relation->rd_islocal) - RelationSetLockForWrite(relation); - - buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(otid)); + ItemId lp; + HeapTuple tp; + Page dp; + Buffer buffer; + + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_replace); + IncrHeapAccessStat(global_replace); + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(ItemPointerIsValid(otid)); + + /* ---------------- + * set relation level write lock + * ---------------- + */ + if (!relation->rd_islocal) + RelationSetLockForWrite(relation); + + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(otid)); #ifndef NO_BUFFERISVALID - if (!BufferIsValid(buffer)) { - /* XXX L_SH better ??? */ - elog(WARN, "amreplace: failed ReadBuffer"); - } -#endif /* NO_BUFFERISVALID */ - - dp = (Page) BufferGetPage(buffer); - lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(otid)); - - /* ---------------- - * logically delete old item - * ---------------- - */ - - tp = (HeapTuple) PageGetItem(dp, lp); - Assert(HeapTupleIsValid(tp)); - - /* ----------------- - * the following test should be able to catch all non-functional - * update attempts and shut out all ghost tuples. - * XXX In the future, Spyros may need to update the rule lock on a tuple - * more than once within the same command and same transaction. 
- * He will have to introduce a new flag to override the following check. - * -- Wei - * - * ----------------- - */ - - if (TupleUpdatedByCurXactAndCmd(tp)) { - elog(NOTICE, "Non-functional update, only first update is performed"); - if ( IsSystemRelationName(RelationGetRelationName(relation)->data) ) - RelationUnsetLockForWrite(relation); - ReleaseBuffer(buffer); - return(1); - } - - /* ---------------- - * check that we're replacing a valid item - - * - * NOTE that this check must follow the non-functional update test - * above as it can happen that we try to 'replace' the same tuple - * twice in a single transaction. The second time around the - * tuple will fail the NowTimeQual. We don't want to abort the - * xact, we only want to flag the 'non-functional' NOTICE. -mer - * ---------------- - */ - if (!heap_tuple_satisfies(lp, - relation, - buffer, - (PageHeader)dp, - NowTimeQual, - 0, - (ScanKey)NULL)) - { - ReleaseBuffer(buffer); - elog(WARN, "heap_replace: (am)invalid otid"); - } - - /* XXX order problems if not atomic assignment ??? */ - tup->t_oid = tp->t_oid; - TransactionIdStore(GetCurrentTransactionId(), &(tup->t_xmin)); - tup->t_cmin = GetCurrentCommandId(); - StoreInvalidTransactionId(&(tup->t_xmax)); - tup->t_tmin = INVALID_ABSTIME; - tup->t_tmax = CURRENT_ABSTIME; - ItemPointerSetInvalid(&tup->t_chain); - - /* ---------------- - * insert new item - * ---------------- - */ - if ((unsigned)DOUBLEALIGN(tup->t_len) <= PageGetFreeSpace((Page) dp)) { - RelationPutHeapTuple(relation, BufferGetBlockNumber(buffer), tup); - } else { + if (!BufferIsValid(buffer)) + { + /* XXX L_SH better ??? */ + elog(WARN, "amreplace: failed ReadBuffer"); + } +#endif /* NO_BUFFERISVALID */ + + dp = (Page) BufferGetPage(buffer); + lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(otid)); + /* ---------------- - * new item won't fit on same page as old item, have to look - * for a new place to put it. 
+ * logically delete old item * ---------------- */ - doinsert(relation, tup); - } - - /* ---------------- - * new item in place, now record transaction information - * ---------------- - */ - TransactionIdStore(GetCurrentTransactionId(), &(tp->t_xmax)); - tp->t_cmax = GetCurrentCommandId(); - tp->t_chain = tup->t_ctid; - - /* ---------------- - * invalidate caches - * ---------------- - */ - SetRefreshWhenInvalidate(ImmediateInvalidation); - RelationInvalidateHeapTuple(relation, tp); - SetRefreshWhenInvalidate((bool)!ImmediateInvalidation); - - WriteBuffer(buffer); - - if ( IsSystemRelationName(RelationGetRelationName(relation)->data) ) - RelationUnsetLockForWrite(relation); - - return(0); + + tp = (HeapTuple) PageGetItem(dp, lp); + Assert(HeapTupleIsValid(tp)); + + /* ----------------- + * the following test should be able to catch all non-functional + * update attempts and shut out all ghost tuples. + * XXX In the future, Spyros may need to update the rule lock on a tuple + * more than once within the same command and same transaction. + * He will have to introduce a new flag to override the following check. + * -- Wei + * + * ----------------- + */ + + if (TupleUpdatedByCurXactAndCmd(tp)) + { + elog(NOTICE, "Non-functional update, only first update is performed"); + if (IsSystemRelationName(RelationGetRelationName(relation)->data)) + RelationUnsetLockForWrite(relation); + ReleaseBuffer(buffer); + return (1); + } + + /* ---------------- + * check that we're replacing a valid item - + * + * NOTE that this check must follow the non-functional update test + * above as it can happen that we try to 'replace' the same tuple + * twice in a single transaction. The second time around the + * tuple will fail the NowTimeQual. We don't want to abort the + * xact, we only want to flag the 'non-functional' NOTICE. 
-mer + * ---------------- + */ + if (!heap_tuple_satisfies(lp, + relation, + buffer, + (PageHeader) dp, + NowTimeQual, + 0, + (ScanKey) NULL)) + { + ReleaseBuffer(buffer); + elog(WARN, "heap_replace: (am)invalid otid"); + } + + /* XXX order problems if not atomic assignment ??? */ + tup->t_oid = tp->t_oid; + TransactionIdStore(GetCurrentTransactionId(), &(tup->t_xmin)); + tup->t_cmin = GetCurrentCommandId(); + StoreInvalidTransactionId(&(tup->t_xmax)); + tup->t_tmin = INVALID_ABSTIME; + tup->t_tmax = CURRENT_ABSTIME; + ItemPointerSetInvalid(&tup->t_chain); + + /* ---------------- + * insert new item + * ---------------- + */ + if ((unsigned) DOUBLEALIGN(tup->t_len) <= PageGetFreeSpace((Page) dp)) + { + RelationPutHeapTuple(relation, BufferGetBlockNumber(buffer), tup); + } + else + { + /* ---------------- + * new item won't fit on same page as old item, have to look + * for a new place to put it. + * ---------------- + */ + doinsert(relation, tup); + } + + /* ---------------- + * new item in place, now record transaction information + * ---------------- + */ + TransactionIdStore(GetCurrentTransactionId(), &(tp->t_xmax)); + tp->t_cmax = GetCurrentCommandId(); + tp->t_chain = tup->t_ctid; + + /* ---------------- + * invalidate caches + * ---------------- + */ + SetRefreshWhenInvalidate(ImmediateInvalidation); + RelationInvalidateHeapTuple(relation, tp); + SetRefreshWhenInvalidate((bool) ! ImmediateInvalidation); + + WriteBuffer(buffer); + + if (IsSystemRelationName(RelationGetRelationName(relation)->data)) + RelationUnsetLockForWrite(relation); + + return (0); } /* ---------------- - * heap_markpos - mark scan position + * heap_markpos - mark scan position * - * Note: - * Should only one mark be maintained per scan at one time. - * Check if this can be done generally--say calls to get the - * next/previous tuple and NEVER pass struct scandesc to the - * user AM's. Now, the mark is sent to the executor for safekeeping. 
- * Probably can store this info into a GENERAL scan structure. + * Note: + * Should only one mark be maintained per scan at one time. + * Check if this can be done generally--say calls to get the + * next/previous tuple and NEVER pass struct scandesc to the + * user AM's. Now, the mark is sent to the executor for safekeeping. + * Probably can store this info into a GENERAL scan structure. * - * May be best to change this call to store the marked position - * (up to 2?) in the scan structure itself. - * Fix to use the proper caching structure. + * May be best to change this call to store the marked position + * (up to 2?) in the scan structure itself. + * Fix to use the proper caching structure. * ---------------- */ void heap_markpos(HeapScanDesc sdesc) { - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_markpos); - IncrHeapAccessStat(global_markpos); - - /* Note: no locking manipulations needed */ - - if (sdesc->rs_ptup == NULL && - BufferIsUnknown(sdesc->rs_pbuf)) { /* == NONTUP */ - sdesc->rs_ptup = (HeapTuple) - heapgettup(sdesc->rs_rd, - (sdesc->rs_ctup == NULL) ? - (ItemPointer)NULL : &sdesc->rs_ctup->t_ctid, - -1, - &sdesc->rs_pbuf, - sdesc->rs_tr, - sdesc->rs_nkeys, - sdesc->rs_key); - - } else if (sdesc->rs_ntup == NULL && - BufferIsUnknown(sdesc->rs_nbuf)) { /* == NONTUP */ - sdesc->rs_ntup = (HeapTuple) - heapgettup(sdesc->rs_rd, - (sdesc->rs_ctup == NULL) ? - (ItemPointer)NULL : &sdesc->rs_ctup->t_ctid, - 1, - &sdesc->rs_nbuf, - sdesc->rs_tr, - sdesc->rs_nkeys, - sdesc->rs_key); - } - - /* ---------------- - * Should not unpin the buffer pages. They may still be in use. 
- * ---------------- - */ - if (sdesc->rs_ptup != NULL) { - sdesc->rs_mptid = sdesc->rs_ptup->t_ctid; - } else { - ItemPointerSetInvalid(&sdesc->rs_mptid); - } - if (sdesc->rs_ctup != NULL) { - sdesc->rs_mctid = sdesc->rs_ctup->t_ctid; - } else { - ItemPointerSetInvalid(&sdesc->rs_mctid); - } - if (sdesc->rs_ntup != NULL) { - sdesc->rs_mntid = sdesc->rs_ntup->t_ctid; - } else { - ItemPointerSetInvalid(&sdesc->rs_mntid); - } + + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_markpos); + IncrHeapAccessStat(global_markpos); + + /* Note: no locking manipulations needed */ + + if (sdesc->rs_ptup == NULL && + BufferIsUnknown(sdesc->rs_pbuf)) + { /* == NONTUP */ + sdesc->rs_ptup = (HeapTuple) + heapgettup(sdesc->rs_rd, + (sdesc->rs_ctup == NULL) ? + (ItemPointer) NULL : &sdesc->rs_ctup->t_ctid, + -1, + &sdesc->rs_pbuf, + sdesc->rs_tr, + sdesc->rs_nkeys, + sdesc->rs_key); + + } + else if (sdesc->rs_ntup == NULL && + BufferIsUnknown(sdesc->rs_nbuf)) + { /* == NONTUP */ + sdesc->rs_ntup = (HeapTuple) + heapgettup(sdesc->rs_rd, + (sdesc->rs_ctup == NULL) ? + (ItemPointer) NULL : &sdesc->rs_ctup->t_ctid, + 1, + &sdesc->rs_nbuf, + sdesc->rs_tr, + sdesc->rs_nkeys, + sdesc->rs_key); + } + + /* ---------------- + * Should not unpin the buffer pages. They may still be in use. + * ---------------- + */ + if (sdesc->rs_ptup != NULL) + { + sdesc->rs_mptid = sdesc->rs_ptup->t_ctid; + } + else + { + ItemPointerSetInvalid(&sdesc->rs_mptid); + } + if (sdesc->rs_ctup != NULL) + { + sdesc->rs_mctid = sdesc->rs_ctup->t_ctid; + } + else + { + ItemPointerSetInvalid(&sdesc->rs_mctid); + } + if (sdesc->rs_ntup != NULL) + { + sdesc->rs_mntid = sdesc->rs_ntup->t_ctid; + } + else + { + ItemPointerSetInvalid(&sdesc->rs_mntid); + } } /* ---------------- - * heap_restrpos - restore position to marked location + * heap_restrpos - restore position to marked location * - * Note: there are bad side effects here. 
If we were past the end - * of a relation when heapmarkpos is called, then if the relation is - * extended via insert, then the next call to heaprestrpos will set - * cause the added tuples to be visible when the scan continues. - * Problems also arise if the TID's are rearranged!!! + * Note: there are bad side effects here. If we were past the end + * of a relation when heapmarkpos is called, then if the relation is + * extended via insert, then the next call to heaprestrpos will set + * cause the added tuples to be visible when the scan continues. + * Problems also arise if the TID's are rearranged!!! * - * Now pins buffer once for each valid tuple pointer (rs_ptup, - * rs_ctup, rs_ntup) referencing it. - * - 01/13/94 + * Now pins buffer once for each valid tuple pointer (rs_ptup, + * rs_ctup, rs_ntup) referencing it. + * - 01/13/94 * * XXX might be better to do direct access instead of - * using the generality of heapgettup(). + * using the generality of heapgettup(). * * XXX It is very possible that when a scan is restored, that a tuple * XXX which previously qualified may fail for time range purposes, unless @@ -1455,60 +1561,69 @@ heap_markpos(HeapScanDesc sdesc) void heap_restrpos(HeapScanDesc sdesc) { - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_restrpos); - IncrHeapAccessStat(global_restrpos); - - /* XXX no amrestrpos checking that ammarkpos called */ - - /* Note: no locking manipulations needed */ - - unpinsdesc(sdesc); - - /* force heapgettup to pin buffer for each loaded tuple */ - sdesc->rs_pbuf = InvalidBuffer; - sdesc->rs_cbuf = InvalidBuffer; - sdesc->rs_nbuf = InvalidBuffer; - - if (!ItemPointerIsValid(&sdesc->rs_mptid)) { - sdesc->rs_ptup = NULL; - } else { - sdesc->rs_ptup = (HeapTuple) - heapgettup(sdesc->rs_rd, - &sdesc->rs_mptid, - 0, - &sdesc->rs_pbuf, - NowTimeQual, - 0, - (ScanKey) NULL); - } - - if (!ItemPointerIsValid(&sdesc->rs_mctid)) { - sdesc->rs_ctup = NULL; - } else { - 
sdesc->rs_ctup = (HeapTuple) - heapgettup(sdesc->rs_rd, - &sdesc->rs_mctid, - 0, - &sdesc->rs_cbuf, - NowTimeQual, - 0, - (ScanKey) NULL); - } - - if (!ItemPointerIsValid(&sdesc->rs_mntid)) { - sdesc->rs_ntup = NULL; - } else { - sdesc->rs_ntup = (HeapTuple) - heapgettup(sdesc->rs_rd, - &sdesc->rs_mntid, - 0, - &sdesc->rs_nbuf, - NowTimeQual, - 0, - (ScanKey) NULL); - } + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_restrpos); + IncrHeapAccessStat(global_restrpos); + + /* XXX no amrestrpos checking that ammarkpos called */ + + /* Note: no locking manipulations needed */ + + unpinsdesc(sdesc); + + /* force heapgettup to pin buffer for each loaded tuple */ + sdesc->rs_pbuf = InvalidBuffer; + sdesc->rs_cbuf = InvalidBuffer; + sdesc->rs_nbuf = InvalidBuffer; + + if (!ItemPointerIsValid(&sdesc->rs_mptid)) + { + sdesc->rs_ptup = NULL; + } + else + { + sdesc->rs_ptup = (HeapTuple) + heapgettup(sdesc->rs_rd, + &sdesc->rs_mptid, + 0, + &sdesc->rs_pbuf, + NowTimeQual, + 0, + (ScanKey) NULL); + } + + if (!ItemPointerIsValid(&sdesc->rs_mctid)) + { + sdesc->rs_ctup = NULL; + } + else + { + sdesc->rs_ctup = (HeapTuple) + heapgettup(sdesc->rs_rd, + &sdesc->rs_mctid, + 0, + &sdesc->rs_cbuf, + NowTimeQual, + 0, + (ScanKey) NULL); + } + + if (!ItemPointerIsValid(&sdesc->rs_mntid)) + { + sdesc->rs_ntup = NULL; + } + else + { + sdesc->rs_ntup = (HeapTuple) + heapgettup(sdesc->rs_rd, + &sdesc->rs_mntid, + 0, + &sdesc->rs_nbuf, + NowTimeQual, + 0, + (ScanKey) NULL); + } } diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index f172a40470..0854b69bf0 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * hio.c-- - * POSTGRES heap access method input/output code. + * POSTGRES heap access method input/output code. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Id: hio.c,v 1.9 1996/11/05 09:53:02 scrappy Exp $ + * $Id: hio.c,v 1.10 1997/09/07 04:38:11 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -21,64 +21,65 @@ /* * amputunique - place tuple at tid - * Currently on errors, calls elog. Perhaps should return -1? - * Possible errors include the addition of a tuple to the page - * between the time the linep is chosen and the page is L_UP'd. + * Currently on errors, calls elog. Perhaps should return -1? + * Possible errors include the addition of a tuple to the page + * between the time the linep is chosen and the page is L_UP'd. * - * This should be coordinated with the B-tree code. - * Probably needs to have an amdelunique to allow for - * internal index records to be deleted and reordered as needed. - * For the heap AM, this should never be needed. + * This should be coordinated with the B-tree code. + * Probably needs to have an amdelunique to allow for + * internal index records to be deleted and reordered as needed. + * For the heap AM, this should never be needed. 
*/ void RelationPutHeapTuple(Relation relation, - BlockNumber blockIndex, - HeapTuple tuple) + BlockNumber blockIndex, + HeapTuple tuple) { - Buffer buffer; - Page pageHeader; - BlockNumber numberOfBlocks; - OffsetNumber offnum; - unsigned int len; - ItemId itemId; - Item item; - - /* ---------------- - * increment access statistics - * ---------------- - */ - IncrHeapAccessStat(local_RelationPutHeapTuple); - IncrHeapAccessStat(global_RelationPutHeapTuple); - - Assert(RelationIsValid(relation)); - Assert(HeapTupleIsValid(tuple)); - - numberOfBlocks = RelationGetNumberOfBlocks(relation); - Assert(blockIndex < numberOfBlocks); - - buffer = ReadBuffer(relation, blockIndex); + Buffer buffer; + Page pageHeader; + BlockNumber numberOfBlocks; + OffsetNumber offnum; + unsigned int len; + ItemId itemId; + Item item; + + /* ---------------- + * increment access statistics + * ---------------- + */ + IncrHeapAccessStat(local_RelationPutHeapTuple); + IncrHeapAccessStat(global_RelationPutHeapTuple); + + Assert(RelationIsValid(relation)); + Assert(HeapTupleIsValid(tuple)); + + numberOfBlocks = RelationGetNumberOfBlocks(relation); + Assert(blockIndex < numberOfBlocks); + + buffer = ReadBuffer(relation, blockIndex); #ifndef NO_BUFFERISVALID - if (!BufferIsValid(buffer)) { - elog(WARN, "RelationPutHeapTuple: no buffer for %ld in %s", - blockIndex, &relation->rd_rel->relname); - } + if (!BufferIsValid(buffer)) + { + elog(WARN, "RelationPutHeapTuple: no buffer for %ld in %s", + blockIndex, &relation->rd_rel->relname); + } #endif - - pageHeader = (Page)BufferGetPage(buffer); - len = (unsigned)DOUBLEALIGN(tuple->t_len); /* be conservative */ - Assert((int)len <= PageGetFreeSpace(pageHeader)); - - offnum = PageAddItem((Page)pageHeader, (Item)tuple, - tuple->t_len, InvalidOffsetNumber, LP_USED); - - itemId = PageGetItemId((Page)pageHeader, offnum); - item = PageGetItem((Page)pageHeader, itemId); - - ItemPointerSet(&((HeapTuple)item)->t_ctid, blockIndex, offnum); - - WriteBuffer(buffer); 
- /* return an accurate tuple */ - ItemPointerSet(&tuple->t_ctid, blockIndex, offnum); + + pageHeader = (Page) BufferGetPage(buffer); + len = (unsigned) DOUBLEALIGN(tuple->t_len); /* be conservative */ + Assert((int) len <= PageGetFreeSpace(pageHeader)); + + offnum = PageAddItem((Page) pageHeader, (Item) tuple, + tuple->t_len, InvalidOffsetNumber, LP_USED); + + itemId = PageGetItemId((Page) pageHeader, offnum); + item = PageGetItem((Page) pageHeader, itemId); + + ItemPointerSet(&((HeapTuple) item)->t_ctid, blockIndex, offnum); + + WriteBuffer(buffer); + /* return an accurate tuple */ + ItemPointerSet(&tuple->t_ctid, blockIndex, offnum); } /* @@ -91,7 +92,7 @@ RelationPutHeapTuple(Relation relation, * Eventually, we should cache the number of blocks in a relation somewhere. * Until that time, this code will have to do an lseek to determine the number * of blocks in a relation. - * + * * This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write * to do an append; it's possible to eliminate 2 of the semops if we do direct * buffer stuff (!); the lseek and the write can go if we get @@ -107,70 +108,70 @@ RelationPutHeapTuple(Relation relation, void RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple) { - Buffer buffer; - Page pageHeader; - BlockNumber lastblock; - OffsetNumber offnum; - unsigned int len; - ItemId itemId; - Item item; - - Assert(RelationIsValid(relation)); - Assert(HeapTupleIsValid(tuple)); - - /* - * XXX This does an lseek - VERY expensive - but at the moment it - * is the only way to accurately determine how many blocks are in - * a relation. A good optimization would be to get this to actually - * work properly. 
- */ - - lastblock = RelationGetNumberOfBlocks(relation); - - if (lastblock == 0) + Buffer buffer; + Page pageHeader; + BlockNumber lastblock; + OffsetNumber offnum; + unsigned int len; + ItemId itemId; + Item item; + + Assert(RelationIsValid(relation)); + Assert(HeapTupleIsValid(tuple)); + + /* + * XXX This does an lseek - VERY expensive - but at the moment it is + * the only way to accurately determine how many blocks are in a + * relation. A good optimization would be to get this to actually + * work properly. + */ + + lastblock = RelationGetNumberOfBlocks(relation); + + if (lastblock == 0) { - buffer = ReadBuffer(relation, lastblock); - pageHeader = (Page)BufferGetPage(buffer); - if (PageIsNew((PageHeader) pageHeader)) + buffer = ReadBuffer(relation, lastblock); + pageHeader = (Page) BufferGetPage(buffer); + if (PageIsNew((PageHeader) pageHeader)) { - buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW); - pageHeader = (Page)BufferGetPage(buffer); - PageInit(pageHeader, BufferGetPageSize(buffer), 0); + buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW); + pageHeader = (Page) BufferGetPage(buffer); + PageInit(pageHeader, BufferGetPageSize(buffer), 0); } } - else - buffer = ReadBuffer(relation, lastblock - 1); - - pageHeader = (Page)BufferGetPage(buffer); - len = (unsigned)DOUBLEALIGN(tuple->t_len); /* be conservative */ - - /* - * Note that this is true if the above returned a bogus page, which - * it will do for a completely empty relation. - */ - - if (len > PageGetFreeSpace(pageHeader)) + else + buffer = ReadBuffer(relation, lastblock - 1); + + pageHeader = (Page) BufferGetPage(buffer); + len = (unsigned) DOUBLEALIGN(tuple->t_len); /* be conservative */ + + /* + * Note that this is true if the above returned a bogus page, which it + * will do for a completely empty relation. 
+ */ + + if (len > PageGetFreeSpace(pageHeader)) { - buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW); - pageHeader = (Page)BufferGetPage(buffer); - PageInit(pageHeader, BufferGetPageSize(buffer), 0); - - if (len > PageGetFreeSpace(pageHeader)) - elog(WARN, "Tuple is too big: size %d", len); + buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW); + pageHeader = (Page) BufferGetPage(buffer); + PageInit(pageHeader, BufferGetPageSize(buffer), 0); + + if (len > PageGetFreeSpace(pageHeader)) + elog(WARN, "Tuple is too big: size %d", len); } - - offnum = PageAddItem((Page)pageHeader, (Item)tuple, - tuple->t_len, InvalidOffsetNumber, LP_USED); - - itemId = PageGetItemId((Page)pageHeader, offnum); - item = PageGetItem((Page)pageHeader, itemId); - - lastblock = BufferGetBlockNumber(buffer); - - ItemPointerSet(&((HeapTuple)item)->t_ctid, lastblock, offnum); - - /* return an accurate tuple */ - ItemPointerSet(&tuple->t_ctid, lastblock, offnum); - - WriteBuffer(buffer); + + offnum = PageAddItem((Page) pageHeader, (Item) tuple, + tuple->t_len, InvalidOffsetNumber, LP_USED); + + itemId = PageGetItemId((Page) pageHeader, offnum); + item = PageGetItem((Page) pageHeader, itemId); + + lastblock = BufferGetBlockNumber(buffer); + + ItemPointerSet(&((HeapTuple) item)->t_ctid, lastblock, offnum); + + /* return an accurate tuple */ + ItemPointerSet(&tuple->t_ctid, lastblock, offnum); + + WriteBuffer(buffer); } diff --git a/src/backend/access/heap/stats.c b/src/backend/access/heap/stats.c index ae8273ac81..aa16803779 100644 --- a/src/backend/access/heap/stats.c +++ b/src/backend/access/heap/stats.c @@ -1,16 +1,16 @@ /*------------------------------------------------------------------------- * * stats.c-- - * heap access method debugging statistic collection routines + * heap access method debugging statistic collection routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/Attic/stats.c,v 
1.11 1997/08/19 21:29:21 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/Attic/stats.c,v 1.12 1997/09/07 04:38:13 momjian Exp $ * * NOTES - * initam should be moved someplace else. + * initam should be moved someplace else. * *------------------------------------------------------------------------- */ @@ -23,322 +23,327 @@ #include <utils/mcxt.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static void InitHeapAccessStatistics(void); +static void InitHeapAccessStatistics(void); /* ---------------- - * InitHeapAccessStatistics + * InitHeapAccessStatistics * ---------------- */ HeapAccessStatistics heap_access_stats = (HeapAccessStatistics) NULL; - + static void -InitHeapAccessStatistics() +InitHeapAccessStatistics() { - MemoryContext oldContext; - HeapAccessStatistics stats; - - /* ---------------- - * make sure we don't initialize things twice - * ---------------- - */ - if (heap_access_stats != NULL) - return; - - /* ---------------- - * allocate statistics structure from the top memory context - * ---------------- - */ - oldContext = MemoryContextSwitchTo(TopMemoryContext); - - stats = (HeapAccessStatistics) - palloc(sizeof(HeapAccessStatisticsData)); - - /* ---------------- - * initialize fields to default values - * ---------------- - */ - stats->global_open = 0; - stats->global_openr = 0; - stats->global_close = 0; - stats->global_beginscan = 0; - stats->global_rescan = 0; - stats->global_endscan = 0; - stats->global_getnext = 0; - stats->global_fetch = 0; - stats->global_insert = 0; - stats->global_delete = 0; - stats->global_replace = 0; - stats->global_markpos = 0; - stats->global_restrpos = 0; - stats->global_BufferGetRelation = 0; - stats->global_RelationIdGetRelation = 0; - stats->global_RelationIdGetRelation_Buf = 0; - stats->global_getreldesc = 0; - stats->global_heapgettup = 0; - stats->global_RelationPutHeapTuple = 0; - 
stats->global_RelationPutLongHeapTuple = 0; - - stats->local_open = 0; - stats->local_openr = 0; - stats->local_close = 0; - stats->local_beginscan = 0; - stats->local_rescan = 0; - stats->local_endscan = 0; - stats->local_getnext = 0; - stats->local_fetch = 0; - stats->local_insert = 0; - stats->local_delete = 0; - stats->local_replace = 0; - stats->local_markpos = 0; - stats->local_restrpos = 0; - stats->local_BufferGetRelation = 0; - stats->local_RelationIdGetRelation = 0; - stats->local_RelationIdGetRelation_Buf = 0; - stats->local_getreldesc = 0; - stats->local_heapgettup = 0; - stats->local_RelationPutHeapTuple = 0; - stats->local_RelationPutLongHeapTuple = 0; - stats->local_RelationNameGetRelation = 0; - stats->global_RelationNameGetRelation = 0; - - /* ---------------- - * record init times - * ---------------- - */ - time(&stats->init_global_timestamp); - time(&stats->local_reset_timestamp); - time(&stats->last_request_timestamp); - - /* ---------------- - * return to old memory context - * ---------------- - */ - MemoryContextSwitchTo(oldContext); - - heap_access_stats = stats; + MemoryContext oldContext; + HeapAccessStatistics stats; + + /* ---------------- + * make sure we don't initialize things twice + * ---------------- + */ + if (heap_access_stats != NULL) + return; + + /* ---------------- + * allocate statistics structure from the top memory context + * ---------------- + */ + oldContext = MemoryContextSwitchTo(TopMemoryContext); + + stats = (HeapAccessStatistics) + palloc(sizeof(HeapAccessStatisticsData)); + + /* ---------------- + * initialize fields to default values + * ---------------- + */ + stats->global_open = 0; + stats->global_openr = 0; + stats->global_close = 0; + stats->global_beginscan = 0; + stats->global_rescan = 0; + stats->global_endscan = 0; + stats->global_getnext = 0; + stats->global_fetch = 0; + stats->global_insert = 0; + stats->global_delete = 0; + stats->global_replace = 0; + stats->global_markpos = 0; + 
stats->global_restrpos = 0; + stats->global_BufferGetRelation = 0; + stats->global_RelationIdGetRelation = 0; + stats->global_RelationIdGetRelation_Buf = 0; + stats->global_getreldesc = 0; + stats->global_heapgettup = 0; + stats->global_RelationPutHeapTuple = 0; + stats->global_RelationPutLongHeapTuple = 0; + + stats->local_open = 0; + stats->local_openr = 0; + stats->local_close = 0; + stats->local_beginscan = 0; + stats->local_rescan = 0; + stats->local_endscan = 0; + stats->local_getnext = 0; + stats->local_fetch = 0; + stats->local_insert = 0; + stats->local_delete = 0; + stats->local_replace = 0; + stats->local_markpos = 0; + stats->local_restrpos = 0; + stats->local_BufferGetRelation = 0; + stats->local_RelationIdGetRelation = 0; + stats->local_RelationIdGetRelation_Buf = 0; + stats->local_getreldesc = 0; + stats->local_heapgettup = 0; + stats->local_RelationPutHeapTuple = 0; + stats->local_RelationPutLongHeapTuple = 0; + stats->local_RelationNameGetRelation = 0; + stats->global_RelationNameGetRelation = 0; + + /* ---------------- + * record init times + * ---------------- + */ + time(&stats->init_global_timestamp); + time(&stats->local_reset_timestamp); + time(&stats->last_request_timestamp); + + /* ---------------- + * return to old memory context + * ---------------- + */ + MemoryContextSwitchTo(oldContext); + + heap_access_stats = stats; } #ifdef NOT_USED /* ---------------- - * ResetHeapAccessStatistics + * ResetHeapAccessStatistics * ---------------- */ void -ResetHeapAccessStatistics() +ResetHeapAccessStatistics() { - HeapAccessStatistics stats; - - /* ---------------- - * do nothing if stats aren't initialized - * ---------------- - */ - if (heap_access_stats == NULL) - return; - - stats = heap_access_stats; - - /* ---------------- - * reset local counts - * ---------------- - */ - stats->local_open = 0; - stats->local_openr = 0; - stats->local_close = 0; - stats->local_beginscan = 0; - stats->local_rescan = 0; - stats->local_endscan = 0; - 
stats->local_getnext = 0; - stats->local_fetch = 0; - stats->local_insert = 0; - stats->local_delete = 0; - stats->local_replace = 0; - stats->local_markpos = 0; - stats->local_restrpos = 0; - stats->local_BufferGetRelation = 0; - stats->local_RelationIdGetRelation = 0; - stats->local_RelationIdGetRelation_Buf = 0; - stats->local_getreldesc = 0; - stats->local_heapgettup = 0; - stats->local_RelationPutHeapTuple = 0; - stats->local_RelationPutLongHeapTuple = 0; - - /* ---------------- - * reset local timestamps - * ---------------- - */ - time(&stats->local_reset_timestamp); - time(&stats->last_request_timestamp); + HeapAccessStatistics stats; + + /* ---------------- + * do nothing if stats aren't initialized + * ---------------- + */ + if (heap_access_stats == NULL) + return; + + stats = heap_access_stats; + + /* ---------------- + * reset local counts + * ---------------- + */ + stats->local_open = 0; + stats->local_openr = 0; + stats->local_close = 0; + stats->local_beginscan = 0; + stats->local_rescan = 0; + stats->local_endscan = 0; + stats->local_getnext = 0; + stats->local_fetch = 0; + stats->local_insert = 0; + stats->local_delete = 0; + stats->local_replace = 0; + stats->local_markpos = 0; + stats->local_restrpos = 0; + stats->local_BufferGetRelation = 0; + stats->local_RelationIdGetRelation = 0; + stats->local_RelationIdGetRelation_Buf = 0; + stats->local_getreldesc = 0; + stats->local_heapgettup = 0; + stats->local_RelationPutHeapTuple = 0; + stats->local_RelationPutLongHeapTuple = 0; + + /* ---------------- + * reset local timestamps + * ---------------- + */ + time(&stats->local_reset_timestamp); + time(&stats->last_request_timestamp); } + #endif #ifdef NOT_USED /* ---------------- - * GetHeapAccessStatistics + * GetHeapAccessStatistics * ---------------- */ -HeapAccessStatistics GetHeapAccessStatistics() +HeapAccessStatistics +GetHeapAccessStatistics() { - HeapAccessStatistics stats; - - /* ---------------- - * return nothing if stats aren't 
initialized - * ---------------- - */ - if (heap_access_stats == NULL) - return NULL; - - /* ---------------- - * record the current request time - * ---------------- - */ - time(&heap_access_stats->last_request_timestamp); - - /* ---------------- - * allocate a copy of the stats and return it to the caller. - * ---------------- - */ - stats = (HeapAccessStatistics) - palloc(sizeof(HeapAccessStatisticsData)); - - memmove(stats, - heap_access_stats, - sizeof(HeapAccessStatisticsData)); - - return stats; + HeapAccessStatistics stats; + + /* ---------------- + * return nothing if stats aren't initialized + * ---------------- + */ + if (heap_access_stats == NULL) + return NULL; + + /* ---------------- + * record the current request time + * ---------------- + */ + time(&heap_access_stats->last_request_timestamp); + + /* ---------------- + * allocate a copy of the stats and return it to the caller. + * ---------------- + */ + stats = (HeapAccessStatistics) + palloc(sizeof(HeapAccessStatisticsData)); + + memmove(stats, + heap_access_stats, + sizeof(HeapAccessStatisticsData)); + + return stats; } + #endif #ifdef NOT_USED /* ---------------- - * PrintHeapAccessStatistics + * PrintHeapAccessStatistics * ---------------- */ void PrintHeapAccessStatistics(HeapAccessStatistics stats) { - /* ---------------- - * return nothing if stats aren't valid - * ---------------- - */ - if (stats == NULL) - return; - - printf("======== heap am statistics ========\n"); - printf("init_global_timestamp: %s", - ctime(&(stats->init_global_timestamp))); - - printf("local_reset_timestamp: %s", - ctime(&(stats->local_reset_timestamp))); - - printf("last_request_timestamp: %s", - ctime(&(stats->last_request_timestamp))); - - printf("local/global_open: %6d/%6d\n", - stats->local_open, stats->global_open); - - printf("local/global_openr: %6d/%6d\n", - stats->local_openr, stats->global_openr); - - printf("local/global_close: %6d/%6d\n", - stats->local_close, stats->global_close); - - 
printf("local/global_beginscan: %6d/%6d\n", - stats->local_beginscan, stats->global_beginscan); - - printf("local/global_rescan: %6d/%6d\n", - stats->local_rescan, stats->global_rescan); - - printf("local/global_endscan: %6d/%6d\n", - stats->local_endscan, stats->global_endscan); - - printf("local/global_getnext: %6d/%6d\n", - stats->local_getnext, stats->global_getnext); - - printf("local/global_fetch: %6d/%6d\n", - stats->local_fetch, stats->global_fetch); - - printf("local/global_insert: %6d/%6d\n", - stats->local_insert, stats->global_insert); - - printf("local/global_delete: %6d/%6d\n", - stats->local_delete, stats->global_delete); - - printf("local/global_replace: %6d/%6d\n", - stats->local_replace, stats->global_replace); - - printf("local/global_markpos: %6d/%6d\n", - stats->local_markpos, stats->global_markpos); - - printf("local/global_restrpos: %6d/%6d\n", - stats->local_restrpos, stats->global_restrpos); - - printf("================\n"); - - printf("local/global_BufferGetRelation: %6d/%6d\n", - stats->local_BufferGetRelation, - stats->global_BufferGetRelation); - - printf("local/global_RelationIdGetRelation: %6d/%6d\n", - stats->local_RelationIdGetRelation, - stats->global_RelationIdGetRelation); - - printf("local/global_RelationIdGetRelation_Buf: %6d/%6d\n", - stats->local_RelationIdGetRelation_Buf, - stats->global_RelationIdGetRelation_Buf); - - printf("local/global_getreldesc: %6d/%6d\n", - stats->local_getreldesc, stats->global_getreldesc); - - printf("local/global_heapgettup: %6d/%6d\n", - stats->local_heapgettup, stats->global_heapgettup); - - printf("local/global_RelationPutHeapTuple: %6d/%6d\n", - stats->local_RelationPutHeapTuple, - stats->global_RelationPutHeapTuple); - - printf("local/global_RelationPutLongHeapTuple: %6d/%6d\n", - stats->local_RelationPutLongHeapTuple, - stats->global_RelationPutLongHeapTuple); - - printf("===================================\n"); - - printf("\n"); + /* ---------------- + * return nothing if stats aren't valid 
+ * ---------------- + */ + if (stats == NULL) + return; + + printf("======== heap am statistics ========\n"); + printf("init_global_timestamp: %s", + ctime(&(stats->init_global_timestamp))); + + printf("local_reset_timestamp: %s", + ctime(&(stats->local_reset_timestamp))); + + printf("last_request_timestamp: %s", + ctime(&(stats->last_request_timestamp))); + + printf("local/global_open: %6d/%6d\n", + stats->local_open, stats->global_open); + + printf("local/global_openr: %6d/%6d\n", + stats->local_openr, stats->global_openr); + + printf("local/global_close: %6d/%6d\n", + stats->local_close, stats->global_close); + + printf("local/global_beginscan: %6d/%6d\n", + stats->local_beginscan, stats->global_beginscan); + + printf("local/global_rescan: %6d/%6d\n", + stats->local_rescan, stats->global_rescan); + + printf("local/global_endscan: %6d/%6d\n", + stats->local_endscan, stats->global_endscan); + + printf("local/global_getnext: %6d/%6d\n", + stats->local_getnext, stats->global_getnext); + + printf("local/global_fetch: %6d/%6d\n", + stats->local_fetch, stats->global_fetch); + + printf("local/global_insert: %6d/%6d\n", + stats->local_insert, stats->global_insert); + + printf("local/global_delete: %6d/%6d\n", + stats->local_delete, stats->global_delete); + + printf("local/global_replace: %6d/%6d\n", + stats->local_replace, stats->global_replace); + + printf("local/global_markpos: %6d/%6d\n", + stats->local_markpos, stats->global_markpos); + + printf("local/global_restrpos: %6d/%6d\n", + stats->local_restrpos, stats->global_restrpos); + + printf("================\n"); + + printf("local/global_BufferGetRelation: %6d/%6d\n", + stats->local_BufferGetRelation, + stats->global_BufferGetRelation); + + printf("local/global_RelationIdGetRelation: %6d/%6d\n", + stats->local_RelationIdGetRelation, + stats->global_RelationIdGetRelation); + + printf("local/global_RelationIdGetRelation_Buf: %6d/%6d\n", + stats->local_RelationIdGetRelation_Buf, + 
stats->global_RelationIdGetRelation_Buf); + + printf("local/global_getreldesc: %6d/%6d\n", + stats->local_getreldesc, stats->global_getreldesc); + + printf("local/global_heapgettup: %6d/%6d\n", + stats->local_heapgettup, stats->global_heapgettup); + + printf("local/global_RelationPutHeapTuple: %6d/%6d\n", + stats->local_RelationPutHeapTuple, + stats->global_RelationPutHeapTuple); + + printf("local/global_RelationPutLongHeapTuple: %6d/%6d\n", + stats->local_RelationPutLongHeapTuple, + stats->global_RelationPutLongHeapTuple); + + printf("===================================\n"); + + printf("\n"); } + #endif #ifdef NOT_USED /* ---------------- - * PrintAndFreeHeapAccessStatistics + * PrintAndFreeHeapAccessStatistics * ---------------- */ void PrintAndFreeHeapAccessStatistics(HeapAccessStatistics stats) { - PrintHeapAccessStatistics(stats); - if (stats != NULL) - pfree(stats); + PrintHeapAccessStatistics(stats); + if (stats != NULL) + pfree(stats); } + #endif /* ---------------------------------------------------------------- - * access method initialization + * access method initialization * ---------------------------------------------------------------- */ /* ---------------- - * initam should someday be moved someplace else. + * initam should someday be moved someplace else. * ---------------- */ void initam(void) { - /* ---------------- - * initialize heap statistics. - * ---------------- - */ - InitHeapAccessStatistics(); + /* ---------------- + * initialize heap statistics. 
+ * ---------------- + */ + InitHeapAccessStatistics(); } diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 52b7b1473b..da7fc0dc09 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -1,17 +1,17 @@ /*------------------------------------------------------------------------- * * genam.c-- - * general index access method routines + * general index access method routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/genam.c,v 1.7 1997/08/19 21:29:26 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/genam.c,v 1.8 1997/09/07 04:38:17 momjian Exp $ * * NOTES - * many of the old access method routines have been turned into - * macros and moved to genam.h -cim 4/30/91 + * many of the old access method routines have been turned into + * macros and moved to genam.h -cim 4/30/91 * *------------------------------------------------------------------------- */ @@ -29,18 +29,18 @@ * previous, current, next. Note that the case of reverse scans works * identically. * - * State Result - * (1) + + - + 0 0 (if the next item pointer is invalid) - * (2) + X - (otherwise) - * (3) * 0 0 * 0 0 (no change) - * (4) + X 0 X 0 0 (shift) - * (5) * + X + X - (shift, add unknown) + * State Result + * (1) + + - + 0 0 (if the next item pointer is invalid) + * (2) + X - (otherwise) + * (3) * 0 0 * 0 0 (no change) + * (4) + X 0 X 0 0 (shift) + * (5) * + X + X - (shift, add unknown) * * All other states cannot occur. * * Note: *It would be possible to cache the status of the previous and - * next item pointer using the flags. + * next item pointer using the flags. 
* ---------------------------------------------------------------- */ @@ -51,220 +51,234 @@ #include <storage/bufmgr.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif /* ---------------------------------------------------------------- - * general access method routines + * general access method routines * - * All indexed access methods use an identical scan structure. - * We don't know how the various AMs do locking, however, so we don't - * do anything about that here. + * All indexed access methods use an identical scan structure. + * We don't know how the various AMs do locking, however, so we don't + * do anything about that here. * - * The intent is that an AM implementor will define a front-end routine - * that calls this one, to fill in the scan, and then does whatever kind - * of locking he wants. + * The intent is that an AM implementor will define a front-end routine + * that calls this one, to fill in the scan, and then does whatever kind + * of locking he wants. * ---------------------------------------------------------------- */ /* ---------------- - * RelationGetIndexScan -- Create and fill an IndexScanDesc. + * RelationGetIndexScan -- Create and fill an IndexScanDesc. * - * This routine creates an index scan structure and sets its contents - * up correctly. This routine calls AMrescan to set up the scan with - * the passed key. + * This routine creates an index scan structure and sets its contents + * up correctly. This routine calls AMrescan to set up the scan with + * the passed key. * - * Parameters: - * relation -- index relation for scan. - * scanFromEnd -- if true, begin scan at one of the index's - * endpoints. - * numberOfKeys -- count of scan keys (more than one won't - * necessarily do anything useful, yet). - * key -- the ScanKey for the starting position of the scan. + * Parameters: + * relation -- index relation for scan. 
+ * scanFromEnd -- if true, begin scan at one of the index's + * endpoints. + * numberOfKeys -- count of scan keys (more than one won't + * necessarily do anything useful, yet). + * key -- the ScanKey for the starting position of the scan. * - * Returns: - * An initialized IndexScanDesc. + * Returns: + * An initialized IndexScanDesc. + * + * Side Effects: + * Bumps the ref count on the relation to keep it in the cache. * - * Side Effects: - * Bumps the ref count on the relation to keep it in the cache. - * * ---------------- */ IndexScanDesc RelationGetIndexScan(Relation relation, - bool scanFromEnd, - uint16 numberOfKeys, - ScanKey key) + bool scanFromEnd, + uint16 numberOfKeys, + ScanKey key) { - IndexScanDesc scan; - - if (! RelationIsValid(relation)) - elog(WARN, "RelationGetIndexScan: relation invalid"); - - scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); - - scan->relation = relation; - scan->opaque = NULL; - scan->numberOfKeys = numberOfKeys; - - ItemPointerSetInvalid(&scan->previousItemData); - ItemPointerSetInvalid(&scan->currentItemData); - ItemPointerSetInvalid(&scan->nextItemData); - ItemPointerSetInvalid(&scan->previousMarkData); - ItemPointerSetInvalid(&scan->currentMarkData); - ItemPointerSetInvalid(&scan->nextMarkData); + IndexScanDesc scan; + + if (!RelationIsValid(relation)) + elog(WARN, "RelationGetIndexScan: relation invalid"); + + scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); - if (numberOfKeys > 0) { - scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * numberOfKeys); - } else { - scan->keyData = NULL; - } + scan->relation = relation; + scan->opaque = NULL; + scan->numberOfKeys = numberOfKeys; + + ItemPointerSetInvalid(&scan->previousItemData); + ItemPointerSetInvalid(&scan->currentItemData); + ItemPointerSetInvalid(&scan->nextItemData); + ItemPointerSetInvalid(&scan->previousMarkData); + ItemPointerSetInvalid(&scan->currentMarkData); + ItemPointerSetInvalid(&scan->nextMarkData); + + if (numberOfKeys > 0) + { + 
scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * numberOfKeys); + } + else + { + scan->keyData = NULL; + } - index_rescan(scan, scanFromEnd, key); - - return (scan); + index_rescan(scan, scanFromEnd, key); + + return (scan); } #ifdef NOT_USED /* ---------------- - * IndexScanRestart -- Restart an index scan. + * IndexScanRestart -- Restart an index scan. * - * This routine isn't used by any existing access method. It's - * appropriate if relation level locks are what you want. + * This routine isn't used by any existing access method. It's + * appropriate if relation level locks are what you want. * - * Returns: - * None. + * Returns: + * None. * - * Side Effects: - * None. + * Side Effects: + * None. * ---------------- */ void IndexScanRestart(IndexScanDesc scan, - bool scanFromEnd, - ScanKey key) + bool scanFromEnd, + ScanKey key) { - if (! IndexScanIsValid(scan)) - elog(WARN, "IndexScanRestart: invalid scan"); - - ItemPointerSetInvalid(&scan->previousItemData); - ItemPointerSetInvalid(&scan->currentItemData); - ItemPointerSetInvalid(&scan->nextItemData); - - if (RelationGetNumberOfBlocks(scan->relation) == 0) - scan->flags = ScanUnmarked; - else if (scanFromEnd) - scan->flags = ScanUnmarked | ScanUncheckedPrevious; - else - scan->flags = ScanUnmarked | ScanUncheckedNext; - - scan->scanFromEnd = (bool) scanFromEnd; - - if (scan->numberOfKeys > 0) - memmove(scan->keyData, - key, - scan->numberOfKeys * sizeof(ScanKeyData)); + if (!IndexScanIsValid(scan)) + elog(WARN, "IndexScanRestart: invalid scan"); + + ItemPointerSetInvalid(&scan->previousItemData); + ItemPointerSetInvalid(&scan->currentItemData); + ItemPointerSetInvalid(&scan->nextItemData); + + if (RelationGetNumberOfBlocks(scan->relation) == 0) + scan->flags = ScanUnmarked; + else if (scanFromEnd) + scan->flags = ScanUnmarked | ScanUncheckedPrevious; + else + scan->flags = ScanUnmarked | ScanUncheckedNext; + + scan->scanFromEnd = (bool) scanFromEnd; + + if (scan->numberOfKeys > 0) + 
memmove(scan->keyData, + key, + scan->numberOfKeys * sizeof(ScanKeyData)); } + #endif #ifdef NOT_USED /* ---------------- - * IndexScanEnd -- End and index scan. + * IndexScanEnd -- End and index scan. * - * This routine is not used by any existing access method, but is - * suitable for use if you don't want to do sophisticated locking. + * This routine is not used by any existing access method, but is + * suitable for use if you don't want to do sophisticated locking. * - * Returns: - * None. + * Returns: + * None. * - * Side Effects: - * None. + * Side Effects: + * None. * ---------------- */ void IndexScanEnd(IndexScanDesc scan) { - if (! IndexScanIsValid(scan)) - elog(WARN, "IndexScanEnd: invalid scan"); - - pfree(scan); + if (!IndexScanIsValid(scan)) + elog(WARN, "IndexScanEnd: invalid scan"); + + pfree(scan); } + #endif /* ---------------- - * IndexScanMarkPosition -- Mark current position in a scan. + * IndexScanMarkPosition -- Mark current position in a scan. * - * This routine isn't used by any existing access method, but is the - * one that AM implementors should use, if they don't want to do any - * special locking. If relation-level locking is sufficient, this is - * the routine for you. + * This routine isn't used by any existing access method, but is the + * one that AM implementors should use, if they don't want to do any + * special locking. If relation-level locking is sufficient, this is + * the routine for you. * - * Returns: - * None. + * Returns: + * None. * - * Side Effects: - * None. + * Side Effects: + * None. 
* ---------------- */ void IndexScanMarkPosition(IndexScanDesc scan) { - RetrieveIndexResult result; - - if (scan->flags & ScanUncheckedPrevious) { - result = - index_getnext(scan, BackwardScanDirection); - - if (result != NULL) { - scan->previousItemData = result->index_iptr; - } else { - ItemPointerSetInvalid(&scan->previousItemData); + RetrieveIndexResult result; + + if (scan->flags & ScanUncheckedPrevious) + { + result = + index_getnext(scan, BackwardScanDirection); + + if (result != NULL) + { + scan->previousItemData = result->index_iptr; + } + else + { + ItemPointerSetInvalid(&scan->previousItemData); + } + } - - } else if (scan->flags & ScanUncheckedNext) { - result = (RetrieveIndexResult) - index_getnext(scan, ForwardScanDirection); - - if (result != NULL) { - scan->nextItemData = result->index_iptr; - } else { - ItemPointerSetInvalid(&scan->nextItemData); + else if (scan->flags & ScanUncheckedNext) + { + result = (RetrieveIndexResult) + index_getnext(scan, ForwardScanDirection); + + if (result != NULL) + { + scan->nextItemData = result->index_iptr; + } + else + { + ItemPointerSetInvalid(&scan->nextItemData); + } } - } - - scan->previousMarkData = scan->previousItemData; - scan->currentMarkData = scan->currentItemData; - scan->nextMarkData = scan->nextItemData; - - scan->flags = 0x0; /* XXX should have a symbolic name */ + + scan->previousMarkData = scan->previousItemData; + scan->currentMarkData = scan->currentItemData; + scan->nextMarkData = scan->nextItemData; + + scan->flags = 0x0; /* XXX should have a symbolic name */ } /* ---------------- - * IndexScanRestorePosition -- Restore position on a marked scan. + * IndexScanRestorePosition -- Restore position on a marked scan. * - * This routine isn't used by any existing access method, but is the - * one that AM implementors should use if they don't want to do any - * special locking. If relation-level locking is sufficient, then - * this is the one you want. 
+ * This routine isn't used by any existing access method, but is the + * one that AM implementors should use if they don't want to do any + * special locking. If relation-level locking is sufficient, then + * this is the one you want. * - * Returns: - * None. + * Returns: + * None. * - * Side Effects: - * None. + * Side Effects: + * None. * ---------------- */ void IndexScanRestorePosition(IndexScanDesc scan) -{ - if (scan->flags & ScanUnmarked) - elog(WARN, "IndexScanRestorePosition: no mark to restore"); - - scan->previousItemData = scan->previousMarkData; - scan->currentItemData = scan->currentMarkData; - scan->nextItemData = scan->nextMarkData; - - scan->flags = 0x0; /* XXX should have a symbolic name */ +{ + if (scan->flags & ScanUnmarked) + elog(WARN, "IndexScanRestorePosition: no mark to restore"); + + scan->previousItemData = scan->previousMarkData; + scan->currentItemData = scan->currentMarkData; + scan->nextItemData = scan->nextMarkData; + + scan->flags = 0x0; /* XXX should have a symbolic name */ } diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 3068f7ccee..6841899fa3 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -1,80 +1,80 @@ /*------------------------------------------------------------------------- * * indexam.c-- - * general index access method routines + * general index access method routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.13 1997/08/26 23:31:28 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.14 1997/09/07 04:38:26 momjian Exp $ * * INTERFACE ROUTINES - * index_open - open an index relation by relationId - * index_openr - open a index relation by name - * index_close - close a index relation - * index_beginscan - start a scan of an index - * index_rescan - restart a scan of an index - * index_endscan - end a 
scan - * index_insert - insert an index tuple into a relation - * index_delete - delete an item from an index relation - * index_markpos - mark a scan position - * index_restrpos - restore a scan position - * index_getnext - get the next tuple from a scan - * ** index_fetch - retrieve tuple with tid + * index_open - open an index relation by relationId + * index_openr - open a index relation by name + * index_close - close a index relation + * index_beginscan - start a scan of an index + * index_rescan - restart a scan of an index + * index_endscan - end a scan + * index_insert - insert an index tuple into a relation + * index_delete - delete an item from an index relation + * index_markpos - mark a scan position + * index_restrpos - restore a scan position + * index_getnext - get the next tuple from a scan + * ** index_fetch - retrieve tuple with tid * ** index_replace - replace a tuple * ** index_getattr - get an attribute from an index tuple - * index_getprocid - get a support procedure id from the rel tuple - * - * IndexScanIsValid - check index scan + * index_getprocid - get a support procedure id from the rel tuple + * + * IndexScanIsValid - check index scan * * NOTES - * This file contains the index_ routines which used - * to be a scattered collection of stuff in access/genam. + * This file contains the index_ routines which used + * to be a scattered collection of stuff in access/genam. * - * The ** routines: index_fetch, index_replace, and index_getattr - * have not yet been implemented. They may not be needed. + * The ** routines: index_fetch, index_replace, and index_getattr + * have not yet been implemented. They may not be needed. * * old comments - * Scans are implemented as follows: + * Scans are implemented as follows: * - * `0' represents an invalid item pointer. - * `-' represents an unknown item pointer. - * `X' represents a known item pointers. - * `+' represents known or invalid item pointers. - * `*' represents any item pointers. 
+ * `0' represents an invalid item pointer. + * `-' represents an unknown item pointer. + * `X' represents a known item pointers. + * `+' represents known or invalid item pointers. + * `*' represents any item pointers. * - * State is represented by a triple of these symbols in the order of - * previous, current, next. Note that the case of reverse scans works - * identically. + * State is represented by a triple of these symbols in the order of + * previous, current, next. Note that the case of reverse scans works + * identically. * - * State Result - * (1) + + - + 0 0 (if the next item pointer is invalid) - * (2) + X - (otherwise) - * (3) * 0 0 * 0 0 (no change) - * (4) + X 0 X 0 0 (shift) - * (5) * + X + X - (shift, add unknown) + * State Result + * (1) + + - + 0 0 (if the next item pointer is invalid) + * (2) + X - (otherwise) + * (3) * 0 0 * 0 0 (no change) + * (4) + X 0 X 0 0 (shift) + * (5) * + X + X - (shift, add unknown) * - * All other states cannot occur. + * All other states cannot occur. * - * Note: It would be possible to cache the status of the previous and - * next item pointer using the flags. + * Note: It would be possible to cache the status of the previous and + * next item pointer using the flags. * *------------------------------------------------------------------------- */ #include <postgres.h> - -#include <access/genam.h> + +#include <access/genam.h> #include <utils/relcache.h> #include <fmgr.h> #include <storage/lmgr.h> #include <access/heapam.h> /* ---------------- - * undefine macros we aren't going to use that would otherwise - * get in our way.. delete is defined in c.h and the am's are - * defined in heapam.h + * undefine macros we aren't going to use that would otherwise + * get in our way.. 
delete is defined in c.h and the am's are + * defined in heapam.h * ---------------- */ #undef delete @@ -88,314 +88,320 @@ #undef amgettuple /* ---------------------------------------------------------------- - * macros used in index_ routines + * macros used in index_ routines * ---------------------------------------------------------------- */ #define RELATION_CHECKS \ Assert(RelationIsValid(relation)); \ - Assert(PointerIsValid(relation->rd_am)) - + Assert(PointerIsValid(relation->rd_am)) + #define SCAN_CHECKS \ - Assert(IndexScanIsValid(scan)); \ - Assert(RelationIsValid(scan->relation)); \ - Assert(PointerIsValid(scan->relation->rd_am)) - + Assert(IndexScanIsValid(scan)); \ + Assert(RelationIsValid(scan->relation)); \ + Assert(PointerIsValid(scan->relation->rd_am)) + #define GET_REL_PROCEDURE(x,y) \ - procedure = relation->rd_am->y; \ - if (! RegProcedureIsValid(procedure)) \ - elog(WARN, "index_%s: invalid %s regproc", \ - CppAsString(x), CppAsString(y)) - + procedure = relation->rd_am->y; \ + if (! RegProcedureIsValid(procedure)) \ + elog(WARN, "index_%s: invalid %s regproc", \ + CppAsString(x), CppAsString(y)) + #define GET_SCAN_PROCEDURE(x,y) \ - procedure = scan->relation->rd_am->y; \ - if (! RegProcedureIsValid(procedure)) \ - elog(WARN, "index_%s: invalid %s regproc", \ - CppAsString(x), CppAsString(y)) - - + procedure = scan->relation->rd_am->y; \ + if (! RegProcedureIsValid(procedure)) \ + elog(WARN, "index_%s: invalid %s regproc", \ + CppAsString(x), CppAsString(y)) + + /* ---------------------------------------------------------------- - * index_ interface functions + * index_ interface functions * ---------------------------------------------------------------- */ /* ---------------- - * index_open - open an index relation by relationId + * index_open - open an index relation by relationId * - * presently the relcache routines do all the work we need - * to open/close index relations. 
+ * presently the relcache routines do all the work we need + * to open/close index relations. * ---------------- */ Relation index_open(Oid relationId) { - return RelationIdGetRelation(relationId); + return RelationIdGetRelation(relationId); } /* ---------------- - * index_openr - open a index relation by name + * index_openr - open a index relation by name * - * presently the relcache routines do all the work we need - * to open/close index relations. + * presently the relcache routines do all the work we need + * to open/close index relations. * ---------------- */ Relation index_openr(char *relationName) { - return RelationNameGetRelation(relationName); + return RelationNameGetRelation(relationName); } /* ---------------- - * index_close - close a index relation + * index_close - close a index relation * - * presently the relcache routines do all the work we need - * to open/close index relations. + * presently the relcache routines do all the work we need + * to open/close index relations. * ---------------- */ void index_close(Relation relation) { - RelationClose(relation); + RelationClose(relation); } /* ---------------- - * index_insert - insert an index tuple into a relation + * index_insert - insert an index tuple into a relation * ---------------- */ InsertIndexResult index_insert(Relation relation, - Datum *datum, - char *nulls, - ItemPointer heap_t_ctid, - Relation heapRel) + Datum * datum, + char *nulls, + ItemPointer heap_t_ctid, + Relation heapRel) { - RegProcedure procedure; - InsertIndexResult specificResult; - - RELATION_CHECKS; - GET_REL_PROCEDURE(insert,aminsert); - - /* ---------------- - * have the am's insert proc do all the work. 
- * ---------------- - */ - specificResult = (InsertIndexResult) - fmgr(procedure, relation, datum, nulls, heap_t_ctid, heapRel, NULL); - - /* ---------------- - * the insert proc is supposed to return a "specific result" and - * this routine has to return a "general result" so after we get - * something back from the insert proc, we allocate a - * "general result" and copy some crap between the two. - * - * As far as I'm concerned all this result shit is needlessly c - * omplicated and should be eliminated. -cim 1/19/91 - * - * mao concurs. regardless of how we feel here, however, it is - * important to free memory we don't intend to return to anyone. - * 2/28/91 - * - * this "general result" crap is now gone. -ay 3/6/95 - * ---------------- - */ - - return (specificResult); + RegProcedure procedure; + InsertIndexResult specificResult; + + RELATION_CHECKS; + GET_REL_PROCEDURE(insert, aminsert); + + /* ---------------- + * have the am's insert proc do all the work. + * ---------------- + */ + specificResult = (InsertIndexResult) + fmgr(procedure, relation, datum, nulls, heap_t_ctid, heapRel, NULL); + + /* ---------------- + * the insert proc is supposed to return a "specific result" and + * this routine has to return a "general result" so after we get + * something back from the insert proc, we allocate a + * "general result" and copy some crap between the two. + * + * As far as I'm concerned all this result shit is needlessly c + * omplicated and should be eliminated. -cim 1/19/91 + * + * mao concurs. regardless of how we feel here, however, it is + * important to free memory we don't intend to return to anyone. + * 2/28/91 + * + * this "general result" crap is now gone. 
-ay 3/6/95 + * ---------------- + */ + + return (specificResult); } /* ---------------- - * index_delete - delete an item from an index relation + * index_delete - delete an item from an index relation * ---------------- */ void index_delete(Relation relation, ItemPointer indexItem) { - RegProcedure procedure; - - RELATION_CHECKS; - GET_REL_PROCEDURE(delete,amdelete); - - fmgr(procedure, relation, indexItem); + RegProcedure procedure; + + RELATION_CHECKS; + GET_REL_PROCEDURE(delete, amdelete); + + fmgr(procedure, relation, indexItem); } /* ---------------- - * index_beginscan - start a scan of an index + * index_beginscan - start a scan of an index * ---------------- */ IndexScanDesc index_beginscan(Relation relation, - bool scanFromEnd, - uint16 numberOfKeys, - ScanKey key) + bool scanFromEnd, + uint16 numberOfKeys, + ScanKey key) { - IndexScanDesc scandesc; - RegProcedure procedure; - - RELATION_CHECKS; - GET_REL_PROCEDURE(beginscan,ambeginscan); - - RelationSetRIntentLock(relation); - - scandesc = (IndexScanDesc) - fmgr(procedure, relation, scanFromEnd, numberOfKeys, key); - - return scandesc; + IndexScanDesc scandesc; + RegProcedure procedure; + + RELATION_CHECKS; + GET_REL_PROCEDURE(beginscan, ambeginscan); + + RelationSetRIntentLock(relation); + + scandesc = (IndexScanDesc) + fmgr(procedure, relation, scanFromEnd, numberOfKeys, key); + + return scandesc; } /* ---------------- - * index_rescan - restart a scan of an index + * index_rescan - restart a scan of an index * ---------------- */ void index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key) { - RegProcedure procedure; - - SCAN_CHECKS; - GET_SCAN_PROCEDURE(rescan,amrescan); - - fmgr(procedure, scan, scanFromEnd, key); + RegProcedure procedure; + + SCAN_CHECKS; + GET_SCAN_PROCEDURE(rescan, amrescan); + + fmgr(procedure, scan, scanFromEnd, key); } /* ---------------- - * index_endscan - end a scan + * index_endscan - end a scan * ---------------- */ void index_endscan(IndexScanDesc scan) { - 
RegProcedure procedure; - - SCAN_CHECKS; - GET_SCAN_PROCEDURE(endscan,amendscan); - - fmgr(procedure, scan); - - RelationUnsetRIntentLock(scan->relation); + RegProcedure procedure; + + SCAN_CHECKS; + GET_SCAN_PROCEDURE(endscan, amendscan); + + fmgr(procedure, scan); + + RelationUnsetRIntentLock(scan->relation); } #ifdef NOT_USED /* ---------------- - * index_markpos - mark a scan position + * index_markpos - mark a scan position * ---------------- */ void index_markpos(IndexScanDesc scan) { - RegProcedure procedure; - - SCAN_CHECKS; - GET_SCAN_PROCEDURE(markpos,ammarkpos); - - fmgr(procedure, scan); + RegProcedure procedure; + + SCAN_CHECKS; + GET_SCAN_PROCEDURE(markpos, ammarkpos); + + fmgr(procedure, scan); } + #endif #ifdef NOT_USED /* ---------------- - * index_restrpos - restore a scan position + * index_restrpos - restore a scan position * ---------------- */ void index_restrpos(IndexScanDesc scan) { - RegProcedure procedure; - - SCAN_CHECKS; - GET_SCAN_PROCEDURE(restrpos,amrestrpos); - - fmgr(procedure, scan); + RegProcedure procedure; + + SCAN_CHECKS; + GET_SCAN_PROCEDURE(restrpos, amrestrpos); + + fmgr(procedure, scan); } + #endif /* ---------------- - * index_getnext - get the next tuple from a scan + * index_getnext - get the next tuple from a scan * - * A RetrieveIndexResult is a index tuple/heap tuple pair + * A RetrieveIndexResult is a index tuple/heap tuple pair * ---------------- */ RetrieveIndexResult index_getnext(IndexScanDesc scan, - ScanDirection direction) + ScanDirection direction) { - RegProcedure procedure; - RetrieveIndexResult result; - - SCAN_CHECKS; - GET_SCAN_PROCEDURE(getnext,amgettuple); - - /* ---------------- - * have the am's gettuple proc do all the work. 
- * ---------------- - */ - result = (RetrieveIndexResult) - fmgr(procedure, scan, direction); - - return result; + RegProcedure procedure; + RetrieveIndexResult result; + + SCAN_CHECKS; + GET_SCAN_PROCEDURE(getnext, amgettuple); + + /* ---------------- + * have the am's gettuple proc do all the work. + * ---------------- + */ + result = (RetrieveIndexResult) + fmgr(procedure, scan, direction); + + return result; } /* ---------------- - * index_getprocid + * index_getprocid * - * Some indexed access methods may require support routines that are - * not in the operator class/operator model imposed by pg_am. These - * access methods may store the OIDs of registered procedures they - * need in pg_amproc. These registered procedure OIDs are ordered in - * a way that makes sense to the access method, and used only by the - * access method. The general index code doesn't know anything about - * the routines involved; it just builds an ordered list of them for - * each attribute on which an index is defined. + * Some indexed access methods may require support routines that are + * not in the operator class/operator model imposed by pg_am. These + * access methods may store the OIDs of registered procedures they + * need in pg_amproc. These registered procedure OIDs are ordered in + * a way that makes sense to the access method, and used only by the + * access method. The general index code doesn't know anything about + * the routines involved; it just builds an ordered list of them for + * each attribute on which an index is defined. * - * This routine returns the requested procedure OID for a particular - * indexed attribute. + * This routine returns the requested procedure OID for a particular + * indexed attribute. 
* ---------------- */ RegProcedure index_getprocid(Relation irel, - AttrNumber attnum, - uint16 procnum) + AttrNumber attnum, + uint16 procnum) { - RegProcedure *loc; - int natts; - - natts = irel->rd_rel->relnatts; - - loc = irel->rd_support; - - Assert(loc != NULL); - - return (loc[(natts * (procnum - 1)) + (attnum - 1)]); + RegProcedure *loc; + int natts; + + natts = irel->rd_rel->relnatts; + + loc = irel->rd_support; + + Assert(loc != NULL); + + return (loc[(natts * (procnum - 1)) + (attnum - 1)]); } Datum GetIndexValue(HeapTuple tuple, - TupleDesc hTupDesc, - int attOff, - AttrNumber attrNums[], - FuncIndexInfo *fInfo, - bool *attNull, - Buffer buffer) + TupleDesc hTupDesc, + int attOff, + AttrNumber attrNums[], + FuncIndexInfo * fInfo, + bool * attNull, + Buffer buffer) { - Datum returnVal; - bool isNull; - - if (PointerIsValid(fInfo) && FIgetProcOid(fInfo) != InvalidOid) { - int i; - Datum *attData = (Datum *)palloc(FIgetnArgs(fInfo)*sizeof(Datum)); - - for (i = 0; i < FIgetnArgs(fInfo); i++) { - attData[i] = (Datum) heap_getattr(tuple, - buffer, - attrNums[i], - hTupDesc, - attNull); + Datum returnVal; + bool isNull; + + if (PointerIsValid(fInfo) && FIgetProcOid(fInfo) != InvalidOid) + { + int i; + Datum *attData = (Datum *) palloc(FIgetnArgs(fInfo) * sizeof(Datum)); + + for (i = 0; i < FIgetnArgs(fInfo); i++) + { + attData[i] = (Datum) heap_getattr(tuple, + buffer, + attrNums[i], + hTupDesc, + attNull); + } + returnVal = (Datum) fmgr_array_args(FIgetProcOid(fInfo), + FIgetnArgs(fInfo), + (char **) attData, + &isNull); + pfree(attData); + *attNull = FALSE; + } + else + { + returnVal = (Datum) heap_getattr(tuple, buffer, attrNums[attOff], + hTupDesc, attNull); } - returnVal = (Datum)fmgr_array_args(FIgetProcOid(fInfo), - FIgetnArgs(fInfo), - (char **) attData, - &isNull); - pfree(attData); - *attNull = FALSE; - }else { - returnVal = (Datum) heap_getattr(tuple, buffer, attrNums[attOff], - hTupDesc, attNull); - } - return returnVal; + return returnVal; } diff 
--git a/src/backend/access/index/istrat.c b/src/backend/access/index/istrat.c index 5c143f0aa5..35158c2217 100644 --- a/src/backend/access/index/istrat.c +++ b/src/backend/access/index/istrat.c @@ -1,689 +1,730 @@ /*------------------------------------------------------------------------- * * istrat.c-- - * index scan strategy manipulation code and index strategy manipulation - * operator code. + * index scan strategy manipulation code and index strategy manipulation + * operator code. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/index/Attic/istrat.c,v 1.9 1997/08/22 16:48:14 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/index/Attic/istrat.c,v 1.10 1997/09/07 04:38:32 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <catalog/pg_proc.h> #include <catalog/pg_operator.h> #include <catalog/catname.h> #include <catalog/pg_index.h> #include <catalog/pg_amop.h> #include <catalog/pg_amproc.h> -#include <utils/memutils.h> /* could have been access/itup.h */ +#include <utils/memutils.h> /* could have been access/itup.h */ #include <access/heapam.h> #include <access/istrat.h> #include <fmgr.h> -#ifndef NO_ASSERT_CHECKING -static bool StrategyEvaluationIsValid(StrategyEvaluation evaluation); -static bool StrategyExpressionIsValid(StrategyExpression expression, - StrategyNumber maxStrategy); -static ScanKey StrategyMapGetScanKeyEntry(StrategyMap map, - StrategyNumber strategyNumber); -static bool StrategyOperatorIsValid(StrategyOperator operator, - StrategyNumber maxStrategy); -static bool StrategyTermIsValid(StrategyTerm term, - StrategyNumber maxStrategy); +#ifndef NO_ASSERT_CHECKING +static bool StrategyEvaluationIsValid(StrategyEvaluation evaluation); +static bool +StrategyExpressionIsValid(StrategyExpression expression, + StrategyNumber maxStrategy); +static ScanKey 
+StrategyMapGetScanKeyEntry(StrategyMap map, + StrategyNumber strategyNumber); +static bool +StrategyOperatorIsValid(StrategyOperator operator, + StrategyNumber maxStrategy); +static bool +StrategyTermIsValid(StrategyTerm term, + StrategyNumber maxStrategy); + #endif /* ---------------------------------------------------------------- - * misc strategy support routines + * misc strategy support routines * ---------------------------------------------------------------- */ - -/* - * StrategyNumberIsValid - * StrategyNumberIsInBounds - * StrategyMapIsValid - * StrategyTransformMapIsValid - * IndexStrategyIsValid + +/* + * StrategyNumberIsValid + * StrategyNumberIsInBounds + * StrategyMapIsValid + * StrategyTransformMapIsValid + * IndexStrategyIsValid * - * ... are now macros in istrat.h -cim 4/27/91 + * ... are now macros in istrat.h -cim 4/27/91 */ - + /* * StrategyMapGetScanKeyEntry -- - * Returns a scan key entry of a index strategy mapping member. + * Returns a scan key entry of a index strategy mapping member. * * Note: - * Assumes that the index strategy mapping is valid. - * Assumes that the index strategy number is valid. - * Bounds checking should be done outside this routine. + * Assumes that the index strategy mapping is valid. + * Assumes that the index strategy number is valid. + * Bounds checking should be done outside this routine. */ -static ScanKey +static ScanKey StrategyMapGetScanKeyEntry(StrategyMap map, - StrategyNumber strategyNumber) + StrategyNumber strategyNumber) { - Assert(StrategyMapIsValid(map)); - Assert(StrategyNumberIsValid(strategyNumber)); - return (&map->entry[strategyNumber - 1]); + Assert(StrategyMapIsValid(map)); + Assert(StrategyNumberIsValid(strategyNumber)); + return (&map->entry[strategyNumber - 1]); } /* * IndexStrategyGetStrategyMap -- - * Returns an index strategy mapping of an index strategy. + * Returns an index strategy mapping of an index strategy. * * Note: - * Assumes that the index strategy is valid. 
- * Assumes that the number of index strategies is valid. - * Bounds checking should be done outside this routine. + * Assumes that the index strategy is valid. + * Assumes that the number of index strategies is valid. + * Bounds checking should be done outside this routine. */ StrategyMap IndexStrategyGetStrategyMap(IndexStrategy indexStrategy, - StrategyNumber maxStrategyNum, - AttrNumber attrNum) + StrategyNumber maxStrategyNum, + AttrNumber attrNum) { - Assert(IndexStrategyIsValid(indexStrategy)); - Assert(StrategyNumberIsValid(maxStrategyNum)); - Assert(AttributeNumberIsValid(attrNum)); - - maxStrategyNum = AMStrategies(maxStrategyNum); /* XXX */ - return - &indexStrategy->strategyMapData[maxStrategyNum * (attrNum - 1)]; + Assert(IndexStrategyIsValid(indexStrategy)); + Assert(StrategyNumberIsValid(maxStrategyNum)); + Assert(AttributeNumberIsValid(attrNum)); + + maxStrategyNum = AMStrategies(maxStrategyNum); /* XXX */ + return + &indexStrategy->strategyMapData[maxStrategyNum * (attrNum - 1)]; } /* * AttributeNumberGetIndexStrategySize -- - * Computes the size of an index strategy. + * Computes the size of an index strategy. 
*/ Size AttributeNumberGetIndexStrategySize(AttrNumber maxAttributeNumber, - StrategyNumber maxStrategyNumber) + StrategyNumber maxStrategyNumber) { - maxStrategyNumber = AMStrategies(maxStrategyNumber); /* XXX */ - return - maxAttributeNumber * maxStrategyNumber * sizeof (ScanKeyData); + maxStrategyNumber = AMStrategies(maxStrategyNumber); /* XXX */ + return + maxAttributeNumber * maxStrategyNumber * sizeof(ScanKeyData); } -#ifndef NO_ASSERT_CHECKING -/* +#ifndef NO_ASSERT_CHECKING +/* * StrategyTransformMapIsValid is now a macro in istrat.h -cim 4/27/91 */ /* ---------------- - * StrategyOperatorIsValid + * StrategyOperatorIsValid * ---------------- */ -static bool +static bool StrategyOperatorIsValid(StrategyOperator operator, - StrategyNumber maxStrategy) + StrategyNumber maxStrategy) { - return (bool) + return (bool) (PointerIsValid(operator) && StrategyNumberIsInBounds(operator->strategy, maxStrategy) && !(operator->flags & ~(SK_NEGATE | SK_COMMUTE))); } /* ---------------- - * StrategyTermIsValid + * StrategyTermIsValid * ---------------- */ -static bool +static bool StrategyTermIsValid(StrategyTerm term, - StrategyNumber maxStrategy) + StrategyNumber maxStrategy) { - Index index; - - if (! PointerIsValid(term) || term->degree == 0) - return false; - - for (index = 0; index < term->degree; index += 1) { - if (! 
StrategyOperatorIsValid(&term->operatorData[index], - maxStrategy)) { - - return false; + Index index; + + if (!PointerIsValid(term) || term->degree == 0) + return false; + + for (index = 0; index < term->degree; index += 1) + { + if (!StrategyOperatorIsValid(&term->operatorData[index], + maxStrategy)) + { + + return false; + } } - } - - return true; + + return true; } /* ---------------- - * StrategyExpressionIsValid + * StrategyExpressionIsValid * ---------------- */ -static bool +static bool StrategyExpressionIsValid(StrategyExpression expression, - StrategyNumber maxStrategy) + StrategyNumber maxStrategy) { - StrategyTerm *termP; - - if (!PointerIsValid(expression)) - return true; - - if (!StrategyTermIsValid(expression->term[0], maxStrategy)) - return false; - - termP = &expression->term[1]; - while (StrategyTermIsValid(*termP, maxStrategy)) - termP += 1; - - return (bool) - (! PointerIsValid(*termP)); + StrategyTerm *termP; + + if (!PointerIsValid(expression)) + return true; + + if (!StrategyTermIsValid(expression->term[0], maxStrategy)) + return false; + + termP = &expression->term[1]; + while (StrategyTermIsValid(*termP, maxStrategy)) + termP += 1; + + return (bool) + (!PointerIsValid(*termP)); } /* ---------------- - * StrategyEvaluationIsValid + * StrategyEvaluationIsValid * ---------------- */ -static bool +static bool StrategyEvaluationIsValid(StrategyEvaluation evaluation) { - Index index; - - if (! PointerIsValid(evaluation) || - ! StrategyNumberIsValid(evaluation->maxStrategy) || - ! StrategyTransformMapIsValid(evaluation->negateTransform) || - ! StrategyTransformMapIsValid(evaluation->commuteTransform) || - ! StrategyTransformMapIsValid(evaluation->negateCommuteTransform)) { - - return false; - } - - for (index = 0; index < evaluation->maxStrategy; index += 1) { - if (! 
StrategyExpressionIsValid(evaluation->expression[index], - evaluation->maxStrategy)) { - - return false; + Index index; + + if (!PointerIsValid(evaluation) || + !StrategyNumberIsValid(evaluation->maxStrategy) || + !StrategyTransformMapIsValid(evaluation->negateTransform) || + !StrategyTransformMapIsValid(evaluation->commuteTransform) || + !StrategyTransformMapIsValid(evaluation->negateCommuteTransform)) + { + + return false; } - } - return true; + + for (index = 0; index < evaluation->maxStrategy; index += 1) + { + if (!StrategyExpressionIsValid(evaluation->expression[index], + evaluation->maxStrategy)) + { + + return false; + } + } + return true; } + #endif /* ---------------- - * StrategyTermEvaluate + * StrategyTermEvaluate * ---------------- */ -static bool +static bool StrategyTermEvaluate(StrategyTerm term, - StrategyMap map, - Datum left, - Datum right) + StrategyMap map, + Datum left, + Datum right) { - Index index; - long tmpres = 0; - bool result = 0; - StrategyOperator operator; - ScanKey entry; - - for (index = 0, operator = &term->operatorData[0]; - index < term->degree; index += 1, operator += 1) { - - entry = &map->entry[operator->strategy - 1]; - - Assert(RegProcedureIsValid(entry->sk_procedure)); - - switch (operator->flags ^ entry->sk_flags) { - case 0x0: - tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure, - left, right); - break; - - case SK_NEGATE: - tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure, - left, right); - break; - - case SK_COMMUTE: - tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure, - right, left); - break; - - case SK_NEGATE | SK_COMMUTE: - tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure, - right, left); - break; - - default: - elog(FATAL, "StrategyTermEvaluate: impossible case %d", - operator->flags ^ entry->sk_flags); + Index index; + long tmpres = 0; + bool result = 0; + StrategyOperator operator; + ScanKey entry; + + for (index = 0, operator = &term->operatorData[0]; + 
index < term->degree; index += 1, operator += 1) + { + + entry = &map->entry[operator->strategy - 1]; + + Assert(RegProcedureIsValid(entry->sk_procedure)); + + switch (operator->flags ^ entry->sk_flags) + { + case 0x0: + tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure, + left, right); + break; + + case SK_NEGATE: + tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure, + left, right); + break; + + case SK_COMMUTE: + tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure, + right, left); + break; + + case SK_NEGATE | SK_COMMUTE: + tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure, + right, left); + break; + + default: + elog(FATAL, "StrategyTermEvaluate: impossible case %d", + operator->flags ^ entry->sk_flags); + } + + result = (bool) tmpres; + if (!result) + return result; } - - result = (bool) tmpres; - if (!result) - return result; - } - - return result; + + return result; } /* ---------------- - * RelationGetStrategy + * RelationGetStrategy * ---------------- */ StrategyNumber RelationGetStrategy(Relation relation, - AttrNumber attributeNumber, - StrategyEvaluation evaluation, - RegProcedure procedure) + AttrNumber attributeNumber, + StrategyEvaluation evaluation, + RegProcedure procedure) { - StrategyNumber strategy; - StrategyMap strategyMap; - ScanKey entry; - Index index; - int numattrs; - - Assert(RelationIsValid(relation)); - numattrs = RelationGetNumberOfAttributes(relation); - - Assert(relation->rd_rel->relkind == RELKIND_INDEX); /* XXX use accessor */ - Assert(AttributeNumberIsValid(attributeNumber)); - Assert( (attributeNumber >= 1) && (attributeNumber < 1 + numattrs)); - - Assert(StrategyEvaluationIsValid(evaluation)); - Assert(RegProcedureIsValid(procedure)); - - strategyMap = - IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), - evaluation->maxStrategy, - attributeNumber); - - /* get a strategy number for the procedure ignoring flags for now */ - for (index = 0; index < 
evaluation->maxStrategy; index += 1) { - if (strategyMap->entry[index].sk_procedure == procedure) { - break; + StrategyNumber strategy; + StrategyMap strategyMap; + ScanKey entry; + Index index; + int numattrs; + + Assert(RelationIsValid(relation)); + numattrs = RelationGetNumberOfAttributes(relation); + + Assert(relation->rd_rel->relkind == RELKIND_INDEX); /* XXX use accessor */ + Assert(AttributeNumberIsValid(attributeNumber)); + Assert((attributeNumber >= 1) && (attributeNumber < 1 + numattrs)); + + Assert(StrategyEvaluationIsValid(evaluation)); + Assert(RegProcedureIsValid(procedure)); + + strategyMap = + IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), + evaluation->maxStrategy, + attributeNumber); + + /* get a strategy number for the procedure ignoring flags for now */ + for (index = 0; index < evaluation->maxStrategy; index += 1) + { + if (strategyMap->entry[index].sk_procedure == procedure) + { + break; + } } - } - - if (index == evaluation->maxStrategy) - return InvalidStrategy; - - strategy = 1 + index; - entry = StrategyMapGetScanKeyEntry(strategyMap, strategy); - - Assert(!(entry->sk_flags & ~(SK_NEGATE | SK_COMMUTE))); - - switch (entry->sk_flags & (SK_NEGATE | SK_COMMUTE)) { - case 0x0: - return strategy; - - case SK_NEGATE: - strategy = evaluation->negateTransform->strategy[strategy - 1]; - break; - - case SK_COMMUTE: - strategy = evaluation->commuteTransform->strategy[strategy - 1]; - break; - - case SK_NEGATE | SK_COMMUTE: - strategy = evaluation->negateCommuteTransform->strategy[strategy - 1]; - break; - - default: - elog(FATAL, "RelationGetStrategy: impossible case %d", entry->sk_flags); - } - - - if (! StrategyNumberIsInBounds(strategy, evaluation->maxStrategy)) { - if (! 
StrategyNumberIsValid(strategy)) { - elog(WARN, "RelationGetStrategy: corrupted evaluation"); + + if (index == evaluation->maxStrategy) + return InvalidStrategy; + + strategy = 1 + index; + entry = StrategyMapGetScanKeyEntry(strategyMap, strategy); + + Assert(!(entry->sk_flags & ~(SK_NEGATE | SK_COMMUTE))); + + switch (entry->sk_flags & (SK_NEGATE | SK_COMMUTE)) + { + case 0x0: + return strategy; + + case SK_NEGATE: + strategy = evaluation->negateTransform->strategy[strategy - 1]; + break; + + case SK_COMMUTE: + strategy = evaluation->commuteTransform->strategy[strategy - 1]; + break; + + case SK_NEGATE | SK_COMMUTE: + strategy = evaluation->negateCommuteTransform->strategy[strategy - 1]; + break; + + default: + elog(FATAL, "RelationGetStrategy: impossible case %d", entry->sk_flags); } - } - - return strategy; + + + if (!StrategyNumberIsInBounds(strategy, evaluation->maxStrategy)) + { + if (!StrategyNumberIsValid(strategy)) + { + elog(WARN, "RelationGetStrategy: corrupted evaluation"); + } + } + + return strategy; } /* ---------------- - * RelationInvokeStrategy + * RelationInvokeStrategy * ---------------- */ -bool /* XXX someday, this may return Datum */ +bool /* XXX someday, this may return Datum */ RelationInvokeStrategy(Relation relation, - StrategyEvaluation evaluation, - AttrNumber attributeNumber, - StrategyNumber strategy, - Datum left, - Datum right) + StrategyEvaluation evaluation, + AttrNumber attributeNumber, + StrategyNumber strategy, + Datum left, + Datum right) { - StrategyNumber newStrategy; - StrategyMap strategyMap; - ScanKey entry; - StrategyTermData termData; - int numattrs; - - Assert(RelationIsValid(relation)); - Assert(relation->rd_rel->relkind == RELKIND_INDEX); /* XXX use accessor */ - numattrs = RelationGetNumberOfAttributes(relation); - - Assert(StrategyEvaluationIsValid(evaluation)); - Assert(AttributeNumberIsValid(attributeNumber)); - Assert( (attributeNumber >= 1) && (attributeNumber < 1 + numattrs)); - - 
Assert(StrategyNumberIsInBounds(strategy, evaluation->maxStrategy)); - - termData.degree = 1; - - strategyMap = - IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), - evaluation->maxStrategy, - attributeNumber); - - entry = StrategyMapGetScanKeyEntry(strategyMap, strategy); - - if (RegProcedureIsValid(entry->sk_procedure)) { - termData.operatorData[0].strategy = strategy; - termData.operatorData[0].flags = 0x0; - - return - StrategyTermEvaluate(&termData, strategyMap, left, right); - } - - - newStrategy = evaluation->negateTransform->strategy[strategy - 1]; - if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) { - - entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy); - - if (RegProcedureIsValid(entry->sk_procedure)) { - termData.operatorData[0].strategy = newStrategy; - termData.operatorData[0].flags = SK_NEGATE; - - return - StrategyTermEvaluate(&termData, strategyMap, left, right); + StrategyNumber newStrategy; + StrategyMap strategyMap; + ScanKey entry; + StrategyTermData termData; + int numattrs; + + Assert(RelationIsValid(relation)); + Assert(relation->rd_rel->relkind == RELKIND_INDEX); /* XXX use accessor */ + numattrs = RelationGetNumberOfAttributes(relation); + + Assert(StrategyEvaluationIsValid(evaluation)); + Assert(AttributeNumberIsValid(attributeNumber)); + Assert((attributeNumber >= 1) && (attributeNumber < 1 + numattrs)); + + Assert(StrategyNumberIsInBounds(strategy, evaluation->maxStrategy)); + + termData.degree = 1; + + strategyMap = + IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), + evaluation->maxStrategy, + attributeNumber); + + entry = StrategyMapGetScanKeyEntry(strategyMap, strategy); + + if (RegProcedureIsValid(entry->sk_procedure)) + { + termData.operatorData[0].strategy = strategy; + termData.operatorData[0].flags = 0x0; + + return + StrategyTermEvaluate(&termData, strategyMap, left, right); } - } - - newStrategy = evaluation->commuteTransform->strategy[strategy - 1]; - if (newStrategy 
!= strategy && StrategyNumberIsValid(newStrategy)) { - - entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy); - - if (RegProcedureIsValid(entry->sk_procedure)) { - termData.operatorData[0].strategy = newStrategy; - termData.operatorData[0].flags = SK_COMMUTE; - - return - StrategyTermEvaluate(&termData, strategyMap, left, right); + + + newStrategy = evaluation->negateTransform->strategy[strategy - 1]; + if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) + { + + entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy); + + if (RegProcedureIsValid(entry->sk_procedure)) + { + termData.operatorData[0].strategy = newStrategy; + termData.operatorData[0].flags = SK_NEGATE; + + return + StrategyTermEvaluate(&termData, strategyMap, left, right); + } } - } - - newStrategy = evaluation->negateCommuteTransform->strategy[strategy - 1]; - if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) { - - entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy); - - if (RegProcedureIsValid(entry->sk_procedure)) { - termData.operatorData[0].strategy = newStrategy; - termData.operatorData[0].flags = SK_NEGATE | SK_COMMUTE; - - return - StrategyTermEvaluate(&termData, strategyMap, left, right); + + newStrategy = evaluation->commuteTransform->strategy[strategy - 1]; + if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) + { + + entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy); + + if (RegProcedureIsValid(entry->sk_procedure)) + { + termData.operatorData[0].strategy = newStrategy; + termData.operatorData[0].flags = SK_COMMUTE; + + return + StrategyTermEvaluate(&termData, strategyMap, left, right); + } } - } - - if (PointerIsValid(evaluation->expression[strategy - 1])) { - StrategyTerm *termP; - - termP = &evaluation->expression[strategy - 1]->term[0]; - while (PointerIsValid(*termP)) { - Index index; - - for (index = 0; index < (*termP)->degree; index += 1) { - entry = StrategyMapGetScanKeyEntry(strategyMap, - 
(*termP)->operatorData[index].strategy); - - if (! RegProcedureIsValid(entry->sk_procedure)) { - break; + + newStrategy = evaluation->negateCommuteTransform->strategy[strategy - 1]; + if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) + { + + entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy); + + if (RegProcedureIsValid(entry->sk_procedure)) + { + termData.operatorData[0].strategy = newStrategy; + termData.operatorData[0].flags = SK_NEGATE | SK_COMMUTE; + + return + StrategyTermEvaluate(&termData, strategyMap, left, right); } - } - - if (index == (*termP)->degree) { - return - StrategyTermEvaluate(*termP, strategyMap, left, right); - } - - termP += 1; } - } - - elog(WARN, "RelationInvokeStrategy: cannot evaluate strategy %d", - strategy); - /* not reached, just to make compiler happy */ - return FALSE; + if (PointerIsValid(evaluation->expression[strategy - 1])) + { + StrategyTerm *termP; + + termP = &evaluation->expression[strategy - 1]->term[0]; + while (PointerIsValid(*termP)) + { + Index index; + + for (index = 0; index < (*termP)->degree; index += 1) + { + entry = StrategyMapGetScanKeyEntry(strategyMap, + (*termP)->operatorData[index].strategy); + + if (!RegProcedureIsValid(entry->sk_procedure)) + { + break; + } + } + + if (index == (*termP)->degree) + { + return + StrategyTermEvaluate(*termP, strategyMap, left, right); + } + + termP += 1; + } + } + + elog(WARN, "RelationInvokeStrategy: cannot evaluate strategy %d", + strategy); + + /* not reached, just to make compiler happy */ + return FALSE; } /* ---------------- - * OperatorRelationFillScanKeyEntry + * OperatorRelationFillScanKeyEntry * ---------------- */ static void OperatorRelationFillScanKeyEntry(Relation operatorRelation, - Oid operatorObjectId, - ScanKey entry) + Oid operatorObjectId, + ScanKey entry) { - HeapScanDesc scan; - ScanKeyData scanKeyData; - HeapTuple tuple; - - ScanKeyEntryInitialize(&scanKeyData, 0, - ObjectIdAttributeNumber, - ObjectIdEqualRegProcedure, - 
ObjectIdGetDatum(operatorObjectId)); - - scan = heap_beginscan(operatorRelation, false, NowTimeQual, - 1, &scanKeyData); - - tuple = heap_getnext(scan, false, (Buffer *)NULL); - if (! HeapTupleIsValid(tuple)) { - elog(WARN, "OperatorObjectIdFillScanKeyEntry: unknown operator %lu", - (uint32) operatorObjectId); - } - - entry->sk_flags = 0; - entry->sk_procedure = - ((OperatorTupleForm) GETSTRUCT(tuple))->oprcode; - fmgr_info(entry->sk_procedure, &entry->sk_func, &entry->sk_nargs); - - if (! RegProcedureIsValid(entry->sk_procedure)) { - elog(WARN, - "OperatorObjectIdFillScanKeyEntry: no procedure for operator %lu", - (uint32) operatorObjectId); - } - - heap_endscan(scan); + HeapScanDesc scan; + ScanKeyData scanKeyData; + HeapTuple tuple; + + ScanKeyEntryInitialize(&scanKeyData, 0, + ObjectIdAttributeNumber, + ObjectIdEqualRegProcedure, + ObjectIdGetDatum(operatorObjectId)); + + scan = heap_beginscan(operatorRelation, false, NowTimeQual, + 1, &scanKeyData); + + tuple = heap_getnext(scan, false, (Buffer *) NULL); + if (!HeapTupleIsValid(tuple)) + { + elog(WARN, "OperatorObjectIdFillScanKeyEntry: unknown operator %lu", + (uint32) operatorObjectId); + } + + entry->sk_flags = 0; + entry->sk_procedure = + ((OperatorTupleForm) GETSTRUCT(tuple))->oprcode; + fmgr_info(entry->sk_procedure, &entry->sk_func, &entry->sk_nargs); + + if (!RegProcedureIsValid(entry->sk_procedure)) + { + elog(WARN, + "OperatorObjectIdFillScanKeyEntry: no procedure for operator %lu", + (uint32) operatorObjectId); + } + + heap_endscan(scan); } /* * IndexSupportInitialize -- - * Initializes an index strategy and associated support procedures. + * Initializes an index strategy and associated support procedures. 
*/ void IndexSupportInitialize(IndexStrategy indexStrategy, - RegProcedure *indexSupport, - Oid indexObjectId, - Oid accessMethodObjectId, - StrategyNumber maxStrategyNumber, - StrategyNumber maxSupportNumber, - AttrNumber maxAttributeNumber) + RegProcedure * indexSupport, + Oid indexObjectId, + Oid accessMethodObjectId, + StrategyNumber maxStrategyNumber, + StrategyNumber maxSupportNumber, + AttrNumber maxAttributeNumber) { - Relation relation; - Relation operatorRelation; - HeapScanDesc scan; - HeapTuple tuple; - ScanKeyData entry[2]; - StrategyMap map; - AttrNumber attributeNumber; - int attributeIndex; - Oid operatorClassObjectId[ MaxIndexAttributeNumber ]; - - maxStrategyNumber = AMStrategies(maxStrategyNumber); - - ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_index_indexrelid, - ObjectIdEqualRegProcedure, - ObjectIdGetDatum(indexObjectId)); - - relation = heap_openr(IndexRelationName); - scan = heap_beginscan(relation, false, NowTimeQual, 1, entry); - tuple = heap_getnext(scan, 0, (Buffer *)NULL); - if (! HeapTupleIsValid(tuple)) - elog(WARN, "IndexSupportInitialize: corrupted catalogs"); - - /* - * XXX note that the following assumes the INDEX tuple is well formed and - * that the key[] and class[] are 0 terminated. 
- */ - for (attributeIndex=0; attributeIndex<maxAttributeNumber; attributeIndex++) + Relation relation; + Relation operatorRelation; + HeapScanDesc scan; + HeapTuple tuple; + ScanKeyData entry[2]; + StrategyMap map; + AttrNumber attributeNumber; + int attributeIndex; + Oid operatorClassObjectId[MaxIndexAttributeNumber]; + + maxStrategyNumber = AMStrategies(maxStrategyNumber); + + ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_index_indexrelid, + ObjectIdEqualRegProcedure, + ObjectIdGetDatum(indexObjectId)); + + relation = heap_openr(IndexRelationName); + scan = heap_beginscan(relation, false, NowTimeQual, 1, entry); + tuple = heap_getnext(scan, 0, (Buffer *) NULL); + if (!HeapTupleIsValid(tuple)) + elog(WARN, "IndexSupportInitialize: corrupted catalogs"); + + /* + * XXX note that the following assumes the INDEX tuple is well formed + * and that the key[] and class[] are 0 terminated. + */ + for (attributeIndex = 0; attributeIndex < maxAttributeNumber; attributeIndex++) { - IndexTupleForm iform; - - iform = (IndexTupleForm) GETSTRUCT(tuple); - - if (!OidIsValid(iform->indkey[attributeIndex])) { - if (attributeIndex == 0) { - elog(WARN, "IndexSupportInitialize: no pg_index tuple"); + IndexTupleForm iform; + + iform = (IndexTupleForm) GETSTRUCT(tuple); + + if (!OidIsValid(iform->indkey[attributeIndex])) + { + if (attributeIndex == 0) + { + elog(WARN, "IndexSupportInitialize: no pg_index tuple"); + } + break; } - break; - } - - operatorClassObjectId[attributeIndex] - = iform->indclass[attributeIndex]; + + operatorClassObjectId[attributeIndex] + = iform->indclass[attributeIndex]; } - - heap_endscan(scan); - heap_close(relation); - - /* if support routines exist for this access method, load them */ - if (maxSupportNumber > 0) { - - ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_amproc_amid, - ObjectIdEqualRegProcedure, - ObjectIdGetDatum(accessMethodObjectId)); - - ScanKeyEntryInitialize(&entry[1], 0, Anum_pg_amproc_amopclaid, - ObjectIdEqualRegProcedure, 0); - -/* relation 
= heap_openr(Name_pg_amproc); */ - relation = heap_openr(AccessMethodProcedureRelationName); - - + + heap_endscan(scan); + heap_close(relation); + + /* if support routines exist for this access method, load them */ + if (maxSupportNumber > 0) + { + + ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_amproc_amid, + ObjectIdEqualRegProcedure, + ObjectIdGetDatum(accessMethodObjectId)); + + ScanKeyEntryInitialize(&entry[1], 0, Anum_pg_amproc_amopclaid, + ObjectIdEqualRegProcedure, 0); + +/* relation = heap_openr(Name_pg_amproc); */ + relation = heap_openr(AccessMethodProcedureRelationName); + + + for (attributeNumber = maxAttributeNumber; attributeNumber > 0; + attributeNumber--) + { + + int16 support; + Form_pg_amproc form; + RegProcedure *loc; + + loc = &indexSupport[((attributeNumber - 1) * maxSupportNumber)]; + + for (support = maxSupportNumber; --support >= 0;) + { + loc[support] = InvalidOid; + } + + entry[1].sk_argument = + ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]); + + scan = heap_beginscan(relation, false, NowTimeQual, 2, entry); + + while (tuple = heap_getnext(scan, 0, (Buffer *) NULL), + HeapTupleIsValid(tuple)) + { + + form = (Form_pg_amproc) GETSTRUCT(tuple); + loc[(form->amprocnum - 1)] = form->amproc; + } + + heap_endscan(scan); + } + heap_close(relation); + } + + ScanKeyEntryInitialize(&entry[0], 0, + Anum_pg_amop_amopid, + ObjectIdEqualRegProcedure, + ObjectIdGetDatum(accessMethodObjectId)); + + ScanKeyEntryInitialize(&entry[1], 0, + Anum_pg_amop_amopclaid, + ObjectIdEqualRegProcedure, 0); + + relation = heap_openr(AccessMethodOperatorRelationName); + operatorRelation = heap_openr(OperatorRelationName); + for (attributeNumber = maxAttributeNumber; attributeNumber > 0; - attributeNumber--) { - - int16 support; - Form_pg_amproc form; - RegProcedure *loc; - - loc = &indexSupport[((attributeNumber - 1) * maxSupportNumber)]; - - for (support = maxSupportNumber; --support >= 0; ) { - loc[support] = InvalidOid; - } - - entry[1].sk_argument = - 
ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]); - - scan = heap_beginscan(relation, false, NowTimeQual, 2, entry); - - while (tuple = heap_getnext(scan, 0, (Buffer *)NULL), - HeapTupleIsValid(tuple)) { - - form = (Form_pg_amproc) GETSTRUCT(tuple); - loc[(form->amprocnum - 1)] = form->amproc; - } - - heap_endscan(scan); + attributeNumber--) + { + + StrategyNumber strategy; + + entry[1].sk_argument = + ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]); + + map = IndexStrategyGetStrategyMap(indexStrategy, + maxStrategyNumber, + attributeNumber); + + for (strategy = 1; strategy <= maxStrategyNumber; strategy++) + ScanKeyEntrySetIllegal(StrategyMapGetScanKeyEntry(map, strategy)); + + scan = heap_beginscan(relation, false, NowTimeQual, 2, entry); + + while (tuple = heap_getnext(scan, 0, (Buffer *) NULL), + HeapTupleIsValid(tuple)) + { + Form_pg_amop form; + + form = (Form_pg_amop) GETSTRUCT(tuple); + + OperatorRelationFillScanKeyEntry(operatorRelation, + form->amopopr, + StrategyMapGetScanKeyEntry(map, form->amopstrategy)); + } + + heap_endscan(scan); } + + heap_close(operatorRelation); heap_close(relation); - } - - ScanKeyEntryInitialize(&entry[0], 0, - Anum_pg_amop_amopid, - ObjectIdEqualRegProcedure, - ObjectIdGetDatum(accessMethodObjectId)); - - ScanKeyEntryInitialize(&entry[1], 0, - Anum_pg_amop_amopclaid, - ObjectIdEqualRegProcedure, 0); - - relation = heap_openr(AccessMethodOperatorRelationName); - operatorRelation = heap_openr(OperatorRelationName); - - for (attributeNumber = maxAttributeNumber; attributeNumber > 0; - attributeNumber--) { - - StrategyNumber strategy; - - entry[1].sk_argument = - ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]); - - map = IndexStrategyGetStrategyMap(indexStrategy, - maxStrategyNumber, - attributeNumber); - - for (strategy = 1; strategy <= maxStrategyNumber; strategy++) - ScanKeyEntrySetIllegal(StrategyMapGetScanKeyEntry(map, strategy)); - - scan = heap_beginscan(relation, false, NowTimeQual, 2, 
entry); - - while (tuple = heap_getnext(scan, 0, (Buffer *)NULL), - HeapTupleIsValid(tuple)) { - Form_pg_amop form; - - form = (Form_pg_amop) GETSTRUCT(tuple); - - OperatorRelationFillScanKeyEntry(operatorRelation, - form->amopopr, - StrategyMapGetScanKeyEntry(map, form->amopstrategy)); - } - - heap_endscan(scan); - } - - heap_close(operatorRelation); - heap_close(relation); } /* ---------------- - * IndexStrategyDisplay + * IndexStrategyDisplay * ---------------- */ #ifdef ISTRATDEBUG int IndexStrategyDisplay(IndexStrategy indexStrategy, - StrategyNumber numberOfStrategies, - int numberOfAttributes) + StrategyNumber numberOfStrategies, + int numberOfAttributes) { - StrategyMap strategyMap; - AttrNumber attributeNumber; - StrategyNumber strategyNumber; - - for (attributeNumber = 1; attributeNumber <= numberOfAttributes; - attributeNumber += 1) { - - strategyMap = IndexStrategyGetStrategyMap(indexStrategy, - numberOfStrategies, - attributeNumber); - - for (strategyNumber = 1; - strategyNumber <= AMStrategies(numberOfStrategies); - strategyNumber += 1) { - - printf(":att %d\t:str %d\t:opr 0x%x(%d)\n", - attributeNumber, strategyNumber, - strategyMap->entry[strategyNumber - 1].sk_procedure, - strategyMap->entry[strategyNumber - 1].sk_procedure); + StrategyMap strategyMap; + AttrNumber attributeNumber; + StrategyNumber strategyNumber; + + for (attributeNumber = 1; attributeNumber <= numberOfAttributes; + attributeNumber += 1) + { + + strategyMap = IndexStrategyGetStrategyMap(indexStrategy, + numberOfStrategies, + attributeNumber); + + for (strategyNumber = 1; + strategyNumber <= AMStrategies(numberOfStrategies); + strategyNumber += 1) + { + + printf(":att %d\t:str %d\t:opr 0x%x(%d)\n", + attributeNumber, strategyNumber, + strategyMap->entry[strategyNumber - 1].sk_procedure, + strategyMap->entry[strategyNumber - 1].sk_procedure); + } } - } } -#endif /* defined(ISTRATDEBUG) */ - +#endif /* defined(ISTRATDEBUG) */ diff --git a/src/backend/access/nbtree/nbtcompare.c 
b/src/backend/access/nbtree/nbtcompare.c index f005509be0..0312bbb69d 100644 --- a/src/backend/access/nbtree/nbtcompare.c +++ b/src/backend/access/nbtree/nbtcompare.c @@ -1,22 +1,22 @@ /*------------------------------------------------------------------------- * * nbtcompare.c-- - * Comparison functions for btree access method. + * Comparison functions for btree access method. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.10 1997/06/11 05:20:05 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.11 1997/09/07 04:38:39 momjian Exp $ * - * NOTES - * These functions are stored in pg_amproc. For each operator class - * defined on btrees, they compute + * NOTES + * These functions are stored in pg_amproc. For each operator class + * defined on btrees, they compute * - * compare(a, b): - * < 0 if a < b, - * = 0 if a == b, - * > 0 if a > b. + * compare(a, b): + * < 0 if a < b, + * = 0 if a == b, + * > 0 if a > b. 
*------------------------------------------------------------------------- */ @@ -30,168 +30,171 @@ int32 btint2cmp(int16 a, int16 b) { - return ((int32) (a - b)); + return ((int32) (a - b)); } int32 btint4cmp(int32 a, int32 b) { - return (a - b); + return (a - b); } int32 btint24cmp(int16 a, int32 b) { - return (((int32) a) - b); + return (((int32) a) - b); } int32 btint42cmp(int32 a, int16 b) { - return (a - ((int32) b)); + return (a - ((int32) b)); } int32 btfloat4cmp(float32 a, float32 b) { - if (*a > *b) - return (1); - else if (*a == *b) - return (0); - else - return (-1); + if (*a > *b) + return (1); + else if (*a == *b) + return (0); + else + return (-1); } int32 btfloat8cmp(float64 a, float64 b) { - if (*a > *b) - return (1); - else if (*a == *b) - return (0); - else - return (-1); + if (*a > *b) + return (1); + else if (*a == *b) + return (0); + else + return (-1); } int32 btoidcmp(Oid a, Oid b) { - if (a > b) - return (1); - else if (a == b) - return (0); - else - return (-1); + if (a > b) + return (1); + else if (a == b) + return (0); + else + return (-1); } int32 btabstimecmp(AbsoluteTime a, AbsoluteTime b) { - if (AbsoluteTimeIsBefore(a, b)) - return (-1); - else if (AbsoluteTimeIsBefore(b, a)) - return (1); - else - return (0); + if (AbsoluteTimeIsBefore(a, b)) + return (-1); + else if (AbsoluteTimeIsBefore(b, a)) + return (1); + else + return (0); } int32 btcharcmp(char a, char b) { - return ((int32) ((uint8)a - (uint8)b)); + return ((int32) ((uint8) a - (uint8) b)); } int32 btchar2cmp(uint16 a, uint16 b) { - return (strncmp((char *) &a, (char *) &b, 2)); + return (strncmp((char *) &a, (char *) &b, 2)); } int32 btchar4cmp(uint32 a, uint32 b) { - return (strncmp((char *) &a, (char *) &b, 4)); + return (strncmp((char *) &a, (char *) &b, 4)); } int32 btchar8cmp(char *a, char *b) { - return (strncmp(a, b, 8)); + return (strncmp(a, b, 8)); } int32 btchar16cmp(char *a, char *b) { - return (strncmp(a, b, 16)); + return (strncmp(a, b, 16)); } int32 
-btnamecmp(NameData *a, NameData *b) +btnamecmp(NameData * a, NameData * b) { - return (strncmp(a->data, b->data, NAMEDATALEN)); + return (strncmp(a->data, b->data, NAMEDATALEN)); } int32 -bttextcmp(struct varlena *a, struct varlena *b) +bttextcmp(struct varlena * a, struct varlena * b) { - int res; - unsigned char *ap, *bp; + int res; + unsigned char *ap, + *bp; #ifdef USE_LOCALE - int la = VARSIZE(a) - VARHDRSZ; - int lb = VARSIZE(b) - VARHDRSZ; - - ap = (unsigned char *) palloc (la + 1); - bp = (unsigned char *) palloc (lb + 1); - - memcpy(ap, VARDATA(a), la); - *(ap + la) = '\0'; - memcpy(bp, VARDATA(b), lb); - *(bp + lb) = '\0'; - - res = strcoll (ap, bp); - - pfree (ap); - pfree (bp); + int la = VARSIZE(a) - VARHDRSZ; + int lb = VARSIZE(b) - VARHDRSZ; + + ap = (unsigned char *) palloc(la + 1); + bp = (unsigned char *) palloc(lb + 1); + + memcpy(ap, VARDATA(a), la); + *(ap + la) = '\0'; + memcpy(bp, VARDATA(b), lb); + *(bp + lb) = '\0'; + + res = strcoll(ap, bp); + + pfree(ap); + pfree(bp); #else - int len = VARSIZE(a); - - /* len is the length of the shorter of the two strings */ - if ( len > VARSIZE(b) ) - len = VARSIZE(b); - - len -= VARHDRSZ; - - ap = (unsigned char *) VARDATA(a); - bp = (unsigned char *) VARDATA(b); - - /* - * If the two strings differ in the first len bytes, or if they're - * the same in the first len bytes and they're both len bytes long, - * we're done. - */ - - res = 0; - if (len > 0) { - do { - res = (int) (*ap++ - *bp++); - len--; - } while (res == 0 && len != 0); - } + int len = VARSIZE(a); + + /* len is the length of the shorter of the two strings */ + if (len > VARSIZE(b)) + len = VARSIZE(b); + + len -= VARHDRSZ; + + ap = (unsigned char *) VARDATA(a); + bp = (unsigned char *) VARDATA(b); + + /* + * If the two strings differ in the first len bytes, or if they're the + * same in the first len bytes and they're both len bytes long, we're + * done. 
+ */ + + res = 0; + if (len > 0) + { + do + { + res = (int) (*ap++ - *bp++); + len--; + } while (res == 0 && len != 0); + } #endif - - if (res != 0 || VARSIZE(a) == VARSIZE(b)) - return (res); - - /* - * The two strings are the same in the first len bytes, and they - * are of different lengths. - */ - - if (VARSIZE(a) < VARSIZE(b)) - return (-1); - else - return (1); + + if (res != 0 || VARSIZE(a) == VARSIZE(b)) + return (res); + + /* + * The two strings are the same in the first len bytes, and they are + * of different lengths. + */ + + if (VARSIZE(a) < VARSIZE(b)) + return (-1); + else + return (1); } diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 4dfa6fd255..4bafbc2ddb 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * btinsert.c-- - * Item insertion in Lehman and Yao btrees for Postgres. + * Item insertion in Lehman and Yao btrees for Postgres. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.17 1997/08/20 14:53:15 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.18 1997/09/07 04:38:45 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -22,1386 +22,1437 @@ #include <fmgr.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif static InsertIndexResult _bt_insertonpg(Relation rel, Buffer buf, BTStack stack, int keysz, ScanKey scankey, BTItem btitem, BTItem afteritem); -static Buffer _bt_split(Relation rel, Buffer buf, OffsetNumber firstright); +static Buffer _bt_split(Relation rel, Buffer buf, OffsetNumber firstright); static OffsetNumber _bt_findsplitloc(Relation rel, Page page, OffsetNumber start, OffsetNumber maxoff, Size llimit); -static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf); +static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf); static OffsetNumber _bt_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, BTItem btitem, BTItem afteritem); -static bool _bt_goesonpg(Relation rel, Buffer buf, Size keysz, ScanKey scankey, BTItem afteritem); -static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, BTItem oldItem, BTItem newItem); -static bool _bt_isequal (TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey); +static bool _bt_goesonpg(Relation rel, Buffer buf, Size keysz, ScanKey scankey, BTItem afteritem); +static void _bt_updateitem(Relation rel, Size keysz, Buffer buf, BTItem oldItem, BTItem newItem); +static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey); /* - * _bt_doinsert() -- Handle insertion of a single btitem in the tree. + * _bt_doinsert() -- Handle insertion of a single btitem in the tree. 
* - * This routine is called by the public interface routines, btbuild - * and btinsert. By here, btitem is filled in, and has a unique - * (xid, seqno) pair. + * This routine is called by the public interface routines, btbuild + * and btinsert. By here, btitem is filled in, and has a unique + * (xid, seqno) pair. */ InsertIndexResult _bt_doinsert(Relation rel, BTItem btitem, bool index_is_unique, Relation heapRel) { - ScanKey itup_scankey; - IndexTuple itup; - BTStack stack; - Buffer buf; - BlockNumber blkno; - int natts = rel->rd_rel->relnatts; - InsertIndexResult res; - - itup = &(btitem->bti_itup); - - /* we need a scan key to do our search, so build one */ - itup_scankey = _bt_mkscankey(rel, itup); - - /* find the page containing this key */ - stack = _bt_search(rel, natts, itup_scankey, &buf); - - blkno = BufferGetBlockNumber(buf); - - /* trade in our read lock for a write lock */ - _bt_relbuf(rel, buf, BT_READ); - buf = _bt_getbuf(rel, blkno, BT_WRITE); - - /* - * If the page was split between the time that we surrendered our - * read lock and acquired our write lock, then this page may no - * longer be the right place for the key we want to insert. In this - * case, we need to move right in the tree. See Lehman and Yao for - * an excruciatingly precise description. 
- */ - - buf = _bt_moveright(rel, buf, natts, itup_scankey, BT_WRITE); - - /* if we're not allowing duplicates, make sure the key isn't */ - /* already in the node */ - if ( index_is_unique ) - { - OffsetNumber offset, maxoff; - Page page; + ScanKey itup_scankey; + IndexTuple itup; + BTStack stack; + Buffer buf; + BlockNumber blkno; + int natts = rel->rd_rel->relnatts; + InsertIndexResult res; - page = BufferGetPage(buf); - maxoff = PageGetMaxOffsetNumber (page); + itup = &(btitem->bti_itup); + + /* we need a scan key to do our search, so build one */ + itup_scankey = _bt_mkscankey(rel, itup); + + /* find the page containing this key */ + stack = _bt_search(rel, natts, itup_scankey, &buf); - offset = _bt_binsrch(rel, buf, natts, itup_scankey, BT_DESCENT); + blkno = BufferGetBlockNumber(buf); - /* make sure the offset we're given points to an actual */ - /* key on the page before trying to compare it */ - if ( !PageIsEmpty (page) && offset <= maxoff ) + /* trade in our read lock for a write lock */ + _bt_relbuf(rel, buf, BT_READ); + buf = _bt_getbuf(rel, blkno, BT_WRITE); + + /* + * If the page was split between the time that we surrendered our read + * lock and acquired our write lock, then this page may no longer be + * the right place for the key we want to insert. In this case, we + * need to move right in the tree. See Lehman and Yao for an + * excruciatingly precise description. 
+ */ + + buf = _bt_moveright(rel, buf, natts, itup_scankey, BT_WRITE); + + /* if we're not allowing duplicates, make sure the key isn't */ + /* already in the node */ + if (index_is_unique) { - TupleDesc itupdesc; - BTItem btitem; - IndexTuple itup; - HeapTuple htup; - BTPageOpaque opaque; - Buffer nbuf; - BlockNumber blkno; - - itupdesc = RelationGetTupleDescriptor(rel); - nbuf = InvalidBuffer; - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - /* - * _bt_compare returns 0 for (1,NULL) and (1,NULL) - - * this's how we handling NULLs - and so we must not use - * _bt_compare in real comparison, but only for - * ordering/finding items on pages. - vadim 03/24/97 - - while ( !_bt_compare (rel, itupdesc, page, - natts, itup_scankey, offset) ) - */ - while ( _bt_isequal (itupdesc, page, offset, natts, itup_scankey) ) - { /* they're equal */ - btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offset)); - itup = &(btitem->bti_itup); - htup = heap_fetch (heapRel, SelfTimeQual, &(itup->t_tid), NULL); - if ( htup != (HeapTuple) NULL ) - { /* it is a duplicate */ - elog(WARN, "Cannot insert a duplicate key into a unique index."); - } - /* get next offnum */ - if ( offset < maxoff ) - { - offset = OffsetNumberNext(offset); - } - else - { /* move right ? */ - if ( P_RIGHTMOST (opaque) ) - break; - if ( !_bt_isequal (itupdesc, page, P_HIKEY, - natts, itup_scankey) ) - break; - /* - * min key of the right page is the same, - * ooh - so many dead duplicates... - */ - blkno = opaque->btpo_next; - if ( nbuf != InvalidBuffer ) - _bt_relbuf (rel, nbuf, BT_READ); - for (nbuf = InvalidBuffer; ; ) - { - nbuf = _bt_getbuf (rel, blkno, BT_READ); - page = BufferGetPage (nbuf); - maxoff = PageGetMaxOffsetNumber(page); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - offset = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - if ( ! 
PageIsEmpty (page) && offset <= maxoff ) - { /* Found some key */ - break; - } - else - { /* Empty or "pseudo"-empty page - get next */ - blkno = opaque->btpo_next; - _bt_relbuf (rel, nbuf, BT_READ); - nbuf = InvalidBuffer; - if ( blkno == P_NONE ) - break; + OffsetNumber offset, + maxoff; + Page page; + + page = BufferGetPage(buf); + maxoff = PageGetMaxOffsetNumber(page); + + offset = _bt_binsrch(rel, buf, natts, itup_scankey, BT_DESCENT); + + /* make sure the offset we're given points to an actual */ + /* key on the page before trying to compare it */ + if (!PageIsEmpty(page) && offset <= maxoff) + { + TupleDesc itupdesc; + BTItem btitem; + IndexTuple itup; + HeapTuple htup; + BTPageOpaque opaque; + Buffer nbuf; + BlockNumber blkno; + + itupdesc = RelationGetTupleDescriptor(rel); + nbuf = InvalidBuffer; + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* + * _bt_compare returns 0 for (1,NULL) and (1,NULL) - this's + * how we handling NULLs - and so we must not use _bt_compare + * in real comparison, but only for ordering/finding items on + * pages. - vadim 03/24/97 + * + * while ( !_bt_compare (rel, itupdesc, page, natts, + * itup_scankey, offset) ) + */ + while (_bt_isequal(itupdesc, page, offset, natts, itup_scankey)) + { /* they're equal */ + btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offset)); + itup = &(btitem->bti_itup); + htup = heap_fetch(heapRel, SelfTimeQual, &(itup->t_tid), NULL); + if (htup != (HeapTuple) NULL) + { /* it is a duplicate */ + elog(WARN, "Cannot insert a duplicate key into a unique index."); + } + /* get next offnum */ + if (offset < maxoff) + { + offset = OffsetNumberNext(offset); + } + else + { /* move right ? */ + if (P_RIGHTMOST(opaque)) + break; + if (!_bt_isequal(itupdesc, page, P_HIKEY, + natts, itup_scankey)) + break; + + /* + * min key of the right page is the same, ooh - so + * many dead duplicates... 
+ */ + blkno = opaque->btpo_next; + if (nbuf != InvalidBuffer) + _bt_relbuf(rel, nbuf, BT_READ); + for (nbuf = InvalidBuffer;;) + { + nbuf = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(nbuf); + maxoff = PageGetMaxOffsetNumber(page); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + offset = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + if (!PageIsEmpty(page) && offset <= maxoff) + { /* Found some key */ + break; + } + else + { /* Empty or "pseudo"-empty page - get next */ + blkno = opaque->btpo_next; + _bt_relbuf(rel, nbuf, BT_READ); + nbuf = InvalidBuffer; + if (blkno == P_NONE) + break; + } + } + if (nbuf == InvalidBuffer) + break; + } } - } - if ( nbuf == InvalidBuffer ) - break; - } - } - if ( nbuf != InvalidBuffer ) - _bt_relbuf(rel, nbuf, BT_READ); + if (nbuf != InvalidBuffer) + _bt_relbuf(rel, nbuf, BT_READ); + } } - } - - /* do the insertion */ - res = _bt_insertonpg(rel, buf, stack, natts, itup_scankey, - btitem, (BTItem) NULL); - - /* be tidy */ - _bt_freestack(stack); - _bt_freeskey(itup_scankey); - - return (res); + + /* do the insertion */ + res = _bt_insertonpg(rel, buf, stack, natts, itup_scankey, + btitem, (BTItem) NULL); + + /* be tidy */ + _bt_freestack(stack); + _bt_freeskey(itup_scankey); + + return (res); } /* - * _bt_insertonpg() -- Insert a tuple on a particular page in the index. + * _bt_insertonpg() -- Insert a tuple on a particular page in the index. * - * This recursive procedure does the following things: + * This recursive procedure does the following things: * - * + if necessary, splits the target page. - * + finds the right place to insert the tuple (taking into - * account any changes induced by a split). - * + inserts the tuple. - * + if the page was split, pops the parent stack, and finds the - * right place to insert the new child pointer (by walking - * right using information stored in the parent stack). - * + invoking itself with the appropriate tuple for the right - * child page on the parent. 
+ * + if necessary, splits the target page. + * + finds the right place to insert the tuple (taking into + * account any changes induced by a split). + * + inserts the tuple. + * + if the page was split, pops the parent stack, and finds the + * right place to insert the new child pointer (by walking + * right using information stored in the parent stack). + * + invoking itself with the appropriate tuple for the right + * child page on the parent. * - * On entry, we must have the right buffer on which to do the - * insertion, and the buffer must be pinned and locked. On return, - * we will have dropped both the pin and the write lock on the buffer. + * On entry, we must have the right buffer on which to do the + * insertion, and the buffer must be pinned and locked. On return, + * we will have dropped both the pin and the write lock on the buffer. * - * The locking interactions in this code are critical. You should - * grok Lehman and Yao's paper before making any changes. In addition, - * you need to understand how we disambiguate duplicate keys in this - * implementation, in order to be able to find our location using - * L&Y "move right" operations. Since we may insert duplicate user - * keys, and since these dups may propogate up the tree, we use the - * 'afteritem' parameter to position ourselves correctly for the - * insertion on internal pages. + * The locking interactions in this code are critical. You should + * grok Lehman and Yao's paper before making any changes. In addition, + * you need to understand how we disambiguate duplicate keys in this + * implementation, in order to be able to find our location using + * L&Y "move right" operations. Since we may insert duplicate user + * keys, and since these dups may propogate up the tree, we use the + * 'afteritem' parameter to position ourselves correctly for the + * insertion on internal pages. 
*/ -static InsertIndexResult +static InsertIndexResult _bt_insertonpg(Relation rel, - Buffer buf, - BTStack stack, - int keysz, - ScanKey scankey, - BTItem btitem, - BTItem afteritem) + Buffer buf, + BTStack stack, + int keysz, + ScanKey scankey, + BTItem btitem, + BTItem afteritem) { - InsertIndexResult res; - Page page; - BTPageOpaque lpageop; - BlockNumber itup_blkno; - OffsetNumber itup_off; - OffsetNumber firstright = InvalidOffsetNumber; - int itemsz; - bool do_split = false; - bool keys_equal = false; - - page = BufferGetPage(buf); - lpageop = (BTPageOpaque) PageGetSpecialPointer(page); - - itemsz = IndexTupleDSize(btitem->bti_itup) - + (sizeof(BTItemData) - sizeof(IndexTupleData)); - - itemsz = DOUBLEALIGN(itemsz); /* be safe, PageAddItem will do this - but we need to be consistent */ - /* - * If we have to insert item on the leftmost page which is the first - * page in the chain of duplicates then: - * 1. if scankey == hikey (i.e. - new duplicate item) then - * insert it here; - * 2. if scankey < hikey then: - * 2.a if there is duplicate key(s) here - we force splitting; - * 2.b else - we may "eat" this page from duplicates chain. 
- */ - if ( lpageop->btpo_flags & BTP_CHAIN ) - { - OffsetNumber maxoff = PageGetMaxOffsetNumber (page); - ItemId hitemid; - BTItem hitem; - - Assert ( !P_RIGHTMOST(lpageop) ); - hitemid = PageGetItemId(page, P_HIKEY); - hitem = (BTItem) PageGetItem(page, hitemid); - if ( maxoff > P_HIKEY && - !_bt_itemcmp (rel, keysz, hitem, - (BTItem) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY)), - BTEqualStrategyNumber) ) - elog (FATAL, "btree: bad key on the page in the chain of duplicates"); - - if ( !_bt_skeycmp (rel, keysz, scankey, page, hitemid, - BTEqualStrategyNumber) ) - { - if ( !P_LEFTMOST(lpageop) ) - elog (FATAL, "btree: attempt to insert bad key on the non-leftmost page in the chain of duplicates"); - if ( !_bt_skeycmp (rel, keysz, scankey, page, hitemid, - BTLessStrategyNumber) ) - elog (FATAL, "btree: attempt to insert higher key on the leftmost page in the chain of duplicates"); - if ( maxoff > P_HIKEY ) /* have duplicate(s) */ - { - firstright = P_FIRSTKEY; - do_split = true; - } - else /* "eat" page */ - { - Buffer pbuf; - Page ppage; - - itup_blkno = BufferGetBlockNumber(buf); - itup_off = PageAddItem(page, (Item) btitem, itemsz, - P_FIRSTKEY, LP_USED); - if ( itup_off == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add item"); - lpageop->btpo_flags &= ~BTP_CHAIN; - pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); - ppage = BufferGetPage(pbuf); - PageIndexTupleDelete(ppage, stack->bts_offset); - pfree(stack->bts_btitem); - stack->bts_btitem = _bt_formitem(&(btitem->bti_itup)); - ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), - itup_blkno, P_HIKEY); - _bt_wrtbuf(rel, buf); - res = _bt_insertonpg(rel, pbuf, stack->bts_parent, - keysz, scankey, stack->bts_btitem, - NULL); - ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); - return (res); - } - } - else - { - keys_equal = true; - if ( PageGetFreeSpace(page) < itemsz ) - do_split = true; - } - } - else if ( PageGetFreeSpace(page) < itemsz ) - do_split = true; - else if ( 
PageGetFreeSpace(page) < 3*itemsz + 2*sizeof(ItemIdData) ) - { - OffsetNumber offnum = (P_RIGHTMOST(lpageop)) ? P_HIKEY : P_FIRSTKEY; - OffsetNumber maxoff = PageGetMaxOffsetNumber (page); - ItemId itid; - BTItem previtem, chkitem; - Size maxsize; - Size currsize; - - itid = PageGetItemId(page, offnum); - previtem = (BTItem) PageGetItem(page, itid); - maxsize = currsize = (ItemIdGetLength(itid) + sizeof(ItemIdData)); - for (offnum = OffsetNumberNext(offnum); - offnum <= maxoff; offnum = OffsetNumberNext(offnum) ) - { - itid = PageGetItemId(page, offnum); - chkitem = (BTItem) PageGetItem(page, itid); - if ( !_bt_itemcmp (rel, keysz, previtem, chkitem, - BTEqualStrategyNumber) ) - { - if ( currsize > maxsize ) - maxsize = currsize; - currsize = 0; - previtem = chkitem; - } - currsize += (ItemIdGetLength(itid) + sizeof(ItemIdData)); - } - if ( currsize > maxsize ) - maxsize = currsize; - maxsize += sizeof (PageHeaderData) + - DOUBLEALIGN (sizeof (BTPageOpaqueData)); - if ( maxsize >= PageGetPageSize (page) / 2 ) - do_split = true; - } - - if ( do_split ) - { - Buffer rbuf; - Page rpage; - BTItem ritem; - BlockNumber rbknum; - BTPageOpaque rpageop; - Buffer pbuf; - Page ppage; - BTPageOpaque ppageop; - BlockNumber bknum = BufferGetBlockNumber(buf); - BTItem lowLeftItem; - OffsetNumber maxoff; - bool shifted = false; - bool left_chained = ( lpageop->btpo_flags & BTP_CHAIN ) ? true : false; - - /* - * If we have to split leaf page in the chain of duplicates by - * new duplicate then we try to look at our right sibling first. - */ - if ( ( lpageop->btpo_flags & BTP_CHAIN ) && - ( lpageop->btpo_flags & BTP_LEAF ) && keys_equal ) - { - bool use_left = true; - - rbuf = _bt_getbuf(rel, lpageop->btpo_next, BT_WRITE); - rpage = BufferGetPage(rbuf); - rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage); - if ( !P_RIGHTMOST (rpageop) ) /* non-rightmost page */ - { /* - * If we have the same hikey here then it's - * yet another page in chain. 
- */ - if ( _bt_skeycmp (rel, keysz, scankey, rpage, - PageGetItemId(rpage, P_HIKEY), - BTEqualStrategyNumber) ) - { - if ( !( rpageop->btpo_flags & BTP_CHAIN ) ) - elog (FATAL, "btree: lost page in the chain of duplicates"); - } - else if ( _bt_skeycmp (rel, keysz, scankey, rpage, - PageGetItemId(rpage, P_HIKEY), - BTGreaterStrategyNumber) ) - elog (FATAL, "btree: hikey is out of order"); - else if ( rpageop->btpo_flags & BTP_CHAIN ) - /* - * If hikey > scankey then it's last page in chain and - * BTP_CHAIN must be OFF - */ - elog (FATAL, "btree: lost last page in the chain of duplicates"); - - /* if there is room here then we use this page. */ - if ( PageGetFreeSpace (rpage) > itemsz ) - use_left = false; - } - else /* rightmost page */ - { - Assert ( !( rpageop->btpo_flags & BTP_CHAIN ) ); - /* if there is room here then we use this page. */ - if ( PageGetFreeSpace (rpage) > itemsz ) - use_left = false; - } - if ( !use_left ) /* insert on the right page */ - { - _bt_relbuf(rel, buf, BT_WRITE); - return ( _bt_insertonpg(rel, rbuf, stack, keysz, - scankey, btitem, afteritem) ); - } - _bt_relbuf(rel, rbuf, BT_WRITE); - } + InsertIndexResult res; + Page page; + BTPageOpaque lpageop; + BlockNumber itup_blkno; + OffsetNumber itup_off; + OffsetNumber firstright = InvalidOffsetNumber; + int itemsz; + bool do_split = false; + bool keys_equal = false; + + page = BufferGetPage(buf); + lpageop = (BTPageOpaque) PageGetSpecialPointer(page); + + itemsz = IndexTupleDSize(btitem->bti_itup) + + (sizeof(BTItemData) - sizeof(IndexTupleData)); + + itemsz = DOUBLEALIGN(itemsz); /* be safe, PageAddItem will do + * this but we need to be + * consistent */ + /* - * If after splitting un-chained page we'll got chain of pages - * with duplicates then we want to know - * 1. on which of two pages new btitem will go (current - * _bt_findsplitloc is quite bad); - * 2. 
what parent (if there's one) thinking about it - * (remember about deletions) + * If we have to insert item on the leftmost page which is the first + * page in the chain of duplicates then: 1. if scankey == hikey (i.e. + * - new duplicate item) then insert it here; 2. if scankey < hikey + * then: 2.a if there is duplicate key(s) here - we force splitting; + * 2.b else - we may "eat" this page from duplicates chain. */ - else if ( !( lpageop->btpo_flags & BTP_CHAIN ) ) + if (lpageop->btpo_flags & BTP_CHAIN) { - OffsetNumber start = ( P_RIGHTMOST(lpageop) ) ? P_HIKEY : P_FIRSTKEY; - Size llimit; - - maxoff = PageGetMaxOffsetNumber (page); - llimit = PageGetPageSize(page) - sizeof (PageHeaderData) - - DOUBLEALIGN (sizeof (BTPageOpaqueData)) - + sizeof(ItemIdData); - llimit /= 2; - firstright = _bt_findsplitloc(rel, page, start, maxoff, llimit); - - if ( _bt_itemcmp (rel, keysz, - (BTItem) PageGetItem(page, PageGetItemId(page, start)), - (BTItem) PageGetItem(page, PageGetItemId(page, firstright)), - BTEqualStrategyNumber) ) - { - if ( _bt_skeycmp (rel, keysz, scankey, page, - PageGetItemId(page, firstright), - BTLessStrategyNumber) ) - /* - * force moving current items to the new page: - * new item will go on the current page. - */ - firstright = start; - else - /* - * new btitem >= firstright, start item == firstright - - * new chain of duplicates: if this non-leftmost leaf - * page and parent item < start item then force moving - * all items to the new page - current page will be - * "empty" after it. 
- */ - { - if ( !P_LEFTMOST (lpageop) && - ( lpageop->btpo_flags & BTP_LEAF ) ) - { - ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), - bknum, P_HIKEY); - pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); - if ( _bt_itemcmp (rel, keysz, stack->bts_btitem, - (BTItem) PageGetItem(page, - PageGetItemId(page, start)), - BTLessStrategyNumber) ) - { - firstright = start; - shifted = true; - } - _bt_relbuf(rel, pbuf, BT_WRITE); - } - } - } /* else - no new chain if start item < firstright one */ - } - - /* split the buffer into left and right halves */ - rbuf = _bt_split(rel, buf, firstright); - - /* which new page (left half or right half) gets the tuple? */ - if (_bt_goesonpg(rel, buf, keysz, scankey, afteritem)) { - /* left page */ - itup_off = _bt_pgaddtup(rel, buf, keysz, scankey, - itemsz, btitem, afteritem); - itup_blkno = BufferGetBlockNumber(buf); - } else { - /* right page */ - itup_off = _bt_pgaddtup(rel, rbuf, keysz, scankey, - itemsz, btitem, afteritem); - itup_blkno = BufferGetBlockNumber(rbuf); + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + ItemId hitemid; + BTItem hitem; + + Assert(!P_RIGHTMOST(lpageop)); + hitemid = PageGetItemId(page, P_HIKEY); + hitem = (BTItem) PageGetItem(page, hitemid); + if (maxoff > P_HIKEY && + !_bt_itemcmp(rel, keysz, hitem, + (BTItem) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY)), + BTEqualStrategyNumber)) + elog(FATAL, "btree: bad key on the page in the chain of duplicates"); + + if (!_bt_skeycmp(rel, keysz, scankey, page, hitemid, + BTEqualStrategyNumber)) + { + if (!P_LEFTMOST(lpageop)) + elog(FATAL, "btree: attempt to insert bad key on the non-leftmost page in the chain of duplicates"); + if (!_bt_skeycmp(rel, keysz, scankey, page, hitemid, + BTLessStrategyNumber)) + elog(FATAL, "btree: attempt to insert higher key on the leftmost page in the chain of duplicates"); + if (maxoff > P_HIKEY) /* have duplicate(s) */ + { + firstright = P_FIRSTKEY; + do_split = true; + } + else +/* "eat" page */ + { + Buffer pbuf; + 
Page ppage; + + itup_blkno = BufferGetBlockNumber(buf); + itup_off = PageAddItem(page, (Item) btitem, itemsz, + P_FIRSTKEY, LP_USED); + if (itup_off == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add item"); + lpageop->btpo_flags &= ~BTP_CHAIN; + pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); + ppage = BufferGetPage(pbuf); + PageIndexTupleDelete(ppage, stack->bts_offset); + pfree(stack->bts_btitem); + stack->bts_btitem = _bt_formitem(&(btitem->bti_itup)); + ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), + itup_blkno, P_HIKEY); + _bt_wrtbuf(rel, buf); + res = _bt_insertonpg(rel, pbuf, stack->bts_parent, + keysz, scankey, stack->bts_btitem, + NULL); + ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); + return (res); + } + } + else + { + keys_equal = true; + if (PageGetFreeSpace(page) < itemsz) + do_split = true; + } } - - maxoff = PageGetMaxOffsetNumber (page); - if ( shifted ) - { - if ( maxoff > P_FIRSTKEY ) - elog (FATAL, "btree: shifted page is not empty"); - lowLeftItem = (BTItem) NULL; - } - else - { - if ( maxoff < P_FIRSTKEY ) - elog (FATAL, "btree: un-shifted page is empty"); - lowLeftItem = (BTItem) PageGetItem(page, - PageGetItemId(page, P_FIRSTKEY)); - if ( _bt_itemcmp (rel, keysz, lowLeftItem, - (BTItem) PageGetItem(page, PageGetItemId(page, P_HIKEY)), - BTEqualStrategyNumber) ) - lpageop->btpo_flags |= BTP_CHAIN; + else if (PageGetFreeSpace(page) < itemsz) + do_split = true; + else if (PageGetFreeSpace(page) < 3 * itemsz + 2 * sizeof(ItemIdData)) + { + OffsetNumber offnum = (P_RIGHTMOST(lpageop)) ? 
P_HIKEY : P_FIRSTKEY; + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + ItemId itid; + BTItem previtem, + chkitem; + Size maxsize; + Size currsize; + + itid = PageGetItemId(page, offnum); + previtem = (BTItem) PageGetItem(page, itid); + maxsize = currsize = (ItemIdGetLength(itid) + sizeof(ItemIdData)); + for (offnum = OffsetNumberNext(offnum); + offnum <= maxoff; offnum = OffsetNumberNext(offnum)) + { + itid = PageGetItemId(page, offnum); + chkitem = (BTItem) PageGetItem(page, itid); + if (!_bt_itemcmp(rel, keysz, previtem, chkitem, + BTEqualStrategyNumber)) + { + if (currsize > maxsize) + maxsize = currsize; + currsize = 0; + previtem = chkitem; + } + currsize += (ItemIdGetLength(itid) + sizeof(ItemIdData)); + } + if (currsize > maxsize) + maxsize = currsize; + maxsize += sizeof(PageHeaderData) + + DOUBLEALIGN(sizeof(BTPageOpaqueData)); + if (maxsize >= PageGetPageSize(page) / 2) + do_split = true; } - /* - * By here, - * - * + our target page has been split; - * + the original tuple has been inserted; - * + we have write locks on both the old (left half) and new - * (right half) buffers, after the split; and - * + we have the key we want to insert into the parent. - * - * Do the parent insertion. We need to hold onto the locks for - * the child pages until we locate the parent, but we can release - * them before doing the actual insertion (see Lehman and Yao for - * the reasoning). 
- */ - - if (stack == (BTStack) NULL) { - - /* create a new root node and release the split buffers */ - _bt_newroot(rel, buf, rbuf); - _bt_relbuf(rel, buf, BT_WRITE); - _bt_relbuf(rel, rbuf, BT_WRITE); - - } else { - ScanKey newskey; - InsertIndexResult newres; - BTItem new_item; - OffsetNumber upditem_offset = P_HIKEY; - bool do_update = false; - bool update_in_place = true; - bool parent_chained; - - /* form a index tuple that points at the new right page */ - rbknum = BufferGetBlockNumber(rbuf); - rpage = BufferGetPage(rbuf); - rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage); - - /* - * By convention, the first entry (1) on every - * non-rightmost page is the high key for that page. In - * order to get the lowest key on the new right page, we - * actually look at its second (2) entry. - */ - - if (! P_RIGHTMOST(rpageop)) - { - ritem = (BTItem) PageGetItem(rpage, - PageGetItemId(rpage, P_FIRSTKEY)); - if ( _bt_itemcmp (rel, keysz, ritem, - (BTItem) PageGetItem(rpage, - PageGetItemId(rpage, P_HIKEY)), - BTEqualStrategyNumber) ) - rpageop->btpo_flags |= BTP_CHAIN; - } - else - ritem = (BTItem) PageGetItem(rpage, - PageGetItemId(rpage, P_HIKEY)); - - /* get a unique btitem for this key */ - new_item = _bt_formitem(&(ritem->bti_itup)); - - ItemPointerSet(&(new_item->bti_itup.t_tid), rbknum, P_HIKEY); - - /* - * Find the parent buffer and get the parent page. - * - * Oops - if we were moved right then we need to - * change stack item! We want to find parent pointing to - * where we are, right ? - vadim 05/27/97 - */ - ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), - bknum, P_HIKEY); - pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); - ppage = BufferGetPage(pbuf); - ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); - parent_chained = (( ppageop->btpo_flags & BTP_CHAIN )) ? 
true : false; - - if ( parent_chained && !left_chained ) - elog (FATAL, "nbtree: unexpected chained parent of unchained page"); - - /* - * If the key of new_item is < than the key of the item - * in the parent page pointing to the left page - * (stack->bts_btitem), we have to update the latter key; - * otherwise the keys on the parent page wouldn't be - * monotonically increasing after we inserted the new - * pointer to the right page (new_item). This only - * happens if our left page is the leftmost page and a - * new minimum key had been inserted before, which is not - * reflected in the parent page but didn't matter so - * far. If there are duplicate keys and this new minimum - * key spills over to our new right page, we get an - * inconsistency if we don't update the left key in the - * parent page. - * - * Also, new duplicates handling code require us to update - * parent item if some smaller items left on the left page - * (which is possible in splitting leftmost page) and - * current parent item == new_item. - vadim 05/27/97 - */ - if ( _bt_itemcmp (rel, keysz, stack->bts_btitem, new_item, - BTGreaterStrategyNumber) || - ( !shifted && - _bt_itemcmp(rel, keysz, stack->bts_btitem, - new_item, BTEqualStrategyNumber) && - _bt_itemcmp(rel, keysz, lowLeftItem, - new_item, BTLessStrategyNumber) ) ) - { - do_update = true; - /* - * figure out which key is leftmost (if the parent page - * is rightmost, too, it must be the root) + if (do_split) + { + Buffer rbuf; + Page rpage; + BTItem ritem; + BlockNumber rbknum; + BTPageOpaque rpageop; + Buffer pbuf; + Page ppage; + BTPageOpaque ppageop; + BlockNumber bknum = BufferGetBlockNumber(buf); + BTItem lowLeftItem; + OffsetNumber maxoff; + bool shifted = false; + bool left_chained = (lpageop->btpo_flags & BTP_CHAIN) ? true : false; + + /* + * If we have to split leaf page in the chain of duplicates by new + * duplicate then we try to look at our right sibling first. 
*/ - if(P_RIGHTMOST(ppageop)) - upditem_offset = P_HIKEY; + if ((lpageop->btpo_flags & BTP_CHAIN) && + (lpageop->btpo_flags & BTP_LEAF) && keys_equal) + { + bool use_left = true; + + rbuf = _bt_getbuf(rel, lpageop->btpo_next, BT_WRITE); + rpage = BufferGetPage(rbuf); + rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage); + if (!P_RIGHTMOST(rpageop)) /* non-rightmost page */ + { /* If we have the same hikey here then + * it's yet another page in chain. */ + if (_bt_skeycmp(rel, keysz, scankey, rpage, + PageGetItemId(rpage, P_HIKEY), + BTEqualStrategyNumber)) + { + if (!(rpageop->btpo_flags & BTP_CHAIN)) + elog(FATAL, "btree: lost page in the chain of duplicates"); + } + else if (_bt_skeycmp(rel, keysz, scankey, rpage, + PageGetItemId(rpage, P_HIKEY), + BTGreaterStrategyNumber)) + elog(FATAL, "btree: hikey is out of order"); + else if (rpageop->btpo_flags & BTP_CHAIN) + + /* + * If hikey > scankey then it's last page in chain and + * BTP_CHAIN must be OFF + */ + elog(FATAL, "btree: lost last page in the chain of duplicates"); + + /* if there is room here then we use this page. */ + if (PageGetFreeSpace(rpage) > itemsz) + use_left = false; + } + else +/* rightmost page */ + { + Assert(!(rpageop->btpo_flags & BTP_CHAIN)); + /* if there is room here then we use this page. */ + if (PageGetFreeSpace(rpage) > itemsz) + use_left = false; + } + if (!use_left) /* insert on the right page */ + { + _bt_relbuf(rel, buf, BT_WRITE); + return (_bt_insertonpg(rel, rbuf, stack, keysz, + scankey, btitem, afteritem)); + } + _bt_relbuf(rel, rbuf, BT_WRITE); + } + + /* + * If after splitting un-chained page we'll got chain of pages + * with duplicates then we want to know 1. on which of two pages + * new btitem will go (current _bt_findsplitloc is quite bad); 2. + * what parent (if there's one) thinking about it (remember about + * deletions) + */ + else if (!(lpageop->btpo_flags & BTP_CHAIN)) + { + OffsetNumber start = (P_RIGHTMOST(lpageop)) ? 
P_HIKEY : P_FIRSTKEY; + Size llimit; + + maxoff = PageGetMaxOffsetNumber(page); + llimit = PageGetPageSize(page) - sizeof(PageHeaderData) - + DOUBLEALIGN(sizeof(BTPageOpaqueData)) + + sizeof(ItemIdData); + llimit /= 2; + firstright = _bt_findsplitloc(rel, page, start, maxoff, llimit); + + if (_bt_itemcmp(rel, keysz, + (BTItem) PageGetItem(page, PageGetItemId(page, start)), + (BTItem) PageGetItem(page, PageGetItemId(page, firstright)), + BTEqualStrategyNumber)) + { + if (_bt_skeycmp(rel, keysz, scankey, page, + PageGetItemId(page, firstright), + BTLessStrategyNumber)) + + /* + * force moving current items to the new page: new + * item will go on the current page. + */ + firstright = start; + else + + /* + * new btitem >= firstright, start item == firstright + * - new chain of duplicates: if this non-leftmost + * leaf page and parent item < start item then force + * moving all items to the new page - current page + * will be "empty" after it. + */ + { + if (!P_LEFTMOST(lpageop) && + (lpageop->btpo_flags & BTP_LEAF)) + { + ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), + bknum, P_HIKEY); + pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); + if (_bt_itemcmp(rel, keysz, stack->bts_btitem, + (BTItem) PageGetItem(page, + PageGetItemId(page, start)), + BTLessStrategyNumber)) + { + firstright = start; + shifted = true; + } + _bt_relbuf(rel, pbuf, BT_WRITE); + } + } + } /* else - no new chain if start item < + * firstright one */ + } + + /* split the buffer into left and right halves */ + rbuf = _bt_split(rel, buf, firstright); + + /* which new page (left half or right half) gets the tuple? 
*/ + if (_bt_goesonpg(rel, buf, keysz, scankey, afteritem)) + { + /* left page */ + itup_off = _bt_pgaddtup(rel, buf, keysz, scankey, + itemsz, btitem, afteritem); + itup_blkno = BufferGetBlockNumber(buf); + } else - upditem_offset = P_FIRSTKEY; - if ( !P_LEFTMOST(lpageop) || - stack->bts_offset != upditem_offset ) - elog (FATAL, "btree: items are out of order (leftmost %d, stack %u, update %u)", - P_LEFTMOST(lpageop), stack->bts_offset, upditem_offset); - } - - if ( do_update ) - { - if ( shifted ) - elog (FATAL, "btree: attempt to update parent for shifted page"); - /* - * Try to update in place. If out parent page is chained - * then we must forse insertion. + { + /* right page */ + itup_off = _bt_pgaddtup(rel, rbuf, keysz, scankey, + itemsz, btitem, afteritem); + itup_blkno = BufferGetBlockNumber(rbuf); + } + + maxoff = PageGetMaxOffsetNumber(page); + if (shifted) + { + if (maxoff > P_FIRSTKEY) + elog(FATAL, "btree: shifted page is not empty"); + lowLeftItem = (BTItem) NULL; + } + else + { + if (maxoff < P_FIRSTKEY) + elog(FATAL, "btree: un-shifted page is empty"); + lowLeftItem = (BTItem) PageGetItem(page, + PageGetItemId(page, P_FIRSTKEY)); + if (_bt_itemcmp(rel, keysz, lowLeftItem, + (BTItem) PageGetItem(page, PageGetItemId(page, P_HIKEY)), + BTEqualStrategyNumber)) + lpageop->btpo_flags |= BTP_CHAIN; + } + + /* + * By here, + * + * + our target page has been split; + the original tuple has been + * inserted; + we have write locks on both the old (left half) + * and new (right half) buffers, after the split; and + we have + * the key we want to insert into the parent. + * + * Do the parent insertion. We need to hold onto the locks for the + * child pages until we locate the parent, but we can release them + * before doing the actual insertion (see Lehman and Yao for the + * reasoning). 
*/ - if ( !parent_chained && - DOUBLEALIGN (IndexTupleDSize (lowLeftItem->bti_itup)) == - DOUBLEALIGN (IndexTupleDSize (stack->bts_btitem->bti_itup)) ) - { - _bt_updateitem(rel, keysz, pbuf, - stack->bts_btitem, lowLeftItem); - _bt_relbuf(rel, buf, BT_WRITE); - _bt_relbuf(rel, rbuf, BT_WRITE); + + if (stack == (BTStack) NULL) + { + + /* create a new root node and release the split buffers */ + _bt_newroot(rel, buf, rbuf); + _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, rbuf, BT_WRITE); + } else { - update_in_place = false; - PageIndexTupleDelete(ppage, upditem_offset); - - /* - * don't write anything out yet--we still have the write - * lock, and now we call another _bt_insertonpg to - * insert the correct key. - * First, make a new item, using the tuple data from - * lowLeftItem. Point it to the left child. - * Update it on the stack at the same time. - */ - pfree(stack->bts_btitem); - stack->bts_btitem = _bt_formitem(&(lowLeftItem->bti_itup)); - ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), - bknum, P_HIKEY); - - /* - * Unlock the children before doing this - * - * Mmm ... I foresee problems here. - vadim 06/10/97 - */ - _bt_relbuf(rel, buf, BT_WRITE); - _bt_relbuf(rel, rbuf, BT_WRITE); - - /* - * A regular _bt_binsrch should find the right place to - * put the new entry, since it should be lower than any - * other key on the page. - * Therefore set afteritem to NULL. - */ - newskey = _bt_mkscankey(rel, &(stack->bts_btitem->bti_itup)); - newres = _bt_insertonpg(rel, pbuf, stack->bts_parent, - keysz, newskey, stack->bts_btitem, - NULL); - - pfree(newres); - pfree(newskey); - - /* - * we have now lost our lock on the parent buffer, and - * need to get it back. 
- */ - pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); + ScanKey newskey; + InsertIndexResult newres; + BTItem new_item; + OffsetNumber upditem_offset = P_HIKEY; + bool do_update = false; + bool update_in_place = true; + bool parent_chained; + + /* form a index tuple that points at the new right page */ + rbknum = BufferGetBlockNumber(rbuf); + rpage = BufferGetPage(rbuf); + rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage); + + /* + * By convention, the first entry (1) on every non-rightmost + * page is the high key for that page. In order to get the + * lowest key on the new right page, we actually look at its + * second (2) entry. + */ + + if (!P_RIGHTMOST(rpageop)) + { + ritem = (BTItem) PageGetItem(rpage, + PageGetItemId(rpage, P_FIRSTKEY)); + if (_bt_itemcmp(rel, keysz, ritem, + (BTItem) PageGetItem(rpage, + PageGetItemId(rpage, P_HIKEY)), + BTEqualStrategyNumber)) + rpageop->btpo_flags |= BTP_CHAIN; + } + else + ritem = (BTItem) PageGetItem(rpage, + PageGetItemId(rpage, P_HIKEY)); + + /* get a unique btitem for this key */ + new_item = _bt_formitem(&(ritem->bti_itup)); + + ItemPointerSet(&(new_item->bti_itup.t_tid), rbknum, P_HIKEY); + + /* + * Find the parent buffer and get the parent page. + * + * Oops - if we were moved right then we need to change stack + * item! We want to find parent pointing to where we are, + * right ? - vadim 05/27/97 + */ + ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), + bknum, P_HIKEY); + pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); + ppage = BufferGetPage(pbuf); + ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); + parent_chained = ((ppageop->btpo_flags & BTP_CHAIN)) ? 
true : false; + + if (parent_chained && !left_chained) + elog(FATAL, "nbtree: unexpected chained parent of unchained page"); + + /* + * If the key of new_item is < than the key of the item in the + * parent page pointing to the left page (stack->bts_btitem), + * we have to update the latter key; otherwise the keys on the + * parent page wouldn't be monotonically increasing after we + * inserted the new pointer to the right page (new_item). This + * only happens if our left page is the leftmost page and a + * new minimum key had been inserted before, which is not + * reflected in the parent page but didn't matter so far. If + * there are duplicate keys and this new minimum key spills + * over to our new right page, we get an inconsistency if we + * don't update the left key in the parent page. + * + * Also, new duplicates handling code require us to update parent + * item if some smaller items left on the left page (which is + * possible in splitting leftmost page) and current parent + * item == new_item. - vadim 05/27/97 + */ + if (_bt_itemcmp(rel, keysz, stack->bts_btitem, new_item, + BTGreaterStrategyNumber) || + (!shifted && + _bt_itemcmp(rel, keysz, stack->bts_btitem, + new_item, BTEqualStrategyNumber) && + _bt_itemcmp(rel, keysz, lowLeftItem, + new_item, BTLessStrategyNumber))) + { + do_update = true; + + /* + * figure out which key is leftmost (if the parent page is + * rightmost, too, it must be the root) + */ + if (P_RIGHTMOST(ppageop)) + upditem_offset = P_HIKEY; + else + upditem_offset = P_FIRSTKEY; + if (!P_LEFTMOST(lpageop) || + stack->bts_offset != upditem_offset) + elog(FATAL, "btree: items are out of order (leftmost %d, stack %u, update %u)", + P_LEFTMOST(lpageop), stack->bts_offset, upditem_offset); + } + + if (do_update) + { + if (shifted) + elog(FATAL, "btree: attempt to update parent for shifted page"); + + /* + * Try to update in place. If out parent page is chained + * then we must forse insertion. 
+ */ + if (!parent_chained && + DOUBLEALIGN(IndexTupleDSize(lowLeftItem->bti_itup)) == + DOUBLEALIGN(IndexTupleDSize(stack->bts_btitem->bti_itup))) + { + _bt_updateitem(rel, keysz, pbuf, + stack->bts_btitem, lowLeftItem); + _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, rbuf, BT_WRITE); + } + else + { + update_in_place = false; + PageIndexTupleDelete(ppage, upditem_offset); + + /* + * don't write anything out yet--we still have the + * write lock, and now we call another _bt_insertonpg + * to insert the correct key. First, make a new item, + * using the tuple data from lowLeftItem. Point it to + * the left child. Update it on the stack at the same + * time. + */ + pfree(stack->bts_btitem); + stack->bts_btitem = _bt_formitem(&(lowLeftItem->bti_itup)); + ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), + bknum, P_HIKEY); + + /* + * Unlock the children before doing this + * + * Mmm ... I foresee problems here. - vadim 06/10/97 + */ + _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, rbuf, BT_WRITE); + + /* + * A regular _bt_binsrch should find the right place + * to put the new entry, since it should be lower than + * any other key on the page. Therefore set afteritem + * to NULL. + */ + newskey = _bt_mkscankey(rel, &(stack->bts_btitem->bti_itup)); + newres = _bt_insertonpg(rel, pbuf, stack->bts_parent, + keysz, newskey, stack->bts_btitem, + NULL); + + pfree(newres); + pfree(newskey); + + /* + * we have now lost our lock on the parent buffer, and + * need to get it back. 
+ */ + pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); + } + } + else + { + _bt_relbuf(rel, buf, BT_WRITE); + _bt_relbuf(rel, rbuf, BT_WRITE); + } + + newskey = _bt_mkscankey(rel, &(new_item->bti_itup)); + + afteritem = stack->bts_btitem; + if (parent_chained && !update_in_place) + { + ppage = BufferGetPage(pbuf); + ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); + if (ppageop->btpo_flags & BTP_CHAIN) + elog(FATAL, "btree: unexpected BTP_CHAIN flag in parent after update"); + if (P_RIGHTMOST(ppageop)) + elog(FATAL, "btree: chained parent is RIGHTMOST after update"); + maxoff = PageGetMaxOffsetNumber(ppage); + if (maxoff != P_FIRSTKEY) + elog(FATAL, "btree: FIRSTKEY was unexpected in parent after update"); + if (_bt_skeycmp(rel, keysz, newskey, ppage, + PageGetItemId(ppage, P_FIRSTKEY), + BTLessEqualStrategyNumber)) + elog(FATAL, "btree: parent FIRSTKEY is >= duplicate key after update"); + if (!_bt_skeycmp(rel, keysz, newskey, ppage, + PageGetItemId(ppage, P_HIKEY), + BTEqualStrategyNumber)) + elog(FATAL, "btree: parent HIGHKEY is not equal duplicate key after update"); + afteritem = (BTItem) NULL; + } + else if (left_chained && !update_in_place) + { + ppage = BufferGetPage(pbuf); + ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); + if (!P_RIGHTMOST(ppageop) && + _bt_skeycmp(rel, keysz, newskey, ppage, + PageGetItemId(ppage, P_HIKEY), + BTGreaterStrategyNumber)) + afteritem = (BTItem) NULL; + } + if (afteritem == (BTItem) NULL) + { + rbuf = _bt_getbuf(rel, ppageop->btpo_next, BT_WRITE); + _bt_relbuf(rel, pbuf, BT_WRITE); + pbuf = rbuf; + } + + newres = _bt_insertonpg(rel, pbuf, stack->bts_parent, + keysz, newskey, new_item, + afteritem); + + /* be tidy */ + pfree(newres); + pfree(newskey); + pfree(new_item); } - } - else - { - _bt_relbuf(rel, buf, BT_WRITE); - _bt_relbuf(rel, rbuf, BT_WRITE); - } - - newskey = _bt_mkscankey(rel, &(new_item->bti_itup)); - - afteritem = stack->bts_btitem; - if ( parent_chained && !update_in_place ) - { - ppage = 
BufferGetPage(pbuf); - ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); - if ( ppageop->btpo_flags & BTP_CHAIN ) - elog (FATAL, "btree: unexpected BTP_CHAIN flag in parent after update"); - if ( P_RIGHTMOST (ppageop) ) - elog (FATAL, "btree: chained parent is RIGHTMOST after update"); - maxoff = PageGetMaxOffsetNumber (ppage); - if ( maxoff != P_FIRSTKEY ) - elog (FATAL, "btree: FIRSTKEY was unexpected in parent after update"); - if ( _bt_skeycmp (rel, keysz, newskey, ppage, - PageGetItemId(ppage, P_FIRSTKEY), - BTLessEqualStrategyNumber) ) - elog (FATAL, "btree: parent FIRSTKEY is >= duplicate key after update"); - if ( !_bt_skeycmp (rel, keysz, newskey, ppage, - PageGetItemId(ppage, P_HIKEY), - BTEqualStrategyNumber) ) - elog (FATAL, "btree: parent HIGHKEY is not equal duplicate key after update"); - afteritem = (BTItem) NULL; - } - else if ( left_chained && !update_in_place ) - { - ppage = BufferGetPage(pbuf); - ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); - if ( !P_RIGHTMOST (ppageop) && - _bt_skeycmp (rel, keysz, newskey, ppage, - PageGetItemId(ppage, P_HIKEY), - BTGreaterStrategyNumber) ) - afteritem = (BTItem) NULL; - } - if ( afteritem == (BTItem) NULL) - { - rbuf = _bt_getbuf(rel, ppageop->btpo_next, BT_WRITE); - _bt_relbuf(rel, pbuf, BT_WRITE); - pbuf = rbuf; - } - - newres = _bt_insertonpg(rel, pbuf, stack->bts_parent, - keysz, newskey, new_item, - afteritem); - - /* be tidy */ - pfree(newres); - pfree(newskey); - pfree(new_item); } - } else { - itup_off = _bt_pgaddtup(rel, buf, keysz, scankey, - itemsz, btitem, afteritem); - itup_blkno = BufferGetBlockNumber(buf); - - _bt_relbuf(rel, buf, BT_WRITE); - } - - /* by here, the new tuple is inserted */ - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); - - return (res); + else + { + itup_off = _bt_pgaddtup(rel, buf, keysz, scankey, + itemsz, btitem, afteritem); + itup_blkno = BufferGetBlockNumber(buf); + + 
_bt_relbuf(rel, buf, BT_WRITE); + } + + /* by here, the new tuple is inserted */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); + + return (res); } /* - * _bt_split() -- split a page in the btree. + * _bt_split() -- split a page in the btree. * - * On entry, buf is the page to split, and is write-locked and pinned. - * Returns the new right sibling of buf, pinned and write-locked. The - * pin and lock on buf are maintained. + * On entry, buf is the page to split, and is write-locked and pinned. + * Returns the new right sibling of buf, pinned and write-locked. The + * pin and lock on buf are maintained. */ -static Buffer +static Buffer _bt_split(Relation rel, Buffer buf, OffsetNumber firstright) { - Buffer rbuf; - Page origpage; - Page leftpage, rightpage; - BTPageOpaque ropaque, lopaque, oopaque; - Buffer sbuf; - Page spage; - BTPageOpaque sopaque; - Size itemsz; - ItemId itemid; - BTItem item; - OffsetNumber leftoff, rightoff; - OffsetNumber start; - OffsetNumber maxoff; - OffsetNumber i; - - rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); - origpage = BufferGetPage(buf); - leftpage = PageGetTempPage(origpage, sizeof(BTPageOpaqueData)); - rightpage = BufferGetPage(rbuf); - - _bt_pageinit(rightpage, BufferGetPageSize(rbuf)); - _bt_pageinit(leftpage, BufferGetPageSize(buf)); - - /* init btree private data */ - oopaque = (BTPageOpaque) PageGetSpecialPointer(origpage); - lopaque = (BTPageOpaque) PageGetSpecialPointer(leftpage); - ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage); - - /* if we're splitting this page, it won't be the root when we're done */ - oopaque->btpo_flags &= ~BTP_ROOT; - oopaque->btpo_flags &= ~BTP_CHAIN; - lopaque->btpo_flags = ropaque->btpo_flags = oopaque->btpo_flags; - lopaque->btpo_prev = oopaque->btpo_prev; - ropaque->btpo_prev = BufferGetBlockNumber(buf); - lopaque->btpo_next = BufferGetBlockNumber(rbuf); - ropaque->btpo_next = oopaque->btpo_next; - - /* - * 
If the page we're splitting is not the rightmost page at its - * level in the tree, then the first (0) entry on the page is the - * high key for the page. We need to copy that to the right - * half. Otherwise (meaning the rightmost page case), we should - * treat the line pointers beginning at zero as user data. - * - * We leave a blank space at the start of the line table for the - * left page. We'll come back later and fill it in with the high - * key item we get from the right key. - */ - - leftoff = P_FIRSTKEY; - ropaque->btpo_next = oopaque->btpo_next; - if (! P_RIGHTMOST(oopaque)) { - /* splitting a non-rightmost page, start at the first data item */ - start = P_FIRSTKEY; - - itemid = PageGetItemId(origpage, P_HIKEY); - itemsz = ItemIdGetLength(itemid); - item = (BTItem) PageGetItem(origpage, itemid); - if ( PageAddItem(rightpage, (Item) item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add hikey to the right sibling"); - rightoff = P_FIRSTKEY; - } else { - /* splitting a rightmost page, "high key" is the first data item */ - start = P_HIKEY; - - /* the new rightmost page will not have a high key */ - rightoff = P_HIKEY; - } - maxoff = PageGetMaxOffsetNumber(origpage); - if ( firstright == InvalidOffsetNumber ) - { - Size llimit = PageGetFreeSpace(leftpage) / 2; - firstright = _bt_findsplitloc(rel, origpage, start, maxoff, llimit); - } - - for (i = start; i <= maxoff; i = OffsetNumberNext(i)) { - itemid = PageGetItemId(origpage, i); + Buffer rbuf; + Page origpage; + Page leftpage, + rightpage; + BTPageOpaque ropaque, + lopaque, + oopaque; + Buffer sbuf; + Page spage; + BTPageOpaque sopaque; + Size itemsz; + ItemId itemid; + BTItem item; + OffsetNumber leftoff, + rightoff; + OffsetNumber start; + OffsetNumber maxoff; + OffsetNumber i; + + rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); + origpage = BufferGetPage(buf); + leftpage = PageGetTempPage(origpage, sizeof(BTPageOpaqueData)); + rightpage = BufferGetPage(rbuf); + + 
_bt_pageinit(rightpage, BufferGetPageSize(rbuf)); + _bt_pageinit(leftpage, BufferGetPageSize(buf)); + + /* init btree private data */ + oopaque = (BTPageOpaque) PageGetSpecialPointer(origpage); + lopaque = (BTPageOpaque) PageGetSpecialPointer(leftpage); + ropaque = (BTPageOpaque) PageGetSpecialPointer(rightpage); + + /* if we're splitting this page, it won't be the root when we're done */ + oopaque->btpo_flags &= ~BTP_ROOT; + oopaque->btpo_flags &= ~BTP_CHAIN; + lopaque->btpo_flags = ropaque->btpo_flags = oopaque->btpo_flags; + lopaque->btpo_prev = oopaque->btpo_prev; + ropaque->btpo_prev = BufferGetBlockNumber(buf); + lopaque->btpo_next = BufferGetBlockNumber(rbuf); + ropaque->btpo_next = oopaque->btpo_next; + + /* + * If the page we're splitting is not the rightmost page at its level + * in the tree, then the first (0) entry on the page is the high key + * for the page. We need to copy that to the right half. Otherwise + * (meaning the rightmost page case), we should treat the line + * pointers beginning at zero as user data. + * + * We leave a blank space at the start of the line table for the left + * page. We'll come back later and fill it in with the high key item + * we get from the right key. 
+ */ + + leftoff = P_FIRSTKEY; + ropaque->btpo_next = oopaque->btpo_next; + if (!P_RIGHTMOST(oopaque)) + { + /* splitting a non-rightmost page, start at the first data item */ + start = P_FIRSTKEY; + + itemid = PageGetItemId(origpage, P_HIKEY); + itemsz = ItemIdGetLength(itemid); + item = (BTItem) PageGetItem(origpage, itemid); + if (PageAddItem(rightpage, (Item) item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add hikey to the right sibling"); + rightoff = P_FIRSTKEY; + } + else + { + /* splitting a rightmost page, "high key" is the first data item */ + start = P_HIKEY; + + /* the new rightmost page will not have a high key */ + rightoff = P_HIKEY; + } + maxoff = PageGetMaxOffsetNumber(origpage); + if (firstright == InvalidOffsetNumber) + { + Size llimit = PageGetFreeSpace(leftpage) / 2; + + firstright = _bt_findsplitloc(rel, origpage, start, maxoff, llimit); + } + + for (i = start; i <= maxoff; i = OffsetNumberNext(i)) + { + itemid = PageGetItemId(origpage, i); + itemsz = ItemIdGetLength(itemid); + item = (BTItem) PageGetItem(origpage, itemid); + + /* decide which page to put it on */ + if (i < firstright) + { + if (PageAddItem(leftpage, (Item) item, itemsz, leftoff, + LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add item to the left sibling"); + leftoff = OffsetNumberNext(leftoff); + } + else + { + if (PageAddItem(rightpage, (Item) item, itemsz, rightoff, + LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add item to the right sibling"); + rightoff = OffsetNumberNext(rightoff); + } + } + + /* + * Okay, page has been split, high key on right page is correct. Now + * set the high key on the left page to be the min key on the right + * page. 
+ */ + + if (P_RIGHTMOST(ropaque)) + { + itemid = PageGetItemId(rightpage, P_HIKEY); + } + else + { + itemid = PageGetItemId(rightpage, P_FIRSTKEY); + } itemsz = ItemIdGetLength(itemid); - item = (BTItem) PageGetItem(origpage, itemid); - - /* decide which page to put it on */ - if (i < firstright) { - if ( PageAddItem(leftpage, (Item) item, itemsz, leftoff, - LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add item to the left sibling"); - leftoff = OffsetNumberNext(leftoff); - } else { - if ( PageAddItem(rightpage, (Item) item, itemsz, rightoff, - LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add item to the right sibling"); - rightoff = OffsetNumberNext(rightoff); + item = (BTItem) PageGetItem(rightpage, itemid); + + /* + * We left a hole for the high key on the left page; fill it. The + * modal crap is to tell the page manager to put the new item on the + * page and not screw around with anything else. Whoever designed + * this interface has presumably crawled back into the dung heap they + * came from. No one here will admit to it. + */ + + PageManagerModeSet(OverwritePageManagerMode); + if (PageAddItem(leftpage, (Item) item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add hikey to the left sibling"); + PageManagerModeSet(ShufflePageManagerMode); + + /* + * By here, the original data page has been split into two new halves, + * and these are correct. The algorithm requires that the left page + * never move during a split, so we copy the new left page back on top + * of the original. Note that this is not a waste of time, since we + * also require (in the page management code) that the center of a + * page always be clean, and the most efficient way to guarantee this + * is just to compact the data by reinserting it into a new left page. 
+ */ + + PageRestoreTempPage(leftpage, origpage); + + /* write these guys out */ + _bt_wrtnorelbuf(rel, rbuf); + _bt_wrtnorelbuf(rel, buf); + + /* + * Finally, we need to grab the right sibling (if any) and fix the + * prev pointer there. We are guaranteed that this is deadlock-free + * since no other writer will be moving holding a lock on that page + * and trying to move left, and all readers release locks on a page + * before trying to fetch its neighbors. + */ + + if (!P_RIGHTMOST(ropaque)) + { + sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE); + spage = BufferGetPage(sbuf); + sopaque = (BTPageOpaque) PageGetSpecialPointer(spage); + sopaque->btpo_prev = BufferGetBlockNumber(rbuf); + + /* write and release the old right sibling */ + _bt_wrtbuf(rel, sbuf); } - } - - /* - * Okay, page has been split, high key on right page is correct. Now - * set the high key on the left page to be the min key on the right - * page. - */ - - if (P_RIGHTMOST(ropaque)) { - itemid = PageGetItemId(rightpage, P_HIKEY); - } else { - itemid = PageGetItemId(rightpage, P_FIRSTKEY); - } - itemsz = ItemIdGetLength(itemid); - item = (BTItem) PageGetItem(rightpage, itemid); - - /* - * We left a hole for the high key on the left page; fill it. The - * modal crap is to tell the page manager to put the new item on the - * page and not screw around with anything else. Whoever designed - * this interface has presumably crawled back into the dung heap they - * came from. No one here will admit to it. - */ - - PageManagerModeSet(OverwritePageManagerMode); - if ( PageAddItem(leftpage, (Item) item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add hikey to the left sibling"); - PageManagerModeSet(ShufflePageManagerMode); - - /* - * By here, the original data page has been split into two new halves, - * and these are correct. The algorithm requires that the left page - * never move during a split, so we copy the new left page back on top - * of the original. 
Note that this is not a waste of time, since we - * also require (in the page management code) that the center of a - * page always be clean, and the most efficient way to guarantee this - * is just to compact the data by reinserting it into a new left page. - */ - - PageRestoreTempPage(leftpage, origpage); - - /* write these guys out */ - _bt_wrtnorelbuf(rel, rbuf); - _bt_wrtnorelbuf(rel, buf); - - /* - * Finally, we need to grab the right sibling (if any) and fix the - * prev pointer there. We are guaranteed that this is deadlock-free - * since no other writer will be moving holding a lock on that page - * and trying to move left, and all readers release locks on a page - * before trying to fetch its neighbors. - */ - - if (! P_RIGHTMOST(ropaque)) { - sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE); - spage = BufferGetPage(sbuf); - sopaque = (BTPageOpaque) PageGetSpecialPointer(spage); - sopaque->btpo_prev = BufferGetBlockNumber(rbuf); - - /* write and release the old right sibling */ - _bt_wrtbuf(rel, sbuf); - } - - /* split's done */ - return (rbuf); + + /* split's done */ + return (rbuf); } /* - * _bt_findsplitloc() -- find a safe place to split a page. + * _bt_findsplitloc() -- find a safe place to split a page. * - * In order to guarantee the proper handling of searches for duplicate - * keys, the first duplicate in the chain must either be the first - * item on the page after the split, or the entire chain must be on - * one of the two pages. That is, - * [1 2 2 2 3 4 5] - * must become - * [1] [2 2 2 3 4 5] - * or - * [1 2 2 2] [3 4 5] - * but not - * [1 2 2] [2 3 4 5]. - * However, - * [2 2 2 2 2 3 4] - * may be split as - * [2 2 2 2] [2 3 4]. + * In order to guarantee the proper handling of searches for duplicate + * keys, the first duplicate in the chain must either be the first + * item on the page after the split, or the entire chain must be on + * one of the two pages. 
That is, + * [1 2 2 2 3 4 5] + * must become + * [1] [2 2 2 3 4 5] + * or + * [1 2 2 2] [3 4 5] + * but not + * [1 2 2] [2 3 4 5]. + * However, + * [2 2 2 2 2 3 4] + * may be split as + * [2 2 2 2] [2 3 4]. */ -static OffsetNumber +static OffsetNumber _bt_findsplitloc(Relation rel, - Page page, - OffsetNumber start, - OffsetNumber maxoff, - Size llimit) + Page page, + OffsetNumber start, + OffsetNumber maxoff, + Size llimit) { - OffsetNumber i; - OffsetNumber saferight; - ItemId nxtitemid, safeitemid; - BTItem safeitem, nxtitem; - Size nbytes; - int natts; - - if ( start >= maxoff ) - elog (FATAL, "btree: cannot split if start (%d) >= maxoff (%d)", - start, maxoff); - natts = rel->rd_rel->relnatts; - saferight = start; - safeitemid = PageGetItemId(page, saferight); - nbytes = ItemIdGetLength(safeitemid) + sizeof(ItemIdData); - safeitem = (BTItem) PageGetItem(page, safeitemid); - - i = OffsetNumberNext(start); - - while (nbytes < llimit) - { - /* check the next item on the page */ - nxtitemid = PageGetItemId(page, i); - nbytes += (ItemIdGetLength(nxtitemid) + sizeof(ItemIdData)); - nxtitem = (BTItem) PageGetItem(page, nxtitemid); - - /* - * Test against last known safe item: - * if the tuple we're looking at isn't equal to the last safe - * one we saw, then it's our new safe tuple. 
- */ - if ( !_bt_itemcmp (rel, natts, - safeitem, nxtitem, BTEqualStrategyNumber) ) + OffsetNumber i; + OffsetNumber saferight; + ItemId nxtitemid, + safeitemid; + BTItem safeitem, + nxtitem; + Size nbytes; + int natts; + + if (start >= maxoff) + elog(FATAL, "btree: cannot split if start (%d) >= maxoff (%d)", + start, maxoff); + natts = rel->rd_rel->relnatts; + saferight = start; + safeitemid = PageGetItemId(page, saferight); + nbytes = ItemIdGetLength(safeitemid) + sizeof(ItemIdData); + safeitem = (BTItem) PageGetItem(page, safeitemid); + + i = OffsetNumberNext(start); + + while (nbytes < llimit) { - safeitem = nxtitem; - saferight = i; + /* check the next item on the page */ + nxtitemid = PageGetItemId(page, i); + nbytes += (ItemIdGetLength(nxtitemid) + sizeof(ItemIdData)); + nxtitem = (BTItem) PageGetItem(page, nxtitemid); + + /* + * Test against last known safe item: if the tuple we're looking + * at isn't equal to the last safe one we saw, then it's our new + * safe tuple. + */ + if (!_bt_itemcmp(rel, natts, + safeitem, nxtitem, BTEqualStrategyNumber)) + { + safeitem = nxtitem; + saferight = i; + } + if (i < maxoff) + i = OffsetNumberNext(i); + else + break; } - if ( i < maxoff ) - i = OffsetNumberNext(i); - else - break; - } - - /* - * If the chain of dups starts at the beginning of the page and extends - * past the halfway mark, we can split it in the middle. - */ - - if (saferight == start) - saferight = i; - - if ( saferight == maxoff && ( maxoff - start ) > 1 ) - saferight = start + ( maxoff - start ) / 2; - - return (saferight); + + /* + * If the chain of dups starts at the beginning of the page and + * extends past the halfway mark, we can split it in the middle. + */ + + if (saferight == start) + saferight = i; + + if (saferight == maxoff && (maxoff - start) > 1) + saferight = start + (maxoff - start) / 2; + + return (saferight); } /* - * _bt_newroot() -- Create a new root page for the index. + * _bt_newroot() -- Create a new root page for the index. 
* - * We've just split the old root page and need to create a new one. - * In order to do this, we add a new root page to the file, then lock - * the metadata page and update it. This is guaranteed to be deadlock- - * free, because all readers release their locks on the metadata page - * before trying to lock the root, and all writers lock the root before - * trying to lock the metadata page. We have a write lock on the old - * root page, so we have not introduced any cycles into the waits-for - * graph. + * We've just split the old root page and need to create a new one. + * In order to do this, we add a new root page to the file, then lock + * the metadata page and update it. This is guaranteed to be deadlock- + * free, because all readers release their locks on the metadata page + * before trying to lock the root, and all writers lock the root before + * trying to lock the metadata page. We have a write lock on the old + * root page, so we have not introduced any cycles into the waits-for + * graph. * - * On entry, lbuf (the old root) and rbuf (its new peer) are write- - * locked. We don't drop the locks in this routine; that's done by - * the caller. On exit, a new root page exists with entries for the - * two new children. The new root page is neither pinned nor locked. + * On entry, lbuf (the old root) and rbuf (its new peer) are write- + * locked. We don't drop the locks in this routine; that's done by + * the caller. On exit, a new root page exists with entries for the + * two new children. The new root page is neither pinned nor locked. 
*/ static void _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) { - Buffer rootbuf; - Page lpage, rpage, rootpage; - BlockNumber lbkno, rbkno; - BlockNumber rootbknum; - BTPageOpaque rootopaque; - ItemId itemid; - BTItem item; - Size itemsz; - BTItem new_item; - - /* get a new root page */ - rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); - rootpage = BufferGetPage(rootbuf); - _bt_pageinit(rootpage, BufferGetPageSize(rootbuf)); - - /* set btree special data */ - rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); - rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE; - rootopaque->btpo_flags |= BTP_ROOT; - - /* - * Insert the internal tuple pointers. - */ - - lbkno = BufferGetBlockNumber(lbuf); - rbkno = BufferGetBlockNumber(rbuf); - lpage = BufferGetPage(lbuf); - rpage = BufferGetPage(rbuf); - - /* - * step over the high key on the left page while building the - * left page pointer. - */ - itemid = PageGetItemId(lpage, P_FIRSTKEY); - itemsz = ItemIdGetLength(itemid); - item = (BTItem) PageGetItem(lpage, itemid); - new_item = _bt_formitem(&(item->bti_itup)); - ItemPointerSet(&(new_item->bti_itup.t_tid), lbkno, P_HIKEY); - - /* - * insert the left page pointer into the new root page. the root - * page is the rightmost page on its level so the "high key" item - * is the first data item. - */ - if ( PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add leftkey to new root page"); - pfree(new_item); - - /* - * the right page is the rightmost page on the second level, so - * the "high key" item is the first data item on that page as well. - */ - itemid = PageGetItemId(rpage, P_HIKEY); - itemsz = ItemIdGetLength(itemid); - item = (BTItem) PageGetItem(rpage, itemid); - new_item = _bt_formitem(&(item->bti_itup)); - ItemPointerSet(&(new_item->bti_itup.t_tid), rbkno, P_HIKEY); - - /* - * insert the right page pointer into the new root page. 
- */ - if ( PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add rightkey to new root page"); - pfree(new_item); - - /* write and let go of the root buffer */ - rootbknum = BufferGetBlockNumber(rootbuf); - _bt_wrtbuf(rel, rootbuf); - - /* update metadata page with new root block number */ - _bt_metaproot(rel, rootbknum, 0); + Buffer rootbuf; + Page lpage, + rpage, + rootpage; + BlockNumber lbkno, + rbkno; + BlockNumber rootbknum; + BTPageOpaque rootopaque; + ItemId itemid; + BTItem item; + Size itemsz; + BTItem new_item; + + /* get a new root page */ + rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); + rootpage = BufferGetPage(rootbuf); + _bt_pageinit(rootpage, BufferGetPageSize(rootbuf)); + + /* set btree special data */ + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); + rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE; + rootopaque->btpo_flags |= BTP_ROOT; + + /* + * Insert the internal tuple pointers. + */ + + lbkno = BufferGetBlockNumber(lbuf); + rbkno = BufferGetBlockNumber(rbuf); + lpage = BufferGetPage(lbuf); + rpage = BufferGetPage(rbuf); + + /* + * step over the high key on the left page while building the left + * page pointer. + */ + itemid = PageGetItemId(lpage, P_FIRSTKEY); + itemsz = ItemIdGetLength(itemid); + item = (BTItem) PageGetItem(lpage, itemid); + new_item = _bt_formitem(&(item->bti_itup)); + ItemPointerSet(&(new_item->bti_itup.t_tid), lbkno, P_HIKEY); + + /* + * insert the left page pointer into the new root page. the root page + * is the rightmost page on its level so the "high key" item is the + * first data item. + */ + if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add leftkey to new root page"); + pfree(new_item); + + /* + * the right page is the rightmost page on the second level, so the + * "high key" item is the first data item on that page as well. 
+ */ + itemid = PageGetItemId(rpage, P_HIKEY); + itemsz = ItemIdGetLength(itemid); + item = (BTItem) PageGetItem(rpage, itemid); + new_item = _bt_formitem(&(item->bti_itup)); + ItemPointerSet(&(new_item->bti_itup.t_tid), rbkno, P_HIKEY); + + /* + * insert the right page pointer into the new root page. + */ + if (PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add rightkey to new root page"); + pfree(new_item); + + /* write and let go of the root buffer */ + rootbknum = BufferGetBlockNumber(rootbuf); + _bt_wrtbuf(rel, rootbuf); + + /* update metadata page with new root block number */ + _bt_metaproot(rel, rootbknum, 0); } /* - * _bt_pgaddtup() -- add a tuple to a particular page in the index. + * _bt_pgaddtup() -- add a tuple to a particular page in the index. * - * This routine adds the tuple to the page as requested, and keeps the - * write lock and reference associated with the page's buffer. It is - * an error to call pgaddtup() without a write lock and reference. If - * afteritem is non-null, it's the item that we expect our new item - * to follow. Otherwise, we do a binary search for the correct place - * and insert the new item there. + * This routine adds the tuple to the page as requested, and keeps the + * write lock and reference associated with the page's buffer. It is + * an error to call pgaddtup() without a write lock and reference. If + * afteritem is non-null, it's the item that we expect our new item + * to follow. Otherwise, we do a binary search for the correct place + * and insert the new item there. 
*/ -static OffsetNumber +static OffsetNumber _bt_pgaddtup(Relation rel, - Buffer buf, - int keysz, - ScanKey itup_scankey, - Size itemsize, - BTItem btitem, - BTItem afteritem) + Buffer buf, + int keysz, + ScanKey itup_scankey, + Size itemsize, + BTItem btitem, + BTItem afteritem) { - OffsetNumber itup_off; - OffsetNumber first; - Page page; - BTPageOpaque opaque; - BTItem chkitem; - - page = BufferGetPage(buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - first = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - - if (afteritem == (BTItem) NULL) { - itup_off = _bt_binsrch(rel, buf, keysz, itup_scankey, BT_INSERTION); - } else { - itup_off = first; - - do { - chkitem = - (BTItem) PageGetItem(page, PageGetItemId(page, itup_off)); - itup_off = OffsetNumberNext(itup_off); - } while ( ! BTItemSame (chkitem, afteritem) ); - } - - if ( PageAddItem(page, (Item) btitem, itemsize, itup_off, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add item to the page"); - - /* write the buffer, but hold our lock */ - _bt_wrtnorelbuf(rel, buf); - - return (itup_off); + OffsetNumber itup_off; + OffsetNumber first; + Page page; + BTPageOpaque opaque; + BTItem chkitem; + + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + first = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + + if (afteritem == (BTItem) NULL) + { + itup_off = _bt_binsrch(rel, buf, keysz, itup_scankey, BT_INSERTION); + } + else + { + itup_off = first; + + do + { + chkitem = + (BTItem) PageGetItem(page, PageGetItemId(page, itup_off)); + itup_off = OffsetNumberNext(itup_off); + } while (!BTItemSame(chkitem, afteritem)); + } + + if (PageAddItem(page, (Item) btitem, itemsize, itup_off, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add item to the page"); + + /* write the buffer, but hold our lock */ + _bt_wrtnorelbuf(rel, buf); + + return (itup_off); } /* - * _bt_goesonpg() -- Does a new tuple belong on this page? 
+ * _bt_goesonpg() -- Does a new tuple belong on this page? * - * This is part of the complexity introduced by allowing duplicate - * keys into the index. The tuple belongs on this page if: + * This is part of the complexity introduced by allowing duplicate + * keys into the index. The tuple belongs on this page if: * - * + there is no page to the right of this one; or - * + it is less than the high key on the page; or - * + the item it is to follow ("afteritem") appears on this - * page. + * + there is no page to the right of this one; or + * + it is less than the high key on the page; or + * + the item it is to follow ("afteritem") appears on this + * page. */ -static bool +static bool _bt_goesonpg(Relation rel, - Buffer buf, - Size keysz, - ScanKey scankey, - BTItem afteritem) + Buffer buf, + Size keysz, + ScanKey scankey, + BTItem afteritem) { - Page page; - ItemId hikey; - BTPageOpaque opaque; - BTItem chkitem; - OffsetNumber offnum, maxoff; - bool found; - - page = BufferGetPage(buf); - - /* no right neighbor? */ - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - if (P_RIGHTMOST(opaque)) - return (true); - - /* - * this is a non-rightmost page, so it must have a high key item. - * - * If the scan key is < the high key (the min key on the next page), - * then it for sure belongs here. - */ - hikey = PageGetItemId(page, P_HIKEY); - if (_bt_skeycmp(rel, keysz, scankey, page, hikey, BTLessStrategyNumber)) - return (true); - - /* - * If the scan key is > the high key, then it for sure doesn't belong - * here. - */ - - if (_bt_skeycmp(rel, keysz, scankey, page, hikey, BTGreaterStrategyNumber)) - return (false); - - /* - * If we have no adjacency information, and the item is equal to the - * high key on the page (by here it is), then the item does not belong - * on this page. - * - * Now it's not true in all cases. 
- vadim 06/10/97 - */ - - if (afteritem == (BTItem) NULL) - { - if ( opaque->btpo_flags & BTP_LEAF ) - return (false); - if ( opaque->btpo_flags & BTP_CHAIN ) - return (true); - if ( _bt_skeycmp (rel, keysz, scankey, page, - PageGetItemId(page, P_FIRSTKEY), - BTEqualStrategyNumber) ) - return (true); - return (false); - } - - /* damn, have to work for it. i hate that. */ - maxoff = PageGetMaxOffsetNumber(page); - - /* - * Search the entire page for the afteroid. We need to do this, rather - * than doing a binary search and starting from there, because if the - * key we're searching for is the leftmost key in the tree at this - * level, then a binary search will do the wrong thing. Splits are - * pretty infrequent, so the cost isn't as bad as it could be. - */ - - found = false; - for (offnum = P_FIRSTKEY; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) { - chkitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); - - if ( BTItemSame (chkitem, afteritem) ) { - found = true; - break; + Page page; + ItemId hikey; + BTPageOpaque opaque; + BTItem chkitem; + OffsetNumber offnum, + maxoff; + bool found; + + page = BufferGetPage(buf); + + /* no right neighbor? */ + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (P_RIGHTMOST(opaque)) + return (true); + + /* + * this is a non-rightmost page, so it must have a high key item. + * + * If the scan key is < the high key (the min key on the next page), then + * it for sure belongs here. + */ + hikey = PageGetItemId(page, P_HIKEY); + if (_bt_skeycmp(rel, keysz, scankey, page, hikey, BTLessStrategyNumber)) + return (true); + + /* + * If the scan key is > the high key, then it for sure doesn't belong + * here. + */ + + if (_bt_skeycmp(rel, keysz, scankey, page, hikey, BTGreaterStrategyNumber)) + return (false); + + /* + * If we have no adjacency information, and the item is equal to the + * high key on the page (by here it is), then the item does not belong + * on this page. 
+ * + * Now it's not true in all cases. - vadim 06/10/97 + */ + + if (afteritem == (BTItem) NULL) + { + if (opaque->btpo_flags & BTP_LEAF) + return (false); + if (opaque->btpo_flags & BTP_CHAIN) + return (true); + if (_bt_skeycmp(rel, keysz, scankey, page, + PageGetItemId(page, P_FIRSTKEY), + BTEqualStrategyNumber)) + return (true); + return (false); + } + + /* damn, have to work for it. i hate that. */ + maxoff = PageGetMaxOffsetNumber(page); + + /* + * Search the entire page for the afteroid. We need to do this, + * rather than doing a binary search and starting from there, because + * if the key we're searching for is the leftmost key in the tree at + * this level, then a binary search will do the wrong thing. Splits + * are pretty infrequent, so the cost isn't as bad as it could be. + */ + + found = false; + for (offnum = P_FIRSTKEY; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + chkitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); + + if (BTItemSame(chkitem, afteritem)) + { + found = true; + break; + } } - } - - return (found); + + return (found); } /* - * _bt_itemcmp() -- compare item1 to item2 using a requested - * strategy (<, <=, =, >=, >) + * _bt_itemcmp() -- compare item1 to item2 using a requested + * strategy (<, <=, =, >=, >) * */ bool _bt_itemcmp(Relation rel, - Size keysz, - BTItem item1, - BTItem item2, - StrategyNumber strat) + Size keysz, + BTItem item1, + BTItem item2, + StrategyNumber strat) { - TupleDesc tupDes; - IndexTuple indexTuple1, indexTuple2; - Datum attrDatum1, attrDatum2; - int i; - bool isFirstNull, isSecondNull; - bool compare; - bool useEqual = false; - - if ( strat == BTLessEqualStrategyNumber ) - { - useEqual = true; - strat = BTLessStrategyNumber; - } - else if ( strat == BTGreaterEqualStrategyNumber ) - { - useEqual = true; - strat = BTGreaterStrategyNumber; - } - - tupDes = RelationGetTupleDescriptor(rel); - indexTuple1 = &(item1->bti_itup); - indexTuple2 = &(item2->bti_itup); - - for (i = 1; i 
<= keysz; i++) { - attrDatum1 = index_getattr(indexTuple1, i, tupDes, &isFirstNull); - attrDatum2 = index_getattr(indexTuple2, i, tupDes, &isSecondNull); - - /* see comments about NULLs handling in btbuild */ - if ( isFirstNull ) /* attr in item1 is NULL */ + TupleDesc tupDes; + IndexTuple indexTuple1, + indexTuple2; + Datum attrDatum1, + attrDatum2; + int i; + bool isFirstNull, + isSecondNull; + bool compare; + bool useEqual = false; + + if (strat == BTLessEqualStrategyNumber) { - if ( isSecondNull ) /* attr in item2 is NULL too */ - compare = ( strat == BTEqualStrategyNumber ) ? true : false; - else - compare = ( strat == BTGreaterStrategyNumber ) ? true : false; - } - else if ( isSecondNull ) /* attr in item1 is NOT_NULL and */ - { /* and attr in item2 is NULL */ - compare = ( strat == BTLessStrategyNumber ) ? true : false; - } - else - { - compare = _bt_invokestrat(rel, i, strat, attrDatum1, attrDatum2); + useEqual = true; + strat = BTLessStrategyNumber; } - - if ( compare ) /* true for one of ">, <, =" */ + else if (strat == BTGreaterEqualStrategyNumber) { - if ( strat != BTEqualStrategyNumber ) - return (true); + useEqual = true; + strat = BTGreaterStrategyNumber; } - else /* false for one of ">, <, =" */ + + tupDes = RelationGetTupleDescriptor(rel); + indexTuple1 = &(item1->bti_itup); + indexTuple2 = &(item2->bti_itup); + + for (i = 1; i <= keysz; i++) { - if ( strat == BTEqualStrategyNumber ) - return (false); - /* - * if original strat was "<=, >=" OR - * "<, >" but some attribute(s) left - * - need to test for Equality - */ - if ( useEqual || i < keysz ) - { - if ( isFirstNull || isSecondNull ) - compare = ( isFirstNull && isSecondNull ) ? 
true : false; - else - compare = _bt_invokestrat(rel, i, BTEqualStrategyNumber, - attrDatum1, attrDatum2); - if ( compare ) /* item1' and item2' attributes are equal */ - continue; /* - try to compare next attributes */ - } - return (false); + attrDatum1 = index_getattr(indexTuple1, i, tupDes, &isFirstNull); + attrDatum2 = index_getattr(indexTuple2, i, tupDes, &isSecondNull); + + /* see comments about NULLs handling in btbuild */ + if (isFirstNull) /* attr in item1 is NULL */ + { + if (isSecondNull) /* attr in item2 is NULL too */ + compare = (strat == BTEqualStrategyNumber) ? true : false; + else + compare = (strat == BTGreaterStrategyNumber) ? true : false; + } + else if (isSecondNull) /* attr in item1 is NOT_NULL and */ + { /* and attr in item2 is NULL */ + compare = (strat == BTLessStrategyNumber) ? true : false; + } + else + { + compare = _bt_invokestrat(rel, i, strat, attrDatum1, attrDatum2); + } + + if (compare) /* true for one of ">, <, =" */ + { + if (strat != BTEqualStrategyNumber) + return (true); + } + else +/* false for one of ">, <, =" */ + { + if (strat == BTEqualStrategyNumber) + return (false); + + /* + * if original strat was "<=, >=" OR "<, >" but some + * attribute(s) left - need to test for Equality + */ + if (useEqual || i < keysz) + { + if (isFirstNull || isSecondNull) + compare = (isFirstNull && isSecondNull) ? 
true : false; + else + compare = _bt_invokestrat(rel, i, BTEqualStrategyNumber, + attrDatum1, attrDatum2); + if (compare) /* item1' and item2' attributes are equal */ + continue; /* - try to compare next attributes */ + } + return (false); + } } - } - return (true); + return (true); } /* - * _bt_updateitem() -- updates the key of the item identified by the - * oid with the key of newItem (done in place if - * possible) + * _bt_updateitem() -- updates the key of the item identified by the + * oid with the key of newItem (done in place if + * possible) * */ static void _bt_updateitem(Relation rel, - Size keysz, - Buffer buf, - BTItem oldItem, - BTItem newItem) + Size keysz, + Buffer buf, + BTItem oldItem, + BTItem newItem) { - Page page; - OffsetNumber maxoff; - OffsetNumber i; - ItemPointerData itemPtrData; - BTItem item; - IndexTuple oldIndexTuple, newIndexTuple; - int first; - - page = BufferGetPage(buf); - maxoff = PageGetMaxOffsetNumber(page); - - /* locate item on the page */ - first = P_RIGHTMOST((BTPageOpaque) PageGetSpecialPointer(page)) - ? P_HIKEY : P_FIRSTKEY; - i = first; - do { - item = (BTItem) PageGetItem(page, PageGetItemId(page, i)); - i = OffsetNumberNext(i); - } while (i <= maxoff && ! BTItemSame (item, oldItem)); - - /* this should never happen (in theory) */ - if ( ! 
BTItemSame (item, oldItem) ) { - elog(FATAL, "_bt_getstackbuf was lying!!"); - } - - /* - * It's defined by caller (_bt_insertonpg) - */ - /* - if(IndexTupleDSize(newItem->bti_itup) > - IndexTupleDSize(item->bti_itup)) { - elog(NOTICE, "trying to overwrite a smaller value with a bigger one in _bt_updateitem"); - elog(WARN, "this is not good."); - } - */ - - oldIndexTuple = &(item->bti_itup); - newIndexTuple = &(newItem->bti_itup); + Page page; + OffsetNumber maxoff; + OffsetNumber i; + ItemPointerData itemPtrData; + BTItem item; + IndexTuple oldIndexTuple, + newIndexTuple; + int first; + + page = BufferGetPage(buf); + maxoff = PageGetMaxOffsetNumber(page); + + /* locate item on the page */ + first = P_RIGHTMOST((BTPageOpaque) PageGetSpecialPointer(page)) + ? P_HIKEY : P_FIRSTKEY; + i = first; + do + { + item = (BTItem) PageGetItem(page, PageGetItemId(page, i)); + i = OffsetNumberNext(i); + } while (i <= maxoff && !BTItemSame(item, oldItem)); + + /* this should never happen (in theory) */ + if (!BTItemSame(item, oldItem)) + { + elog(FATAL, "_bt_getstackbuf was lying!!"); + } + + /* + * It's defined by caller (_bt_insertonpg) + */ + + /* + * if(IndexTupleDSize(newItem->bti_itup) > + * IndexTupleDSize(item->bti_itup)) { elog(NOTICE, "trying to + * overwrite a smaller value with a bigger one in _bt_updateitem"); + * elog(WARN, "this is not good."); } + */ + + oldIndexTuple = &(item->bti_itup); + newIndexTuple = &(newItem->bti_itup); /* keep the original item pointer */ - ItemPointerCopy(&(oldIndexTuple->t_tid), &itemPtrData); - CopyIndexTuple(newIndexTuple, &oldIndexTuple); - ItemPointerCopy(&itemPtrData, &(oldIndexTuple->t_tid)); - + ItemPointerCopy(&(oldIndexTuple->t_tid), &itemPtrData); + CopyIndexTuple(newIndexTuple, &oldIndexTuple); + ItemPointerCopy(&itemPtrData, &(oldIndexTuple->t_tid)); + } /* @@ -1409,177 +1460,179 @@ _bt_updateitem(Relation rel, * * Rule is simple: NOT_NULL not equal NULL, NULL not_equal NULL too. 
*/ -static bool -_bt_isequal (TupleDesc itupdesc, Page page, OffsetNumber offnum, - int keysz, ScanKey scankey) +static bool +_bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, + int keysz, ScanKey scankey) { - Datum datum; - BTItem btitem; - IndexTuple itup; - ScanKey entry; - AttrNumber attno; - long result; - int i; - bool null; - - btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &(btitem->bti_itup); - - for (i = 1; i <= keysz; i++) - { - entry = &scankey[i - 1]; - attno = entry->sk_attno; - Assert (attno == i); - datum = index_getattr(itup, attno, itupdesc, &null); - - /* NULLs are not equal */ - if ( entry->sk_flags & SK_ISNULL || null ) - return (false); - - result = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure, - entry->sk_argument, datum); - if (result != 0) - return (false); - } - - /* by here, the keys are equal */ - return (true); + Datum datum; + BTItem btitem; + IndexTuple itup; + ScanKey entry; + AttrNumber attno; + long result; + int i; + bool null; + + btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &(btitem->bti_itup); + + for (i = 1; i <= keysz; i++) + { + entry = &scankey[i - 1]; + attno = entry->sk_attno; + Assert(attno == i); + datum = index_getattr(itup, attno, itupdesc, &null); + + /* NULLs are not equal */ + if (entry->sk_flags & SK_ISNULL || null) + return (false); + + result = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure, + entry->sk_argument, datum); + if (result != 0) + return (false); + } + + /* by here, the keys are equal */ + return (true); } #ifdef NOT_USED /* - * _bt_shift - insert btitem on the passed page after shifting page - * to the right in the tree. + * _bt_shift - insert btitem on the passed page after shifting page + * to the right in the tree. * * NOTE: tested for shifting leftmost page only, having btitem < hikey. 
*/ -static InsertIndexResult -_bt_shift (Relation rel, Buffer buf, BTStack stack, int keysz, - ScanKey scankey, BTItem btitem, BTItem hikey) +static InsertIndexResult +_bt_shift(Relation rel, Buffer buf, BTStack stack, int keysz, + ScanKey scankey, BTItem btitem, BTItem hikey) { - InsertIndexResult res; - int itemsz; - Page page; - BlockNumber bknum; - BTPageOpaque pageop; - Buffer rbuf; - Page rpage; - BTPageOpaque rpageop; - Buffer pbuf; - Page ppage; - BTPageOpaque ppageop; - Buffer nbuf; - Page npage; - BTPageOpaque npageop; - BlockNumber nbknum; - BTItem nitem; - OffsetNumber afteroff; - - btitem = _bt_formitem(&(btitem->bti_itup)); - hikey = _bt_formitem(&(hikey->bti_itup)); - - page = BufferGetPage(buf); - - /* grab new page */ - nbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); - nbknum = BufferGetBlockNumber(nbuf); - npage = BufferGetPage(nbuf); - _bt_pageinit(npage, BufferGetPageSize(nbuf)); - npageop = (BTPageOpaque) PageGetSpecialPointer(npage); - - /* copy content of the passed page */ - memmove ((char *) npage, (char *) page, BufferGetPageSize(buf)); - - /* re-init old (passed) page */ - _bt_pageinit(page, BufferGetPageSize(buf)); - pageop = (BTPageOpaque) PageGetSpecialPointer(page); - - /* init old page opaque */ - pageop->btpo_flags = npageop->btpo_flags; /* restore flags */ - pageop->btpo_flags &= ~BTP_CHAIN; - if ( _bt_itemcmp (rel, keysz, hikey, btitem, BTEqualStrategyNumber) ) - pageop->btpo_flags |= BTP_CHAIN; - pageop->btpo_prev = npageop->btpo_prev; /* restore prev */ - pageop->btpo_next = nbknum; /* next points to the new page */ - - /* init shifted page opaque */ - npageop->btpo_prev = bknum = BufferGetBlockNumber(buf); - - /* shifted page is ok, populate old page */ - - /* add passed hikey */ - itemsz = IndexTupleDSize(hikey->bti_itup) - + (sizeof(BTItemData) - sizeof(IndexTupleData)); - itemsz = DOUBLEALIGN(itemsz); - if ( PageAddItem(page, (Item) hikey, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add hikey 
in _bt_shift"); - pfree (hikey); - - /* add btitem */ - itemsz = IndexTupleDSize(btitem->bti_itup) - + (sizeof(BTItemData) - sizeof(IndexTupleData)); - itemsz = DOUBLEALIGN(itemsz); - if ( PageAddItem(page, (Item) btitem, itemsz, P_FIRSTKEY, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add firstkey in _bt_shift"); - pfree (btitem); - nitem = (BTItem) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY)); - btitem = _bt_formitem(&(nitem->bti_itup)); - ItemPointerSet(&(btitem->bti_itup.t_tid), bknum, P_HIKEY); - - /* ok, write them out */ - _bt_wrtnorelbuf(rel, nbuf); - _bt_wrtnorelbuf(rel, buf); - - /* fix btpo_prev on right sibling of old page */ - if ( !P_RIGHTMOST (npageop) ) - { - rbuf = _bt_getbuf(rel, npageop->btpo_next, BT_WRITE); - rpage = BufferGetPage(rbuf); - rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage); - rpageop->btpo_prev = nbknum; - _bt_wrtbuf(rel, rbuf); - } - - /* get parent pointing to the old page */ - ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), - bknum, P_HIKEY); - pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); - ppage = BufferGetPage(pbuf); - ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); - - _bt_relbuf(rel, nbuf, BT_WRITE); - _bt_relbuf(rel, buf, BT_WRITE); - - /* re-set parent' pointer - we shifted our page to the right ! */ - nitem = (BTItem) PageGetItem (ppage, - PageGetItemId (ppage, stack->bts_offset)); - ItemPointerSet(&(nitem->bti_itup.t_tid), nbknum, P_HIKEY); - ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), nbknum, P_HIKEY); - _bt_wrtnorelbuf(rel, pbuf); - - /* - * Now we want insert into the parent pointer to our old page. It has to - * be inserted before the pointer to new page. You may get problems here - * (in the _bt_goesonpg and/or _bt_pgaddtup), but may be not - I don't - * know. It works if old page is leftmost (nitem is NULL) and - * btitem < hikey and it's all what we need currently. 
- vadim 05/30/97 - */ - nitem = NULL; - afteroff = P_FIRSTKEY; - if ( !P_RIGHTMOST (ppageop) ) - afteroff = OffsetNumberNext (afteroff); - if ( stack->bts_offset >= afteroff ) - { - afteroff = OffsetNumberPrev (stack->bts_offset); - nitem = (BTItem) PageGetItem (ppage, PageGetItemId (ppage, afteroff)); - nitem = _bt_formitem(&(nitem->bti_itup)); - } - res = _bt_insertonpg(rel, pbuf, stack->bts_parent, - keysz, scankey, btitem, nitem); - pfree (btitem); - - ItemPointerSet(&(res->pointerData), nbknum, P_HIKEY); - - return (res); + InsertIndexResult res; + int itemsz; + Page page; + BlockNumber bknum; + BTPageOpaque pageop; + Buffer rbuf; + Page rpage; + BTPageOpaque rpageop; + Buffer pbuf; + Page ppage; + BTPageOpaque ppageop; + Buffer nbuf; + Page npage; + BTPageOpaque npageop; + BlockNumber nbknum; + BTItem nitem; + OffsetNumber afteroff; + + btitem = _bt_formitem(&(btitem->bti_itup)); + hikey = _bt_formitem(&(hikey->bti_itup)); + + page = BufferGetPage(buf); + + /* grab new page */ + nbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); + nbknum = BufferGetBlockNumber(nbuf); + npage = BufferGetPage(nbuf); + _bt_pageinit(npage, BufferGetPageSize(nbuf)); + npageop = (BTPageOpaque) PageGetSpecialPointer(npage); + + /* copy content of the passed page */ + memmove((char *) npage, (char *) page, BufferGetPageSize(buf)); + + /* re-init old (passed) page */ + _bt_pageinit(page, BufferGetPageSize(buf)); + pageop = (BTPageOpaque) PageGetSpecialPointer(page); + + /* init old page opaque */ + pageop->btpo_flags = npageop->btpo_flags; /* restore flags */ + pageop->btpo_flags &= ~BTP_CHAIN; + if (_bt_itemcmp(rel, keysz, hikey, btitem, BTEqualStrategyNumber)) + pageop->btpo_flags |= BTP_CHAIN; + pageop->btpo_prev = npageop->btpo_prev; /* restore prev */ + pageop->btpo_next = nbknum; /* next points to the new page */ + + /* init shifted page opaque */ + npageop->btpo_prev = bknum = BufferGetBlockNumber(buf); + + /* shifted page is ok, populate old page */ + + /* add passed hikey */ + itemsz = 
IndexTupleDSize(hikey->bti_itup) + + (sizeof(BTItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + if (PageAddItem(page, (Item) hikey, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add hikey in _bt_shift"); + pfree(hikey); + + /* add btitem */ + itemsz = IndexTupleDSize(btitem->bti_itup) + + (sizeof(BTItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + if (PageAddItem(page, (Item) btitem, itemsz, P_FIRSTKEY, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add firstkey in _bt_shift"); + pfree(btitem); + nitem = (BTItem) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY)); + btitem = _bt_formitem(&(nitem->bti_itup)); + ItemPointerSet(&(btitem->bti_itup.t_tid), bknum, P_HIKEY); + + /* ok, write them out */ + _bt_wrtnorelbuf(rel, nbuf); + _bt_wrtnorelbuf(rel, buf); + + /* fix btpo_prev on right sibling of old page */ + if (!P_RIGHTMOST(npageop)) + { + rbuf = _bt_getbuf(rel, npageop->btpo_next, BT_WRITE); + rpage = BufferGetPage(rbuf); + rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage); + rpageop->btpo_prev = nbknum; + _bt_wrtbuf(rel, rbuf); + } + + /* get parent pointing to the old page */ + ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), + bknum, P_HIKEY); + pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); + ppage = BufferGetPage(pbuf); + ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); + + _bt_relbuf(rel, nbuf, BT_WRITE); + _bt_relbuf(rel, buf, BT_WRITE); + + /* re-set parent' pointer - we shifted our page to the right ! */ + nitem = (BTItem) PageGetItem(ppage, + PageGetItemId(ppage, stack->bts_offset)); + ItemPointerSet(&(nitem->bti_itup.t_tid), nbknum, P_HIKEY); + ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), nbknum, P_HIKEY); + _bt_wrtnorelbuf(rel, pbuf); + + /* + * Now we want insert into the parent pointer to our old page. It has + * to be inserted before the pointer to new page. 
You may get problems + * here (in the _bt_goesonpg and/or _bt_pgaddtup), but may be not - I + * don't know. It works if old page is leftmost (nitem is NULL) and + * btitem < hikey and it's all what we need currently. - vadim + * 05/30/97 + */ + nitem = NULL; + afteroff = P_FIRSTKEY; + if (!P_RIGHTMOST(ppageop)) + afteroff = OffsetNumberNext(afteroff); + if (stack->bts_offset >= afteroff) + { + afteroff = OffsetNumberPrev(stack->bts_offset); + nitem = (BTItem) PageGetItem(ppage, PageGetItemId(ppage, afteroff)); + nitem = _bt_formitem(&(nitem->bti_itup)); + } + res = _bt_insertonpg(rel, pbuf, stack->bts_parent, + keysz, scankey, btitem, nitem); + pfree(btitem); + + ItemPointerSet(&(res->pointerData), nbknum, P_HIKEY); + + return (res); } + #endif diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 9142c55737..6551af4c17 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -1,21 +1,21 @@ /*------------------------------------------------------------------------- * * nbtpage.c-- - * BTree-specific page management code for the Postgres btree access - * method. + * BTree-specific page management code for the Postgres btree access + * method. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.9 1997/08/19 21:29:36 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.10 1997/09/07 04:38:52 momjian Exp $ * - * NOTES - * Postgres btree pages look like ordinary relation pages. The opaque - * data at high addresses includes pointers to left and right siblings - * and flag data describing page state. The first page in a btree, page - * zero, is special -- it stores meta-information describing the tree. - * Pages one and higher store the actual tree data. + * NOTES + * Postgres btree pages look like ordinary relation pages. 
The opaque + * data at high addresses includes pointers to left and right siblings + * and flag data describing page state. The first page in a btree, page + * zero, is special -- it stores meta-information describing the tree. + * Pages one and higher store the actual tree data. * *------------------------------------------------------------------------- */ @@ -31,16 +31,16 @@ #include <storage/lmgr.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static void _bt_setpagelock(Relation rel, BlockNumber blkno, int access); -static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access); +static void _bt_setpagelock(Relation rel, BlockNumber blkno, int access); +static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access); #define BTREE_METAPAGE 0 -#define BTREE_MAGIC 0x053162 +#define BTREE_MAGIC 0x053162 #ifdef BTREE_VERSION_1 #define BTREE_VERSION 1 @@ -48,546 +48,574 @@ static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access); #define BTREE_VERSION 0 #endif -typedef struct BTMetaPageData { - uint32 btm_magic; - uint32 btm_version; - BlockNumber btm_root; +typedef struct BTMetaPageData +{ + uint32 btm_magic; + uint32 btm_version; + BlockNumber btm_root; #ifdef BTREE_VERSION_1 - int32 btm_level; + int32 btm_level; #endif -} BTMetaPageData; +} BTMetaPageData; -#define BTPageGetMeta(p) \ - ((BTMetaPageData *) &((PageHeader) p)->pd_linp[0]) +#define BTPageGetMeta(p) \ + ((BTMetaPageData *) &((PageHeader) p)->pd_linp[0]) -extern bool BuildingBtree; +extern bool BuildingBtree; /* - * We use high-concurrency locking on btrees. There are two cases in - * which we don't do locking. One is when we're building the btree. - * Since the creating transaction has not committed, no one can see - * the index, and there's no reason to share locks. The second case - * is when we're just starting up the database system. 
We use some - * special-purpose initialization code in the relation cache manager - * (see utils/cache/relcache.c) to allow us to do indexed scans on - * the system catalogs before we'd normally be able to. This happens - * before the lock table is fully initialized, so we can't use it. - * Strictly speaking, this violates 2pl, but we don't do 2pl on the - * system catalogs anyway, so I declare this to be okay. + * We use high-concurrency locking on btrees. There are two cases in + * which we don't do locking. One is when we're building the btree. + * Since the creating transaction has not committed, no one can see + * the index, and there's no reason to share locks. The second case + * is when we're just starting up the database system. We use some + * special-purpose initialization code in the relation cache manager + * (see utils/cache/relcache.c) to allow us to do indexed scans on + * the system catalogs before we'd normally be able to. This happens + * before the lock table is fully initialized, so we can't use it. + * Strictly speaking, this violates 2pl, but we don't do 2pl on the + * system catalogs anyway, so I declare this to be okay. */ -#define USELOCKING (!BuildingBtree && !IsInitProcessingMode()) +#define USELOCKING (!BuildingBtree && !IsInitProcessingMode()) /* - * _bt_metapinit() -- Initialize the metadata page of a btree. + * _bt_metapinit() -- Initialize the metadata page of a btree. */ void _bt_metapinit(Relation rel) { - Buffer buf; - Page pg; - int nblocks; - BTMetaPageData metad; - BTPageOpaque op; - - /* can't be sharing this with anyone, now... 
*/ - if (USELOCKING) - RelationSetLockForWrite(rel); - - if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) { - elog(WARN, "Cannot initialize non-empty btree %s", - RelationGetRelationName(rel)); - } - - buf = ReadBuffer(rel, P_NEW); - pg = BufferGetPage(buf); - _bt_pageinit(pg, BufferGetPageSize(buf)); - - metad.btm_magic = BTREE_MAGIC; - metad.btm_version = BTREE_VERSION; - metad.btm_root = P_NONE; + Buffer buf; + Page pg; + int nblocks; + BTMetaPageData metad; + BTPageOpaque op; + + /* can't be sharing this with anyone, now... */ + if (USELOCKING) + RelationSetLockForWrite(rel); + + if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) + { + elog(WARN, "Cannot initialize non-empty btree %s", + RelationGetRelationName(rel)); + } + + buf = ReadBuffer(rel, P_NEW); + pg = BufferGetPage(buf); + _bt_pageinit(pg, BufferGetPageSize(buf)); + + metad.btm_magic = BTREE_MAGIC; + metad.btm_version = BTREE_VERSION; + metad.btm_root = P_NONE; #ifdef BTREE_VERSION_1 - metad.btm_level = 0; + metad.btm_level = 0; #endif - memmove((char *) BTPageGetMeta(pg), (char *) &metad, sizeof(metad)); - - op = (BTPageOpaque) PageGetSpecialPointer(pg); - op->btpo_flags = BTP_META; - - WriteBuffer(buf); - - /* all done */ - if (USELOCKING) - RelationUnsetLockForWrite(rel); + memmove((char *) BTPageGetMeta(pg), (char *) &metad, sizeof(metad)); + + op = (BTPageOpaque) PageGetSpecialPointer(pg); + op->btpo_flags = BTP_META; + + WriteBuffer(buf); + + /* all done */ + if (USELOCKING) + RelationUnsetLockForWrite(rel); } #ifdef NOT_USED /* - * _bt_checkmeta() -- Verify that the metadata stored in a btree are - * reasonable. + * _bt_checkmeta() -- Verify that the metadata stored in a btree are + * reasonable. 
*/ void _bt_checkmeta(Relation rel) { - Buffer metabuf; - Page metap; - BTMetaPageData *metad; - BTPageOpaque op; - int nblocks; - - /* if the relation is empty, this is init time; don't complain */ - if ((nblocks = RelationGetNumberOfBlocks(rel)) == 0) - return; - - metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); - metap = BufferGetPage(metabuf); - op = (BTPageOpaque) PageGetSpecialPointer(metap); - if (!(op->btpo_flags & BTP_META)) { - elog(WARN, "Invalid metapage for index %s", - RelationGetRelationName(rel)); - } - metad = BTPageGetMeta(metap); - - if (metad->btm_magic != BTREE_MAGIC) { - elog(WARN, "Index %s is not a btree", - RelationGetRelationName(rel)); - } - - if (metad->btm_version != BTREE_VERSION) { - elog(WARN, "Version mismatch on %s: version %d file, version %d code", - RelationGetRelationName(rel), - metad->btm_version, BTREE_VERSION); - } - - _bt_relbuf(rel, metabuf, BT_READ); + Buffer metabuf; + Page metap; + BTMetaPageData *metad; + BTPageOpaque op; + int nblocks; + + /* if the relation is empty, this is init time; don't complain */ + if ((nblocks = RelationGetNumberOfBlocks(rel)) == 0) + return; + + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + metap = BufferGetPage(metabuf); + op = (BTPageOpaque) PageGetSpecialPointer(metap); + if (!(op->btpo_flags & BTP_META)) + { + elog(WARN, "Invalid metapage for index %s", + RelationGetRelationName(rel)); + } + metad = BTPageGetMeta(metap); + + if (metad->btm_magic != BTREE_MAGIC) + { + elog(WARN, "Index %s is not a btree", + RelationGetRelationName(rel)); + } + + if (metad->btm_version != BTREE_VERSION) + { + elog(WARN, "Version mismatch on %s: version %d file, version %d code", + RelationGetRelationName(rel), + metad->btm_version, BTREE_VERSION); + } + + _bt_relbuf(rel, metabuf, BT_READ); } + #endif /* - * _bt_getroot() -- Get the root page of the btree. + * _bt_getroot() -- Get the root page of the btree. 
* - * Since the root page can move around the btree file, we have to read - * its location from the metadata page, and then read the root page - * itself. If no root page exists yet, we have to create one. The - * standard class of race conditions exists here; I think I covered - * them all in the Hopi Indian rain dance of lock requests below. + * Since the root page can move around the btree file, we have to read + * its location from the metadata page, and then read the root page + * itself. If no root page exists yet, we have to create one. The + * standard class of race conditions exists here; I think I covered + * them all in the Hopi Indian rain dance of lock requests below. * - * We pass in the access type (BT_READ or BT_WRITE), and return the - * root page's buffer with the appropriate lock type set. Reference - * count on the root page gets bumped by ReadBuffer. The metadata - * page is unlocked and unreferenced by this process when this routine - * returns. + * We pass in the access type (BT_READ or BT_WRITE), and return the + * root page's buffer with the appropriate lock type set. Reference + * count on the root page gets bumped by ReadBuffer. The metadata + * page is unlocked and unreferenced by this process when this routine + * returns. 
*/ Buffer _bt_getroot(Relation rel, int access) { - Buffer metabuf; - Page metapg; - BTPageOpaque metaopaque; - Buffer rootbuf; - Page rootpg; - BTPageOpaque rootopaque; - BlockNumber rootblkno; - BTMetaPageData *metad; - - metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); - metapg = BufferGetPage(metabuf); - metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); - Assert(metaopaque->btpo_flags & BTP_META); - metad = BTPageGetMeta(metapg); - - if (metad->btm_magic != BTREE_MAGIC) { - elog(WARN, "Index %s is not a btree", - RelationGetRelationName(rel)); - } - - if (metad->btm_version != BTREE_VERSION) { - elog(WARN, "Version mismatch on %s: version %d file, version %d code", - RelationGetRelationName(rel), - metad->btm_version, BTREE_VERSION); - } - - /* if no root page initialized yet, do it */ - if (metad->btm_root == P_NONE) { - - /* turn our read lock in for a write lock */ - _bt_relbuf(rel, metabuf, BT_READ); - metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + Buffer metabuf; + Page metapg; + BTPageOpaque metaopaque; + Buffer rootbuf; + Page rootpg; + BTPageOpaque rootopaque; + BlockNumber rootblkno; + BTMetaPageData *metad; + + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); metapg = BufferGetPage(metabuf); metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); Assert(metaopaque->btpo_flags & BTP_META); metad = BTPageGetMeta(metapg); - - /* - * Race condition: if someone else initialized the metadata between - * the time we released the read lock and acquired the write lock, - * above, we want to avoid doing it again. - */ - - if (metad->btm_root == P_NONE) { - - /* - * Get, initialize, write, and leave a lock of the appropriate - * type on the new root page. Since this is the first page in - * the tree, it's a leaf. 
- */ - - rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); - rootblkno = BufferGetBlockNumber(rootbuf); - rootpg = BufferGetPage(rootbuf); - metad->btm_root = rootblkno; + + if (metad->btm_magic != BTREE_MAGIC) + { + elog(WARN, "Index %s is not a btree", + RelationGetRelationName(rel)); + } + + if (metad->btm_version != BTREE_VERSION) + { + elog(WARN, "Version mismatch on %s: version %d file, version %d code", + RelationGetRelationName(rel), + metad->btm_version, BTREE_VERSION); + } + + /* if no root page initialized yet, do it */ + if (metad->btm_root == P_NONE) + { + + /* turn our read lock in for a write lock */ + _bt_relbuf(rel, metabuf, BT_READ); + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + metapg = BufferGetPage(metabuf); + metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); + Assert(metaopaque->btpo_flags & BTP_META); + metad = BTPageGetMeta(metapg); + + /* + * Race condition: if someone else initialized the metadata + * between the time we released the read lock and acquired the + * write lock, above, we want to avoid doing it again. + */ + + if (metad->btm_root == P_NONE) + { + + /* + * Get, initialize, write, and leave a lock of the appropriate + * type on the new root page. Since this is the first page in + * the tree, it's a leaf. 
+ */ + + rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); + rootblkno = BufferGetBlockNumber(rootbuf); + rootpg = BufferGetPage(rootbuf); + metad->btm_root = rootblkno; #ifdef BTREE_VERSION_1 - metad->btm_level = 1; + metad->btm_level = 1; #endif - _bt_pageinit(rootpg, BufferGetPageSize(rootbuf)); - rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); - rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); - _bt_wrtnorelbuf(rel, rootbuf); - - /* swap write lock for read lock, if appropriate */ - if (access != BT_WRITE) { - _bt_setpagelock(rel, rootblkno, BT_READ); - _bt_unsetpagelock(rel, rootblkno, BT_WRITE); - } - - /* okay, metadata is correct */ - _bt_wrtbuf(rel, metabuf); - } else { - - /* - * Metadata initialized by someone else. In order to guarantee - * no deadlocks, we have to release the metadata page and start - * all over again. - */ - - _bt_relbuf(rel, metabuf, BT_WRITE); - return (_bt_getroot(rel, access)); + _bt_pageinit(rootpg, BufferGetPageSize(rootbuf)); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); + rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); + _bt_wrtnorelbuf(rel, rootbuf); + + /* swap write lock for read lock, if appropriate */ + if (access != BT_WRITE) + { + _bt_setpagelock(rel, rootblkno, BT_READ); + _bt_unsetpagelock(rel, rootblkno, BT_WRITE); + } + + /* okay, metadata is correct */ + _bt_wrtbuf(rel, metabuf); + } + else + { + + /* + * Metadata initialized by someone else. In order to + * guarantee no deadlocks, we have to release the metadata + * page and start all over again. + */ + + _bt_relbuf(rel, metabuf, BT_WRITE); + return (_bt_getroot(rel, access)); + } } - } else { - rootbuf = _bt_getbuf(rel, metad->btm_root, access); - - /* done with the meta page */ - _bt_relbuf(rel, metabuf, BT_READ); - } - - /* - * Race condition: If the root page split between the time we looked - * at the metadata page and got the root buffer, then we got the wrong - * buffer. 
- */ - - rootpg = BufferGetPage(rootbuf); - rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); - if (!(rootopaque->btpo_flags & BTP_ROOT)) { - - /* it happened, try again */ - _bt_relbuf(rel, rootbuf, access); - return (_bt_getroot(rel, access)); - } - - /* - * By here, we have a correct lock on the root block, its reference - * count is correct, and we have no lock set on the metadata page. - * Return the root block. - */ - - return (rootbuf); + else + { + rootbuf = _bt_getbuf(rel, metad->btm_root, access); + + /* done with the meta page */ + _bt_relbuf(rel, metabuf, BT_READ); + } + + /* + * Race condition: If the root page split between the time we looked + * at the metadata page and got the root buffer, then we got the wrong + * buffer. + */ + + rootpg = BufferGetPage(rootbuf); + rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpg); + if (!(rootopaque->btpo_flags & BTP_ROOT)) + { + + /* it happened, try again */ + _bt_relbuf(rel, rootbuf, access); + return (_bt_getroot(rel, access)); + } + + /* + * By here, we have a correct lock on the root block, its reference + * count is correct, and we have no lock set on the metadata page. + * Return the root block. + */ + + return (rootbuf); } /* - * _bt_getbuf() -- Get a buffer by block number for read or write. + * _bt_getbuf() -- Get a buffer by block number for read or write. * - * When this routine returns, the appropriate lock is set on the - * requested buffer its reference count is correct. + * When this routine returns, the appropriate lock is set on the + * requested buffer its reference count is correct. */ Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access) { - Buffer buf; - Page page; - - /* - * If we want a new block, we can't set a lock of the appropriate type - * until we've instantiated the buffer. 
- */ - - if (blkno != P_NEW) { - if (access == BT_WRITE) - _bt_setpagelock(rel, blkno, BT_WRITE); - else - _bt_setpagelock(rel, blkno, BT_READ); - - buf = ReadBuffer(rel, blkno); - } else { - buf = ReadBuffer(rel, blkno); - blkno = BufferGetBlockNumber(buf); - page = BufferGetPage(buf); - _bt_pageinit(page, BufferGetPageSize(buf)); - - if (access == BT_WRITE) - _bt_setpagelock(rel, blkno, BT_WRITE); + Buffer buf; + Page page; + + /* + * If we want a new block, we can't set a lock of the appropriate type + * until we've instantiated the buffer. + */ + + if (blkno != P_NEW) + { + if (access == BT_WRITE) + _bt_setpagelock(rel, blkno, BT_WRITE); + else + _bt_setpagelock(rel, blkno, BT_READ); + + buf = ReadBuffer(rel, blkno); + } else - _bt_setpagelock(rel, blkno, BT_READ); - } - - /* ref count and lock type are correct */ - return (buf); + { + buf = ReadBuffer(rel, blkno); + blkno = BufferGetBlockNumber(buf); + page = BufferGetPage(buf); + _bt_pageinit(page, BufferGetPageSize(buf)); + + if (access == BT_WRITE) + _bt_setpagelock(rel, blkno, BT_WRITE); + else + _bt_setpagelock(rel, blkno, BT_READ); + } + + /* ref count and lock type are correct */ + return (buf); } /* - * _bt_relbuf() -- release a locked buffer. + * _bt_relbuf() -- release a locked buffer. */ void _bt_relbuf(Relation rel, Buffer buf, int access) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - - /* access had better be one of read or write */ - if (access == BT_WRITE) - _bt_unsetpagelock(rel, blkno, BT_WRITE); - else - _bt_unsetpagelock(rel, blkno, BT_READ); - - ReleaseBuffer(buf); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + + /* access had better be one of read or write */ + if (access == BT_WRITE) + _bt_unsetpagelock(rel, blkno, BT_WRITE); + else + _bt_unsetpagelock(rel, blkno, BT_READ); + + ReleaseBuffer(buf); } /* - * _bt_wrtbuf() -- write a btree page to disk. + * _bt_wrtbuf() -- write a btree page to disk. 
* - * This routine releases the lock held on the buffer and our reference - * to it. It is an error to call _bt_wrtbuf() without a write lock - * or a reference to the buffer. + * This routine releases the lock held on the buffer and our reference + * to it. It is an error to call _bt_wrtbuf() without a write lock + * or a reference to the buffer. */ void _bt_wrtbuf(Relation rel, Buffer buf) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - WriteBuffer(buf); - _bt_unsetpagelock(rel, blkno, BT_WRITE); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteBuffer(buf); + _bt_unsetpagelock(rel, blkno, BT_WRITE); } /* - * _bt_wrtnorelbuf() -- write a btree page to disk, but do not release - * our reference or lock. + * _bt_wrtnorelbuf() -- write a btree page to disk, but do not release + * our reference or lock. * - * It is an error to call _bt_wrtnorelbuf() without a write lock - * or a reference to the buffer. + * It is an error to call _bt_wrtnorelbuf() without a write lock + * or a reference to the buffer. */ void _bt_wrtnorelbuf(Relation rel, Buffer buf) { - BlockNumber blkno; - - blkno = BufferGetBlockNumber(buf); - WriteNoReleaseBuffer(buf); + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteNoReleaseBuffer(buf); } /* - * _bt_pageinit() -- Initialize a new page. + * _bt_pageinit() -- Initialize a new page. */ void _bt_pageinit(Page page, Size size) { - /* - * Cargo-cult programming -- don't really need this to be zero, but - * creating new pages is an infrequent occurrence and it makes me feel - * good when I know they're empty. - */ - - memset(page, 0, size); - - PageInit(page, size, sizeof(BTPageOpaqueData)); + + /* + * Cargo-cult programming -- don't really need this to be zero, but + * creating new pages is an infrequent occurrence and it makes me feel + * good when I know they're empty. 
+ */ + + memset(page, 0, size); + + PageInit(page, size, sizeof(BTPageOpaqueData)); } /* - * _bt_metaproot() -- Change the root page of the btree. + * _bt_metaproot() -- Change the root page of the btree. * - * Lehman and Yao require that the root page move around in order to - * guarantee deadlock-free short-term, fine-granularity locking. When - * we split the root page, we record the new parent in the metadata page - * for the relation. This routine does the work. + * Lehman and Yao require that the root page move around in order to + * guarantee deadlock-free short-term, fine-granularity locking. When + * we split the root page, we record the new parent in the metadata page + * for the relation. This routine does the work. * - * No direct preconditions, but if you don't have the a write lock on - * at least the old root page when you call this, you're making a big - * mistake. On exit, metapage data is correct and we no longer have - * a reference to or lock on the metapage. + * No direct preconditions, but if you don't have the a write lock on + * at least the old root page when you call this, you're making a big + * mistake. On exit, metapage data is correct and we no longer have + * a reference to or lock on the metapage. 
*/ void _bt_metaproot(Relation rel, BlockNumber rootbknum, int level) { - Buffer metabuf; - Page metap; - BTPageOpaque metaopaque; - BTMetaPageData *metad; - - metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); - metap = BufferGetPage(metabuf); - metaopaque = (BTPageOpaque) PageGetSpecialPointer(metap); - Assert(metaopaque->btpo_flags & BTP_META); - metad = BTPageGetMeta(metap); - metad->btm_root = rootbknum; + Buffer metabuf; + Page metap; + BTPageOpaque metaopaque; + BTMetaPageData *metad; + + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + metap = BufferGetPage(metabuf); + metaopaque = (BTPageOpaque) PageGetSpecialPointer(metap); + Assert(metaopaque->btpo_flags & BTP_META); + metad = BTPageGetMeta(metap); + metad->btm_root = rootbknum; #ifdef BTREE_VERSION_1 - if ( level == 0 ) /* called from _do_insert */ - metad->btm_level += 1; - else - metad->btm_level = level; /* called from btsort */ + if (level == 0) /* called from _do_insert */ + metad->btm_level += 1; + else + metad->btm_level = level; /* called from btsort */ #endif - _bt_wrtbuf(rel, metabuf); + _bt_wrtbuf(rel, metabuf); } /* - * _bt_getstackbuf() -- Walk back up the tree one step, and find the item - * we last looked at in the parent. + * _bt_getstackbuf() -- Walk back up the tree one step, and find the item + * we last looked at in the parent. * - * This is possible because we save a bit image of the last item - * we looked at in the parent, and the update algorithm guarantees - * that if items above us in the tree move, they only move right. + * This is possible because we save a bit image of the last item + * we looked at in the parent, and the update algorithm guarantees + * that if items above us in the tree move, they only move right. * - * Also, re-set bts_blkno & bts_offset if changed and - * bts_btitem (it may be changed - see _bt_insertonpg). + * Also, re-set bts_blkno & bts_offset if changed and + * bts_btitem (it may be changed - see _bt_insertonpg). 
*/ Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access) { - Buffer buf; - BlockNumber blkno; - OffsetNumber start, offnum, maxoff; - OffsetNumber i; - Page page; - ItemId itemid; - BTItem item; - BTPageOpaque opaque; - BTItem item_save; - int item_nbytes; - - blkno = stack->bts_blkno; - buf = _bt_getbuf(rel, blkno, access); - page = BufferGetPage(buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - - if (maxoff >= stack->bts_offset) { - itemid = PageGetItemId(page, stack->bts_offset); - item = (BTItem) PageGetItem(page, itemid); - - /* if the item is where we left it, we're done */ - if ( BTItemSame (item, stack->bts_btitem) ) - { - pfree(stack->bts_btitem); - item_nbytes = ItemIdGetLength(itemid); - item_save = (BTItem) palloc(item_nbytes); - memmove((char *) item_save, (char *) item, item_nbytes); - stack->bts_btitem = item_save; - return (buf); - } - - /* if the item has just moved right on this page, we're done */ - for (i = OffsetNumberNext(stack->bts_offset); - i <= maxoff; - i = OffsetNumberNext(i)) { - itemid = PageGetItemId(page, i); - item = (BTItem) PageGetItem(page, itemid); - - /* if the item is where we left it, we're done */ - if ( BTItemSame (item, stack->bts_btitem) ) - { - stack->bts_offset = i; - pfree(stack->bts_btitem); - item_nbytes = ItemIdGetLength(itemid); - item_save = (BTItem) palloc(item_nbytes); - memmove((char *) item_save, (char *) item, item_nbytes); - stack->bts_btitem = item_save; - return (buf); - } - } - } - - /* by here, the item we're looking for moved right at least one page */ - for (;;) { - blkno = opaque->btpo_next; - if (P_RIGHTMOST(opaque)) - elog(FATAL, "my bits moved right off the end of the world!"); - - _bt_relbuf(rel, buf, access); + Buffer buf; + BlockNumber blkno; + OffsetNumber start, + offnum, + maxoff; + OffsetNumber i; + Page page; + ItemId itemid; + BTItem item; + BTPageOpaque opaque; + BTItem item_save; + int item_nbytes; + + blkno = 
stack->bts_blkno; buf = _bt_getbuf(rel, blkno, access); page = BufferGetPage(buf); - maxoff = PageGetMaxOffsetNumber(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page); - - /* if we have a right sibling, step over the high key */ - start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - - /* see if it's on this page */ - for (offnum = start; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) { - itemid = PageGetItemId(page, offnum); - item = (BTItem) PageGetItem(page, itemid); - if ( BTItemSame (item, stack->bts_btitem) ) - { - stack->bts_offset = offnum; - stack->bts_blkno = blkno; - pfree(stack->bts_btitem); - item_nbytes = ItemIdGetLength(itemid); - item_save = (BTItem) palloc(item_nbytes); - memmove((char *) item_save, (char *) item, item_nbytes); - stack->bts_btitem = item_save; - return (buf); - } + maxoff = PageGetMaxOffsetNumber(page); + + if (maxoff >= stack->bts_offset) + { + itemid = PageGetItemId(page, stack->bts_offset); + item = (BTItem) PageGetItem(page, itemid); + + /* if the item is where we left it, we're done */ + if (BTItemSame(item, stack->bts_btitem)) + { + pfree(stack->bts_btitem); + item_nbytes = ItemIdGetLength(itemid); + item_save = (BTItem) palloc(item_nbytes); + memmove((char *) item_save, (char *) item, item_nbytes); + stack->bts_btitem = item_save; + return (buf); + } + + /* if the item has just moved right on this page, we're done */ + for (i = OffsetNumberNext(stack->bts_offset); + i <= maxoff; + i = OffsetNumberNext(i)) + { + itemid = PageGetItemId(page, i); + item = (BTItem) PageGetItem(page, itemid); + + /* if the item is where we left it, we're done */ + if (BTItemSame(item, stack->bts_btitem)) + { + stack->bts_offset = i; + pfree(stack->bts_btitem); + item_nbytes = ItemIdGetLength(itemid); + item_save = (BTItem) palloc(item_nbytes); + memmove((char *) item_save, (char *) item, item_nbytes); + stack->bts_btitem = item_save; + return (buf); + } + } + } + + /* by here, the item we're looking for moved right at least one 
page */ + for (;;) + { + blkno = opaque->btpo_next; + if (P_RIGHTMOST(opaque)) + elog(FATAL, "my bits moved right off the end of the world!"); + + _bt_relbuf(rel, buf, access); + buf = _bt_getbuf(rel, blkno, access); + page = BufferGetPage(buf); + maxoff = PageGetMaxOffsetNumber(page); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* if we have a right sibling, step over the high key */ + start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + + /* see if it's on this page */ + for (offnum = start; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + item = (BTItem) PageGetItem(page, itemid); + if (BTItemSame(item, stack->bts_btitem)) + { + stack->bts_offset = offnum; + stack->bts_blkno = blkno; + pfree(stack->bts_btitem); + item_nbytes = ItemIdGetLength(itemid); + item_save = (BTItem) palloc(item_nbytes); + memmove((char *) item_save, (char *) item, item_nbytes); + stack->bts_btitem = item_save; + return (buf); + } + } } - } } static void _bt_setpagelock(Relation rel, BlockNumber blkno, int access) { - ItemPointerData iptr; - - if (USELOCKING) { - ItemPointerSet(&iptr, blkno, P_HIKEY); - - if (access == BT_WRITE) - RelationSetSingleWLockPage(rel, &iptr); - else - RelationSetSingleRLockPage(rel, &iptr); - } + ItemPointerData iptr; + + if (USELOCKING) + { + ItemPointerSet(&iptr, blkno, P_HIKEY); + + if (access == BT_WRITE) + RelationSetSingleWLockPage(rel, &iptr); + else + RelationSetSingleRLockPage(rel, &iptr); + } } static void _bt_unsetpagelock(Relation rel, BlockNumber blkno, int access) { - ItemPointerData iptr; - - if (USELOCKING) { - ItemPointerSet(&iptr, blkno, P_HIKEY); - - if (access == BT_WRITE) - RelationUnsetSingleWLockPage(rel, &iptr); - else - RelationUnsetSingleRLockPage(rel, &iptr); - } + ItemPointerData iptr; + + if (USELOCKING) + { + ItemPointerSet(&iptr, blkno, P_HIKEY); + + if (access == BT_WRITE) + RelationUnsetSingleWLockPage(rel, &iptr); + else + RelationUnsetSingleRLockPage(rel, 
&iptr); + } } void _bt_pagedel(Relation rel, ItemPointer tid) { - Buffer buf; - Page page; - BlockNumber blkno; - OffsetNumber offno; - - blkno = ItemPointerGetBlockNumber(tid); - offno = ItemPointerGetOffsetNumber(tid); - - buf = _bt_getbuf(rel, blkno, BT_WRITE); - page = BufferGetPage(buf); - - PageIndexTupleDelete(page, offno); - - /* write the buffer and release the lock */ - _bt_wrtbuf(rel, buf); + Buffer buf; + Page page; + BlockNumber blkno; + OffsetNumber offno; + + blkno = ItemPointerGetBlockNumber(tid); + offno = ItemPointerGetOffsetNumber(tid); + + buf = _bt_getbuf(rel, blkno, BT_WRITE); + page = BufferGetPage(buf); + + PageIndexTupleDelete(page, offno); + + /* write the buffer and release the lock */ + _bt_wrtbuf(rel, buf); } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index b672901f8d..dccbd77b35 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -1,17 +1,17 @@ /*------------------------------------------------------------------------- * * btree.c-- - * Implementation of Lehman and Yao's btree management algorithm for - * Postgres. + * Implementation of Lehman and Yao's btree management algorithm for + * Postgres. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.19 1997/05/05 03:41:17 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.20 1997/09/07 04:38:54 momjian Exp $ * * NOTES - * This file contains only the public interface routines. + * This file contains only the public interface routines. 
* *------------------------------------------------------------------------- */ @@ -28,546 +28,579 @@ #include <miscadmin.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif #ifdef BTREE_BUILD_STATS #include <tcop/tcopprot.h> -extern int ShowExecutorStats; +extern int ShowExecutorStats; + #endif -bool BuildingBtree = false; /* see comment in btbuild() */ -bool FastBuild = true; /* use sort/build instead of insertion build */ +bool BuildingBtree = false; /* see comment in btbuild() */ +bool FastBuild = true; /* use sort/build instead of + * insertion build */ /* - * btbuild() -- build a new btree index. + * btbuild() -- build a new btree index. * - * We use a global variable to record the fact that we're creating - * a new index. This is used to avoid high-concurrency locking, - * since the index won't be visible until this transaction commits - * and since building is guaranteed to be single-threaded. + * We use a global variable to record the fact that we're creating + * a new index. This is used to avoid high-concurrency locking, + * since the index won't be visible until this transaction commits + * and since building is guaranteed to be single-threaded. 
*/ void btbuild(Relation heap, - Relation index, - int natts, - AttrNumber *attnum, - IndexStrategy istrat, - uint16 pcount, - Datum *params, - FuncIndexInfo *finfo, - PredInfo *predInfo) + Relation index, + int natts, + AttrNumber * attnum, + IndexStrategy istrat, + uint16 pcount, + Datum * params, + FuncIndexInfo * finfo, + PredInfo * predInfo) { - HeapScanDesc hscan; - Buffer buffer; - HeapTuple htup; - IndexTuple itup; - TupleDesc htupdesc, itupdesc; - Datum *attdata; - bool *nulls; - InsertIndexResult res = 0; - int nhtups, nitups; - int i; - BTItem btitem; + HeapScanDesc hscan; + Buffer buffer; + HeapTuple htup; + IndexTuple itup; + TupleDesc htupdesc, + itupdesc; + Datum *attdata; + bool *nulls; + InsertIndexResult res = 0; + int nhtups, + nitups; + int i; + BTItem btitem; + #ifndef OMIT_PARTIAL_INDEX - ExprContext *econtext = (ExprContext *) NULL; - TupleTable tupleTable = (TupleTable) NULL; - TupleTableSlot *slot = (TupleTableSlot *) NULL; -#endif - Oid hrelid, irelid; - Node *pred, *oldPred; - void *spool = (void *) NULL; - bool isunique; - bool usefast; - - /* note that this is a new btree */ - BuildingBtree = true; - - pred = predInfo->pred; - oldPred = predInfo->oldPred; - - /* - * bootstrap processing does something strange, so don't use - * sort/build for initial catalog indices. at some point i need - * to look harder at this. (there is some kind of incremental - * processing going on there.) 
-- pma 08/29/95 - */ - usefast = (FastBuild && IsNormalProcessingMode()); + ExprContext *econtext = (ExprContext *) NULL; + TupleTable tupleTable = (TupleTable) NULL; + TupleTableSlot *slot = (TupleTableSlot *) NULL; -#ifdef BTREE_BUILD_STATS - if ( ShowExecutorStats ) - ResetUsage (); #endif + Oid hrelid, + irelid; + Node *pred, + *oldPred; + void *spool = (void *) NULL; + bool isunique; + bool usefast; - /* see if index is unique */ - isunique = IndexIsUniqueNoCache(RelationGetRelationId(index)); - - /* initialize the btree index metadata page (if this is a new index) */ - if (oldPred == NULL) - _bt_metapinit(index); - - /* get tuple descriptors for heap and index relations */ - htupdesc = RelationGetTupleDescriptor(heap); - itupdesc = RelationGetTupleDescriptor(index); - - /* get space for data items that'll appear in the index tuple */ - attdata = (Datum *) palloc(natts * sizeof(Datum)); - nulls = (bool *) palloc(natts * sizeof(bool)); - - /* - * If this is a predicate (partial) index, we will need to evaluate the - * predicate using ExecQual, which requires the current tuple to be in a - * slot of a TupleTable. In addition, ExecQual must have an ExprContext - * referring to that slot. Here, we initialize dummy TupleTable and - * ExprContext objects for this purpose. --Nels, Feb '92 - */ -#ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - econtext = makeNode(ExprContext); - FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer); + /* note that this is a new btree */ + BuildingBtree = true; + + pred = predInfo->pred; + oldPred = predInfo->oldPred; /* - * we never want to use sort/build if we are extending an - * existing partial index -- it works by inserting the - * newly-qualifying tuples into the existing index. - * (sort/build would overwrite the existing index with one - * consisting of the newly-qualifying tuples.) 
+ * bootstrap processing does something strange, so don't use + * sort/build for initial catalog indices. at some point i need to + * look harder at this. (there is some kind of incremental processing + * going on there.) -- pma 08/29/95 */ - usefast = false; - } -#endif /* OMIT_PARTIAL_INDEX */ - - /* start a heap scan */ - hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); - htup = heap_getnext(hscan, 0, &buffer); - - /* build the index */ - nhtups = nitups = 0; - - if (usefast) { - spool = _bt_spoolinit(index, 7, isunique); - res = (InsertIndexResult) NULL; - } - - for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) { - - nhtups++; - + usefast = (FastBuild && IsNormalProcessingMode()); + +#ifdef BTREE_BUILD_STATS + if (ShowExecutorStats) + ResetUsage(); +#endif + + /* see if index is unique */ + isunique = IndexIsUniqueNoCache(RelationGetRelationId(index)); + + /* initialize the btree index metadata page (if this is a new index) */ + if (oldPred == NULL) + _bt_metapinit(index); + + /* get tuple descriptors for heap and index relations */ + htupdesc = RelationGetTupleDescriptor(heap); + itupdesc = RelationGetTupleDescriptor(index); + + /* get space for data items that'll appear in the index tuple */ + attdata = (Datum *) palloc(natts * sizeof(Datum)); + nulls = (bool *) palloc(natts * sizeof(bool)); + /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index + * If this is a predicate (partial) index, we will need to evaluate + * the predicate using ExecQual, which requires the current tuple to + * be in a slot of a TupleTable. In addition, ExecQual must have an + * ExprContext referring to that slot. Here, we initialize dummy + * TupleTable and ExprContext objects for this purpose. 
--Nels, Feb + * '92 */ - if (oldPred != NULL) { +#ifndef OMIT_PARTIAL_INDEX + if (pred != NULL || oldPred != NULL) + { + tupleTable = ExecCreateTupleTable(1); + slot = ExecAllocTableSlot(tupleTable); + econtext = makeNode(ExprContext); + FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer); + + /* + * we never want to use sort/build if we are extending an existing + * partial index -- it works by inserting the newly-qualifying + * tuples into the existing index. (sort/build would overwrite the + * existing index with one consisting of the newly-qualifying + * tuples.) + */ + usefast = false; + } +#endif /* OMIT_PARTIAL_INDEX */ + + /* start a heap scan */ + hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); + htup = heap_getnext(hscan, 0, &buffer); + + /* build the index */ + nhtups = nitups = 0; + + if (usefast) + { + spool = _bt_spoolinit(index, 7, isunique); + res = (InsertIndexResult) NULL; + } + + for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) + { + + nhtups++; + + /* + * If oldPred != NULL, this is an EXTEND INDEX command, so skip + * this tuple if it was already in the existing partial index + */ + if (oldPred != NULL) + { #ifndef OMIT_PARTIAL_INDEX - /*SetSlotContents(slot, htup);*/ - slot->val = htup; - if (ExecQual((List*)oldPred, econtext) == true) { + /* SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List *) oldPred, econtext) == true) + { + nitups++; + continue; + } +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* + * Skip this tuple if it doesn't satisfy the partial-index + * predicate + */ + if (pred != NULL) + { +#ifndef OMIT_PARTIAL_INDEX + /* SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List *) pred, econtext) == false) + continue; +#endif /* OMIT_PARTIAL_INDEX */ + } + nitups++; - continue; - } -#endif /* OMIT_PARTIAL_INDEX */ + + /* + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. 
+ */ + + for (i = 1; i <= natts; i++) + { + int attoff; + bool attnull; + + /* + * Offsets are from the start of the tuple, and are + * zero-based; indices are one-based. The next call returns i + * - 1. That's data hiding for you. + */ + + attoff = AttrNumberGetAttrOffset(i); + attdata[attoff] = GetIndexValue(htup, + htupdesc, + attoff, + attnum, + finfo, + &attnull, + buffer); + nulls[attoff] = (attnull ? 'n' : ' '); + } + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(itupdesc, attdata, nulls); + + /* + * If the single index key is null, we don't insert it into the + * index. Btrees support scans on <, <=, =, >=, and >. Relational + * algebra says that A op B (where op is one of the operators + * above) returns null if either A or B is null. This means that + * no qualification used in an index scan could ever return true + * on a null attribute. It also means that indices can't be used + * by ISNULL or NOTNULL scans, but that's an artifact of the + * strategy map architecture chosen in 1986, not of the way nulls + * are handled here. + */ + + /* + * New comments: NULLs handling. While we can't do NULL + * comparison, we can follow simple rule for ordering items on + * btree pages - NULLs greater NOT_NULLs and NULL = NULL is TRUE. + * Sure, it's just rule for placing/finding items and no more - + * keytest'll return FALSE for a = 5 for items having 'a' isNULL. + * Look at _bt_skeycmp, _bt_compare and _bt_itemcmp for how it + * works. - vadim 03/23/97 + * + * if (itup->t_info & INDEX_NULL_MASK) { pfree(itup); continue; } + */ + + itup->t_tid = htup->t_ctid; + btitem = _bt_formitem(itup); + + /* + * if we are doing bottom-up btree build, we insert the index into + * a spool page for subsequent processing. otherwise, we insert + * into the btree. 
+ */ + if (usefast) + { + _bt_spool(index, btitem, spool); + } + else + { + res = _bt_doinsert(index, btitem, isunique, heap); + } + + pfree(btitem); + pfree(itup); + if (res) + { + pfree(res); + } } - - /* Skip this tuple if it doesn't satisfy the partial-index predicate */ - if (pred != NULL) { + + /* okay, all heap tuples are indexed */ + heap_endscan(hscan); + + if (pred != NULL || oldPred != NULL) + { #ifndef OMIT_PARTIAL_INDEX - /* SetSlotContents(slot, htup); */ - slot->val = htup; - if (ExecQual((List*)pred, econtext) == false) - continue; -#endif /* OMIT_PARTIAL_INDEX */ + ExecDestroyTupleTable(tupleTable, true); + pfree(econtext); +#endif /* OMIT_PARTIAL_INDEX */ } - - nitups++; - + /* - * For the current heap tuple, extract all the attributes - * we use in this index, and note which are null. + * if we are doing bottom-up btree build, we now have a bunch of + * sorted runs in the spool pages. finish the build by (1) merging + * the runs, (2) inserting the sorted tuples into btree pages and (3) + * building the upper levels. */ - - for (i = 1; i <= natts; i++) { - int attoff; - bool attnull; - - /* - * Offsets are from the start of the tuple, and are - * zero-based; indices are one-based. The next call - * returns i - 1. That's data hiding for you. - */ - - attoff = AttrNumberGetAttrOffset(i); - attdata[attoff] = GetIndexValue(htup, - htupdesc, - attoff, - attnum, - finfo, - &attnull, - buffer); - nulls[attoff] = (attnull ? 'n' : ' '); + if (usefast) + { + _bt_spool(index, (BTItem) NULL, spool); /* flush the spool */ + _bt_leafbuild(index, spool); + _bt_spooldestroy(spool); } - - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(itupdesc, attdata, nulls); - - /* - * If the single index key is null, we don't insert it into - * the index. Btrees support scans on <, <=, =, >=, and >. - * Relational algebra says that A op B (where op is one of the - * operators above) returns null if either A or B is null. 
This - * means that no qualification used in an index scan could ever - * return true on a null attribute. It also means that indices - * can't be used by ISNULL or NOTNULL scans, but that's an - * artifact of the strategy map architecture chosen in 1986, not - * of the way nulls are handled here. - */ - /* - * New comments: NULLs handling. - * While we can't do NULL comparison, we can follow simple - * rule for ordering items on btree pages - NULLs greater - * NOT_NULLs and NULL = NULL is TRUE. Sure, it's just rule - * for placing/finding items and no more - keytest'll return - * FALSE for a = 5 for items having 'a' isNULL. - * Look at _bt_skeycmp, _bt_compare and _bt_itemcmp for - * how it works. - vadim 03/23/97 - - if (itup->t_info & INDEX_NULL_MASK) { - pfree(itup); - continue; + +#ifdef BTREE_BUILD_STATS + if (ShowExecutorStats) + { + fprintf(stderr, "! BtreeBuild Stats:\n"); + ShowUsage(); + ResetUsage(); } - */ - - itup->t_tid = htup->t_ctid; - btitem = _bt_formitem(itup); +#endif /* - * if we are doing bottom-up btree build, we insert the index - * into a spool page for subsequent processing. otherwise, we - * insert into the btree. + * Since we just counted the tuples in the heap, we update its stats + * in pg_class to guarantee that the planner takes advantage of the + * index we just created. Finally, only update statistics during + * normal index definitions, not for indices on system catalogs + * created during bootstrap processing. We must close the relations + * before updatings statistics to guarantee that the relcache entries + * are flushed when we increment the command counter in UpdateStats(). 
*/ - if (usefast) { - _bt_spool(index, btitem, spool); - } else { - res = _bt_doinsert(index, btitem, isunique, heap); + if (IsNormalProcessingMode()) + { + hrelid = heap->rd_id; + irelid = index->rd_id; + heap_close(heap); + index_close(index); + UpdateStats(hrelid, nhtups, true); + UpdateStats(irelid, nitups, false); + if (oldPred != NULL) + { + if (nitups == nhtups) + pred = NULL; + UpdateIndexPredicate(irelid, oldPred, pred); + } } - pfree(btitem); - pfree(itup); - if (res) { - pfree(res); - } - } - - /* okay, all heap tuples are indexed */ - heap_endscan(hscan); - - if (pred != NULL || oldPred != NULL) { -#ifndef OMIT_PARTIAL_INDEX - ExecDestroyTupleTable(tupleTable, true); - pfree(econtext); -#endif /* OMIT_PARTIAL_INDEX */ - } - - /* - * if we are doing bottom-up btree build, we now have a bunch of - * sorted runs in the spool pages. finish the build by (1) - * merging the runs, (2) inserting the sorted tuples into btree - * pages and (3) building the upper levels. - */ - if (usefast) { - _bt_spool(index, (BTItem) NULL, spool); /* flush the spool */ - _bt_leafbuild(index, spool); - _bt_spooldestroy(spool); - } + pfree(nulls); + pfree(attdata); -#ifdef BTREE_BUILD_STATS - if ( ShowExecutorStats ) - { - fprintf(stderr, "! BtreeBuild Stats:\n"); - ShowUsage (); - ResetUsage (); - } -#endif - - /* - * Since we just counted the tuples in the heap, we update its - * stats in pg_class to guarantee that the planner takes advantage - * of the index we just created. Finally, only update statistics - * during normal index definitions, not for indices on system catalogs - * created during bootstrap processing. We must close the relations - * before updatings statistics to guarantee that the relcache entries - * are flushed when we increment the command counter in UpdateStats(). 
- */ - if (IsNormalProcessingMode()) - { - hrelid = heap->rd_id; - irelid = index->rd_id; - heap_close(heap); - index_close(index); - UpdateStats(hrelid, nhtups, true); - UpdateStats(irelid, nitups, false); - if (oldPred != NULL) { - if (nitups == nhtups) pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } - } - - pfree(nulls); - pfree(attdata); - - /* all done */ - BuildingBtree = false; + /* all done */ + BuildingBtree = false; } /* - * btinsert() -- insert an index tuple into a btree. + * btinsert() -- insert an index tuple into a btree. * - * Descend the tree recursively, find the appropriate location for our - * new tuple, put it there, set its unique OID as appropriate, and - * return an InsertIndexResult to the caller. + * Descend the tree recursively, find the appropriate location for our + * new tuple, put it there, set its unique OID as appropriate, and + * return an InsertIndexResult to the caller. */ InsertIndexResult -btinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) +btinsert(Relation rel, Datum * datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) { - BTItem btitem; - IndexTuple itup; - InsertIndexResult res; - - /* generate an index tuple */ - itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls); - itup->t_tid = *ht_ctid; - - /* - * See comments in btbuild. - - if (itup->t_info & INDEX_NULL_MASK) - return ((InsertIndexResult) NULL); - */ - - btitem = _bt_formitem(itup); - - res = _bt_doinsert(rel, btitem, - IndexIsUnique(RelationGetRelationId(rel)), heapRel); - - pfree(btitem); - pfree(itup); - - /* adjust any active scans that will be affected by this insertion */ - _bt_adjscans(rel, &(res->pointerData), BT_INSERT); - - return (res); + BTItem btitem; + IndexTuple itup; + InsertIndexResult res; + + /* generate an index tuple */ + itup = index_formtuple(RelationGetTupleDescriptor(rel), datum, nulls); + itup->t_tid = *ht_ctid; + + /* + * See comments in btbuild. 
+ * + * if (itup->t_info & INDEX_NULL_MASK) return ((InsertIndexResult) NULL); + */ + + btitem = _bt_formitem(itup); + + res = _bt_doinsert(rel, btitem, + IndexIsUnique(RelationGetRelationId(rel)), heapRel); + + pfree(btitem); + pfree(itup); + + /* adjust any active scans that will be affected by this insertion */ + _bt_adjscans(rel, &(res->pointerData), BT_INSERT); + + return (res); } /* - * btgettuple() -- Get the next tuple in the scan. + * btgettuple() -- Get the next tuple in the scan. */ -char * +char * btgettuple(IndexScanDesc scan, ScanDirection dir) { - RetrieveIndexResult res; - - /* - * If we've already initialized this scan, we can just advance it - * in the appropriate direction. If we haven't done so yet, we - * call a routine to get the first item in the scan. - */ - - if (ItemPointerIsValid(&(scan->currentItemData))) - res = _bt_next(scan, dir); - else - res = _bt_first(scan, dir); - - return ((char *) res); + RetrieveIndexResult res; + + /* + * If we've already initialized this scan, we can just advance it in + * the appropriate direction. If we haven't done so yet, we call a + * routine to get the first item in the scan. 
+ */ + + if (ItemPointerIsValid(&(scan->currentItemData))) + res = _bt_next(scan, dir); + else + res = _bt_first(scan, dir); + + return ((char *) res); } /* - * btbeginscan() -- start a scan on a btree index + * btbeginscan() -- start a scan on a btree index */ -char * +char * btbeginscan(Relation rel, bool fromEnd, uint16 keysz, ScanKey scankey) { - IndexScanDesc scan; - - /* get the scan */ - scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey); - - /* register scan in case we change pages it's using */ - _bt_regscan(scan); - - return ((char *) scan); + IndexScanDesc scan; + + /* get the scan */ + scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey); + + /* register scan in case we change pages it's using */ + _bt_regscan(scan); + + return ((char *) scan); } /* - * btrescan() -- rescan an index relation + * btrescan() -- rescan an index relation */ void btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey) { - ItemPointer iptr; - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - - /* we hold a read lock on the current page in the scan */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ); - so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* and we hold a read lock on the last marked item in the scan */ - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ); - so->btso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - if ( so == NULL ) /* if called from btbeginscan */ - { - so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData)); - so->btso_curbuf = so->btso_mrkbuf = InvalidBuffer; - so->keyData = (ScanKey) NULL; - if ( scan->numberOfKeys > 0) - so->keyData = (ScanKey) palloc (scan->numberOfKeys * sizeof(ScanKeyData)); - scan->opaque = so; - scan->flags = 0x0; - } - - /* - * Reset the scan keys. Note that keys ordering stuff - * moved to _bt_first. 
- vadim 05/05/97 - */ - so->numberOfKeys = scan->numberOfKeys; - if (scan->numberOfKeys > 0) { - memmove(scan->keyData, - scankey, - scan->numberOfKeys * sizeof(ScanKeyData)); - memmove(so->keyData, - scankey, - so->numberOfKeys * sizeof(ScanKeyData)); - } + ItemPointer iptr; + BTScanOpaque so; + + so = (BTScanOpaque) scan->opaque; + + /* we hold a read lock on the current page in the scan */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ); + so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* and we hold a read lock on the last marked item in the scan */ + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ); + so->btso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + if (so == NULL) /* if called from btbeginscan */ + { + so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData)); + so->btso_curbuf = so->btso_mrkbuf = InvalidBuffer; + so->keyData = (ScanKey) NULL; + if (scan->numberOfKeys > 0) + so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); + scan->opaque = so; + scan->flags = 0x0; + } + + /* + * Reset the scan keys. Note that keys ordering stuff moved to + * _bt_first. 
- vadim 05/05/97 + */ + so->numberOfKeys = scan->numberOfKeys; + if (scan->numberOfKeys > 0) + { + memmove(scan->keyData, + scankey, + scan->numberOfKeys * sizeof(ScanKeyData)); + memmove(so->keyData, + scankey, + so->numberOfKeys * sizeof(ScanKeyData)); + } } void btmovescan(IndexScanDesc scan, Datum v) { - ItemPointer iptr; - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - - /* release any locks we still hold */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ); - so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - -/* scan->keyData[0].sk_argument = v; */ - so->keyData[0].sk_argument = v; + ItemPointer iptr; + BTScanOpaque so; + + so = (BTScanOpaque) scan->opaque; + + /* release any locks we still hold */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ); + so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + +/* scan->keyData[0].sk_argument = v; */ + so->keyData[0].sk_argument = v; } /* - * btendscan() -- close down a scan + * btendscan() -- close down a scan */ void btendscan(IndexScanDesc scan) { - ItemPointer iptr; - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - - /* release any locks we still hold */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - if (BufferIsValid(so->btso_curbuf)) - _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ); - so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - if (BufferIsValid(so->btso_mrkbuf)) - _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ); - so->btso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - if ( so->keyData != (ScanKey) NULL ) - pfree (so->keyData); - pfree (so); - - _bt_dropscan(scan); + ItemPointer iptr; + BTScanOpaque so; + + so = (BTScanOpaque) scan->opaque; + + /* release any locks we still hold */ + if 
(ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + if (BufferIsValid(so->btso_curbuf)) + _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ); + so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + if (BufferIsValid(so->btso_mrkbuf)) + _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ); + so->btso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + if (so->keyData != (ScanKey) NULL) + pfree(so->keyData); + pfree(so); + + _bt_dropscan(scan); } /* - * btmarkpos() -- save current scan position + * btmarkpos() -- save current scan position */ void btmarkpos(IndexScanDesc scan) { - ItemPointer iptr; - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - - /* release lock on old marked data, if any */ - if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { - _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ); - so->btso_mrkbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* bump lock on currentItemData and copy to currentMarkData */ - if (ItemPointerIsValid(&(scan->currentItemData))) { - so->btso_mrkbuf = _bt_getbuf(scan->relation, - BufferGetBlockNumber(so->btso_curbuf), - BT_READ); - scan->currentMarkData = scan->currentItemData; - } + ItemPointer iptr; + BTScanOpaque so; + + so = (BTScanOpaque) scan->opaque; + + /* release lock on old marked data, if any */ + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) + { + _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ); + so->btso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* bump lock on currentItemData and copy to currentMarkData */ + if (ItemPointerIsValid(&(scan->currentItemData))) + { + so->btso_mrkbuf = _bt_getbuf(scan->relation, + BufferGetBlockNumber(so->btso_curbuf), + BT_READ); + scan->currentMarkData = scan->currentItemData; + } } /* - * btrestrpos() -- restore scan to last saved position + * btrestrpos() -- restore scan to last saved position */ void 
btrestrpos(IndexScanDesc scan) { - ItemPointer iptr; - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - - /* release lock on current data, if any */ - if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { - _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ); - so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(iptr); - } - - /* bump lock on currentMarkData and copy to currentItemData */ - if (ItemPointerIsValid(&(scan->currentMarkData))) { - so->btso_curbuf = _bt_getbuf(scan->relation, - BufferGetBlockNumber(so->btso_mrkbuf), - BT_READ); - - scan->currentItemData = scan->currentMarkData; - } + ItemPointer iptr; + BTScanOpaque so; + + so = (BTScanOpaque) scan->opaque; + + /* release lock on current data, if any */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) + { + _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ); + so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* bump lock on currentMarkData and copy to currentItemData */ + if (ItemPointerIsValid(&(scan->currentMarkData))) + { + so->btso_curbuf = _bt_getbuf(scan->relation, + BufferGetBlockNumber(so->btso_mrkbuf), + BT_READ); + + scan->currentItemData = scan->currentMarkData; + } } /* stubs */ void btdelete(Relation rel, ItemPointer tid) { - /* adjust any active scans that will be affected by this deletion */ - _bt_adjscans(rel, tid, BT_DELETE); - - /* delete the data from the page */ - _bt_pagedel(rel, tid); + /* adjust any active scans that will be affected by this deletion */ + _bt_adjscans(rel, tid, BT_DELETE); + + /* delete the data from the page */ + _bt_pagedel(rel, tid); } diff --git a/src/backend/access/nbtree/nbtscan.c b/src/backend/access/nbtree/nbtscan.c index 5e23fe13d7..8a2042403a 100644 --- a/src/backend/access/nbtree/nbtscan.c +++ b/src/backend/access/nbtree/nbtscan.c @@ -1,28 +1,28 @@ /*------------------------------------------------------------------------- * * btscan.c-- - * manage scans on btrees. + * manage scans on btrees. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.7 1997/02/18 17:13:45 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.8 1997/09/07 04:38:57 momjian Exp $ * * * NOTES - * Because we can be doing an index scan on a relation while we update - * it, we need to avoid missing data that moves around in the index. - * The routines and global variables in this file guarantee that all - * scans in the local address space stay correctly positioned. This - * is all we need to worry about, since write locking guarantees that - * no one else will be on the same page at the same time as we are. + * Because we can be doing an index scan on a relation while we update + * it, we need to avoid missing data that moves around in the index. + * The routines and global variables in this file guarantee that all + * scans in the local address space stay correctly positioned. This + * is all we need to worry about, since write locking guarantees that + * no one else will be on the same page at the same time as we are. * - * The scheme is to manage a list of active scans in the current backend. - * Whenever we add or remove records from an index, or whenever we - * split a leaf page, we check the list of active scans to see if any - * has been affected. A scan is affected only if it is on the same - * relation, and the same page, as the update. + * The scheme is to manage a list of active scans in the current backend. + * Whenever we add or remove records from an index, or whenever we + * split a leaf page, we check the list of active scans to see if any + * has been affected. A scan is affected only if it is on the same + * relation, and the same page, as the update. 
* *------------------------------------------------------------------------- */ @@ -32,83 +32,87 @@ #include <storage/bufpage.h> #include <access/nbtree.h> -typedef struct BTScanListData { - IndexScanDesc btsl_scan; - struct BTScanListData *btsl_next; -} BTScanListData; +typedef struct BTScanListData +{ + IndexScanDesc btsl_scan; + struct BTScanListData *btsl_next; +} BTScanListData; -typedef BTScanListData *BTScanList; +typedef BTScanListData *BTScanList; -static BTScanList BTScans = (BTScanList) NULL; +static BTScanList BTScans = (BTScanList) NULL; -static void _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno); -static bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); +static void _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno); +static bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); /* - * _bt_regscan() -- register a new scan. + * _bt_regscan() -- register a new scan. 
*/ void _bt_regscan(IndexScanDesc scan) { - BTScanList new_el; - - new_el = (BTScanList) palloc(sizeof(BTScanListData)); - new_el->btsl_scan = scan; - new_el->btsl_next = BTScans; - BTScans = new_el; + BTScanList new_el; + + new_el = (BTScanList) palloc(sizeof(BTScanListData)); + new_el->btsl_scan = scan; + new_el->btsl_next = BTScans; + BTScans = new_el; } /* - * _bt_dropscan() -- drop a scan from the scan list + * _bt_dropscan() -- drop a scan from the scan list */ void _bt_dropscan(IndexScanDesc scan) { - BTScanList chk, last; - - last = (BTScanList) NULL; - for (chk = BTScans; - chk != (BTScanList) NULL && chk->btsl_scan != scan; - chk = chk->btsl_next) { - last = chk; - } - - if (chk == (BTScanList) NULL) - elog(WARN, "btree scan list trashed; can't find 0x%lx", scan); - - if (last == (BTScanList) NULL) - BTScans = chk->btsl_next; - else - last->btsl_next = chk->btsl_next; - - pfree (chk); + BTScanList chk, + last; + + last = (BTScanList) NULL; + for (chk = BTScans; + chk != (BTScanList) NULL && chk->btsl_scan != scan; + chk = chk->btsl_next) + { + last = chk; + } + + if (chk == (BTScanList) NULL) + elog(WARN, "btree scan list trashed; can't find 0x%lx", scan); + + if (last == (BTScanList) NULL) + BTScans = chk->btsl_next; + else + last->btsl_next = chk->btsl_next; + + pfree(chk); } /* - * _bt_adjscans() -- adjust all scans in the scan list to compensate - * for a given deletion or insertion + * _bt_adjscans() -- adjust all scans in the scan list to compensate + * for a given deletion or insertion */ void _bt_adjscans(Relation rel, ItemPointer tid, int op) { - BTScanList l; - Oid relid; - - relid = rel->rd_id; - for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next) { - if (relid == l->btsl_scan->relation->rd_id) - _bt_scandel(l->btsl_scan, op, - ItemPointerGetBlockNumber(tid), - ItemPointerGetOffsetNumber(tid)); - } + BTScanList l; + Oid relid; + + relid = rel->rd_id; + for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next) + { + if (relid == 
l->btsl_scan->relation->rd_id) + _bt_scandel(l->btsl_scan, op, + ItemPointerGetBlockNumber(tid), + ItemPointerGetOffsetNumber(tid)); + } } /* - * _bt_scandel() -- adjust a single scan + * _bt_scandel() -- adjust a single scan * * because each index page is always maintained as an ordered array of * index tuples, the index tuples on a given page shift beneath any - * given scan. an index modification "behind" a scan position (i.e., + * given scan. an index modification "behind" a scan position (i.e., * same page, lower or equal offset number) will therefore force us to * adjust the scan in the following ways: * @@ -126,80 +130,85 @@ _bt_adjscans(Relation rel, ItemPointer tid, int op) static void _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno) { - ItemPointer current; - Buffer buf; - BTScanOpaque so; - - if (!_bt_scantouched(scan, blkno, offno)) - return; - - so = (BTScanOpaque) scan->opaque; - buf = so->btso_curbuf; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) { - switch (op) { - case BT_INSERT: - _bt_step(scan, &buf, ForwardScanDirection); - break; - case BT_DELETE: - _bt_step(scan, &buf, BackwardScanDirection); - break; - default: - elog(WARN, "_bt_scandel: bad operation '%d'", op); - /*NOTREACHED*/ + ItemPointer current; + Buffer buf; + BTScanOpaque so; + + if (!_bt_scantouched(scan, blkno, offno)) + return; + + so = (BTScanOpaque) scan->opaque; + buf = so->btso_curbuf; + + current = &(scan->currentItemData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + { + switch (op) + { + case BT_INSERT: + _bt_step(scan, &buf, ForwardScanDirection); + break; + case BT_DELETE: + _bt_step(scan, &buf, BackwardScanDirection); + break; + default: + elog(WARN, "_bt_scandel: bad operation '%d'", op); + /* NOTREACHED */ + } + 
so->btso_curbuf = buf; } - so->btso_curbuf = buf; - } - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) { - ItemPointerData tmp; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; - switch (op) { - case BT_INSERT: - _bt_step(scan, &buf, ForwardScanDirection); - break; - case BT_DELETE: - _bt_step(scan, &buf, BackwardScanDirection); - break; - default: - elog(WARN, "_bt_scandel: bad operation '%d'", op); - /*NOTREACHED*/ + + current = &(scan->currentMarkData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + { + ItemPointerData tmp; + + tmp = *current; + *current = scan->currentItemData; + scan->currentItemData = tmp; + switch (op) + { + case BT_INSERT: + _bt_step(scan, &buf, ForwardScanDirection); + break; + case BT_DELETE: + _bt_step(scan, &buf, BackwardScanDirection); + break; + default: + elog(WARN, "_bt_scandel: bad operation '%d'", op); + /* NOTREACHED */ + } + so->btso_mrkbuf = buf; + tmp = *current; + *current = scan->currentItemData; + scan->currentItemData = tmp; } - so->btso_mrkbuf = buf; - tmp = *current; - *current = scan->currentItemData; - scan->currentItemData = tmp; - } } /* - * _bt_scantouched() -- check to see if a scan is affected by a given - * change to the index + * _bt_scantouched() -- check to see if a scan is affected by a given + * change to the index */ -static bool +static bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno) { - ItemPointer current; - - current = &(scan->currentItemData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && ItemPointerGetOffsetNumber(current) >= offno) - return (true); - - current = &(scan->currentMarkData); - if (ItemPointerIsValid(current) - && ItemPointerGetBlockNumber(current) == blkno - && 
ItemPointerGetOffsetNumber(current) >= offno) - return (true); - - return (false); + ItemPointer current; + + current = &(scan->currentItemData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + return (true); + + current = &(scan->currentMarkData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + return (true); + + return (false); } diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 1d1c8072b9..8b1f75b753 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * btsearch.c-- - * search code for postgres btrees. + * search code for postgres btrees. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.23 1997/08/19 21:29:42 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.24 1997/09/07 04:38:58 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -22,1435 +22,1516 @@ #include <catalog/pg_proc.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static BTStack -_bt_searchr(Relation rel, int keysz, ScanKey scankey, - Buffer *bufP, BTStack stack_in); -static OffsetNumber -_bt_firsteq(Relation rel, TupleDesc itupdesc, Page page, - Size keysz, ScanKey scankey, OffsetNumber offnum); -static int -_bt_compare(Relation rel, TupleDesc itupdesc, Page page, - int keysz, ScanKey scankey, OffsetNumber offnum); -static bool -_bt_twostep(IndexScanDesc scan, Buffer *bufP, ScanDirection dir); -static RetrieveIndexResult -_bt_endpoint(IndexScanDesc scan, ScanDirection dir); 
+static BTStack +_bt_searchr(Relation rel, int keysz, ScanKey scankey, + Buffer * bufP, BTStack stack_in); +static OffsetNumber +_bt_firsteq(Relation rel, TupleDesc itupdesc, Page page, + Size keysz, ScanKey scankey, OffsetNumber offnum); +static int +_bt_compare(Relation rel, TupleDesc itupdesc, Page page, + int keysz, ScanKey scankey, OffsetNumber offnum); +static bool + _bt_twostep(IndexScanDesc scan, Buffer * bufP, ScanDirection dir); +static RetrieveIndexResult + _bt_endpoint(IndexScanDesc scan, ScanDirection dir); /* - * _bt_search() -- Search for a scan key in the index. + * _bt_search() -- Search for a scan key in the index. * - * This routine is actually just a helper that sets things up and - * calls a recursive-descent search routine on the tree. + * This routine is actually just a helper that sets things up and + * calls a recursive-descent search routine on the tree. */ BTStack -_bt_search(Relation rel, int keysz, ScanKey scankey, Buffer *bufP) +_bt_search(Relation rel, int keysz, ScanKey scankey, Buffer * bufP) { - *bufP = _bt_getroot(rel, BT_READ); - return (_bt_searchr(rel, keysz, scankey, bufP, (BTStack) NULL)); + *bufP = _bt_getroot(rel, BT_READ); + return (_bt_searchr(rel, keysz, scankey, bufP, (BTStack) NULL)); } /* - * _bt_searchr() -- Search the tree recursively for a particular scankey. + * _bt_searchr() -- Search the tree recursively for a particular scankey. 
*/ -static BTStack +static BTStack _bt_searchr(Relation rel, - int keysz, - ScanKey scankey, - Buffer *bufP, - BTStack stack_in) + int keysz, + ScanKey scankey, + Buffer * bufP, + BTStack stack_in) { - BTStack stack; - OffsetNumber offnum; - Page page; - BTPageOpaque opaque; - BlockNumber par_blkno; - BlockNumber blkno; - ItemId itemid; - BTItem btitem; - BTItem item_save; - int item_nbytes; - IndexTuple itup; - - /* if this is a leaf page, we're done */ - page = BufferGetPage(*bufP); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - if (opaque->btpo_flags & BTP_LEAF) - return (stack_in); - - /* - * Find the appropriate item on the internal page, and get the child - * page that it points to. - */ - - par_blkno = BufferGetBlockNumber(*bufP); - offnum = _bt_binsrch(rel, *bufP, keysz, scankey, BT_DESCENT); - itemid = PageGetItemId(page, offnum); - btitem = (BTItem) PageGetItem(page, itemid); - itup = &(btitem->bti_itup); - blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); - - /* - * We need to save the bit image of the index entry we chose in the - * parent page on a stack. In case we split the tree, we'll use this - * bit image to figure out what our real parent page is, in case the - * parent splits while we're working lower in the tree. See the paper - * by Lehman and Yao for how this is detected and handled. (We use - * unique OIDs to disambiguate duplicate keys in the index -- Lehman - * and Yao disallow duplicate keys). 
- */ - - item_nbytes = ItemIdGetLength(itemid); - item_save = (BTItem) palloc(item_nbytes); - memmove((char *) item_save, (char *) btitem, item_nbytes); - stack = (BTStack) palloc(sizeof(BTStackData)); - stack->bts_blkno = par_blkno; - stack->bts_offset = offnum; - stack->bts_btitem = item_save; - stack->bts_parent = stack_in; - - /* drop the read lock on the parent page and acquire one on the child */ - _bt_relbuf(rel, *bufP, BT_READ); - *bufP = _bt_getbuf(rel, blkno, BT_READ); - - /* - * Race -- the page we just grabbed may have split since we read its - * pointer in the parent. If it has, we may need to move right to its - * new sibling. Do that. - */ - - *bufP = _bt_moveright(rel, *bufP, keysz, scankey, BT_READ); - - /* okay, all set to move down a level */ - return (_bt_searchr(rel, keysz, scankey, bufP, stack)); + BTStack stack; + OffsetNumber offnum; + Page page; + BTPageOpaque opaque; + BlockNumber par_blkno; + BlockNumber blkno; + ItemId itemid; + BTItem btitem; + BTItem item_save; + int item_nbytes; + IndexTuple itup; + + /* if this is a leaf page, we're done */ + page = BufferGetPage(*bufP); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (opaque->btpo_flags & BTP_LEAF) + return (stack_in); + + /* + * Find the appropriate item on the internal page, and get the child + * page that it points to. + */ + + par_blkno = BufferGetBlockNumber(*bufP); + offnum = _bt_binsrch(rel, *bufP, keysz, scankey, BT_DESCENT); + itemid = PageGetItemId(page, offnum); + btitem = (BTItem) PageGetItem(page, itemid); + itup = &(btitem->bti_itup); + blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); + + /* + * We need to save the bit image of the index entry we chose in the + * parent page on a stack. In case we split the tree, we'll use this + * bit image to figure out what our real parent page is, in case the + * parent splits while we're working lower in the tree. See the paper + * by Lehman and Yao for how this is detected and handled. 
(We use + * unique OIDs to disambiguate duplicate keys in the index -- Lehman + * and Yao disallow duplicate keys). + */ + + item_nbytes = ItemIdGetLength(itemid); + item_save = (BTItem) palloc(item_nbytes); + memmove((char *) item_save, (char *) btitem, item_nbytes); + stack = (BTStack) palloc(sizeof(BTStackData)); + stack->bts_blkno = par_blkno; + stack->bts_offset = offnum; + stack->bts_btitem = item_save; + stack->bts_parent = stack_in; + + /* drop the read lock on the parent page and acquire one on the child */ + _bt_relbuf(rel, *bufP, BT_READ); + *bufP = _bt_getbuf(rel, blkno, BT_READ); + + /* + * Race -- the page we just grabbed may have split since we read its + * pointer in the parent. If it has, we may need to move right to its + * new sibling. Do that. + */ + + *bufP = _bt_moveright(rel, *bufP, keysz, scankey, BT_READ); + + /* okay, all set to move down a level */ + return (_bt_searchr(rel, keysz, scankey, bufP, stack)); } /* - * _bt_moveright() -- move right in the btree if necessary. + * _bt_moveright() -- move right in the btree if necessary. * - * When we drop and reacquire a pointer to a page, it is possible that - * the page has changed in the meanwhile. If this happens, we're - * guaranteed that the page has "split right" -- that is, that any - * data that appeared on the page originally is either on the page - * or strictly to the right of it. + * When we drop and reacquire a pointer to a page, it is possible that + * the page has changed in the meanwhile. If this happens, we're + * guaranteed that the page has "split right" -- that is, that any + * data that appeared on the page originally is either on the page + * or strictly to the right of it. * - * This routine decides whether or not we need to move right in the - * tree by examining the high key entry on the page. If that entry - * is strictly less than one we expect to be on the page, then our - * picture of the page is incorrect and we need to move right. 
+ * This routine decides whether or not we need to move right in the + * tree by examining the high key entry on the page. If that entry + * is strictly less than one we expect to be on the page, then our + * picture of the page is incorrect and we need to move right. * - * On entry, we have the buffer pinned and a lock of the proper type. - * If we move right, we release the buffer and lock and acquire the - * same on the right sibling. + * On entry, we have the buffer pinned and a lock of the proper type. + * If we move right, we release the buffer and lock and acquire the + * same on the right sibling. */ Buffer _bt_moveright(Relation rel, - Buffer buf, - int keysz, - ScanKey scankey, - int access) + Buffer buf, + int keysz, + ScanKey scankey, + int access) { - Page page; - BTPageOpaque opaque; - ItemId hikey; - BlockNumber rblkno; - int natts = rel->rd_rel->relnatts; - - page = BufferGetPage(buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - - /* if we're on a rightmost page, we don't need to move right */ - if (P_RIGHTMOST(opaque)) - return (buf); - - /* by convention, item 0 on non-rightmost pages is the high key */ - hikey = PageGetItemId(page, P_HIKEY); - - /* - * If the scan key that brought us to this page is >= the high key - * stored on the page, then the page has split and we need to move - * right. 
- */ - - if (_bt_skeycmp(rel, keysz, scankey, page, hikey, - BTGreaterEqualStrategyNumber)) - { - /* move right as long as we need to */ - do + Page page; + BTPageOpaque opaque; + ItemId hikey; + BlockNumber rblkno; + int natts = rel->rd_rel->relnatts; + + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* if we're on a rightmost page, we don't need to move right */ + if (P_RIGHTMOST(opaque)) + return (buf); + + /* by convention, item 0 on non-rightmost pages is the high key */ + hikey = PageGetItemId(page, P_HIKEY); + + /* + * If the scan key that brought us to this page is >= the high key + * stored on the page, then the page has split and we need to move + * right. + */ + + if (_bt_skeycmp(rel, keysz, scankey, page, hikey, + BTGreaterEqualStrategyNumber)) { - OffsetNumber offmax = PageGetMaxOffsetNumber(page); - /* - * If this page consists of all duplicate keys (hikey and first - * key on the page have the same value), then we don't need to - * step right. - * - * NOTE for multi-column indices: we may do scan using - * keys not for all attrs. But we handle duplicates - * using all attrs in _bt_insert/_bt_spool code. - * And so we've to compare scankey with _last_ item - * on this page to do not lose "good" tuples if number - * of attrs > keysize. Example: (2,0) - last items on - * this page, (2,1) - first item on next page (hikey), - * our scankey is x = 2. Scankey == (2,1) because of - * we compare first attrs only, but we shouldn't to move - * right of here. 
- vadim 04/15/97 - */ - - if ( _bt_skeycmp (rel, keysz, scankey, page, hikey, - BTEqualStrategyNumber) ) - { - if ( opaque->btpo_flags & BTP_CHAIN ) - { - Assert ( ( opaque->btpo_flags & BTP_LEAF ) || offmax > P_HIKEY ); - break; - } - if ( offmax > P_HIKEY ) - { - if ( natts == keysz ) /* sanity checks */ - { - if ( _bt_skeycmp (rel, keysz, scankey, page, - PageGetItemId (page, P_FIRSTKEY), - BTEqualStrategyNumber) ) - elog (FATAL, "btree: BTP_CHAIN flag was expected"); - if ( _bt_skeycmp (rel, keysz, scankey, page, - PageGetItemId (page, offmax), - BTEqualStrategyNumber) ) - elog (FATAL, "btree: unexpected equal last item"); - if ( _bt_skeycmp (rel, keysz, scankey, page, - PageGetItemId (page, offmax), - BTLessStrategyNumber) ) - elog (FATAL, "btree: unexpected greater last item"); - /* move right */ - } - else if ( _bt_skeycmp (rel, keysz, scankey, page, - PageGetItemId (page, offmax), - BTLessEqualStrategyNumber) ) - break; - } - } - - /* step right one page */ - rblkno = opaque->btpo_next; - _bt_relbuf(rel, buf, access); - buf = _bt_getbuf(rel, rblkno, access); - page = BufferGetPage(buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - hikey = PageGetItemId(page, P_HIKEY); - - } while (! P_RIGHTMOST(opaque) - && _bt_skeycmp(rel, keysz, scankey, page, hikey, - BTGreaterEqualStrategyNumber)); - } - return (buf); + /* move right as long as we need to */ + do + { + OffsetNumber offmax = PageGetMaxOffsetNumber(page); + + /* + * If this page consists of all duplicate keys (hikey and + * first key on the page have the same value), then we don't + * need to step right. + * + * NOTE for multi-column indices: we may do scan using keys not + * for all attrs. But we handle duplicates using all attrs in + * _bt_insert/_bt_spool code. And so we've to compare scankey + * with _last_ item on this page to do not lose "good" tuples + * if number of attrs > keysize. 
Example: (2,0) - last items + * on this page, (2,1) - first item on next page (hikey), our + * scankey is x = 2. Scankey == (2,1) because of we compare + * first attrs only, but we shouldn't to move right of here. + * - vadim 04/15/97 + */ + + if (_bt_skeycmp(rel, keysz, scankey, page, hikey, + BTEqualStrategyNumber)) + { + if (opaque->btpo_flags & BTP_CHAIN) + { + Assert((opaque->btpo_flags & BTP_LEAF) || offmax > P_HIKEY); + break; + } + if (offmax > P_HIKEY) + { + if (natts == keysz) /* sanity checks */ + { + if (_bt_skeycmp(rel, keysz, scankey, page, + PageGetItemId(page, P_FIRSTKEY), + BTEqualStrategyNumber)) + elog(FATAL, "btree: BTP_CHAIN flag was expected"); + if (_bt_skeycmp(rel, keysz, scankey, page, + PageGetItemId(page, offmax), + BTEqualStrategyNumber)) + elog(FATAL, "btree: unexpected equal last item"); + if (_bt_skeycmp(rel, keysz, scankey, page, + PageGetItemId(page, offmax), + BTLessStrategyNumber)) + elog(FATAL, "btree: unexpected greater last item"); + /* move right */ + } + else if (_bt_skeycmp(rel, keysz, scankey, page, + PageGetItemId(page, offmax), + BTLessEqualStrategyNumber)) + break; + } + } + + /* step right one page */ + rblkno = opaque->btpo_next; + _bt_relbuf(rel, buf, access); + buf = _bt_getbuf(rel, rblkno, access); + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + hikey = PageGetItemId(page, P_HIKEY); + + } while (!P_RIGHTMOST(opaque) + && _bt_skeycmp(rel, keysz, scankey, page, hikey, + BTGreaterEqualStrategyNumber)); + } + return (buf); } /* - * _bt_skeycmp() -- compare a scan key to a particular item on a page using - * a requested strategy (<, <=, =, >=, >). + * _bt_skeycmp() -- compare a scan key to a particular item on a page using + * a requested strategy (<, <=, =, >=, >). * - * We ignore the unique OIDs stored in the btree item here. Those - * numbers are intended for use internally only, in repositioning a - * scan after a page split. They do not impose any meaningful ordering. 
+ * We ignore the unique OIDs stored in the btree item here. Those + * numbers are intended for use internally only, in repositioning a + * scan after a page split. They do not impose any meaningful ordering. * - * The comparison is A <op> B, where A is the scan key and B is the - * tuple pointed at by itemid on page. + * The comparison is A <op> B, where A is the scan key and B is the + * tuple pointed at by itemid on page. */ bool _bt_skeycmp(Relation rel, - Size keysz, - ScanKey scankey, - Page page, - ItemId itemid, - StrategyNumber strat) + Size keysz, + ScanKey scankey, + Page page, + ItemId itemid, + StrategyNumber strat) { - BTItem item; - IndexTuple indexTuple; - TupleDesc tupDes; - ScanKey entry; - int i; - Datum attrDatum; - Datum keyDatum; - bool compare; - bool isNull; - bool useEqual = false; - bool keyNull; - - if ( strat == BTLessEqualStrategyNumber ) - { - useEqual = true; - strat = BTLessStrategyNumber; - } - else if ( strat == BTGreaterEqualStrategyNumber ) - { - useEqual = true; - strat = BTGreaterStrategyNumber; - } - - item = (BTItem) PageGetItem(page, itemid); - indexTuple = &(item->bti_itup); - - tupDes = RelationGetTupleDescriptor(rel); - - /* see if the comparison is true for all of the key attributes */ - for (i=1; i <= keysz; i++) { - - entry = &scankey[i-1]; - Assert ( entry->sk_attno == i ); - attrDatum = index_getattr(indexTuple, - entry->sk_attno, - tupDes, - &isNull); - keyDatum = entry->sk_argument; - - /* see comments about NULLs handling in btbuild */ - if ( entry->sk_flags & SK_ISNULL ) /* key is NULL */ + BTItem item; + IndexTuple indexTuple; + TupleDesc tupDes; + ScanKey entry; + int i; + Datum attrDatum; + Datum keyDatum; + bool compare; + bool isNull; + bool useEqual = false; + bool keyNull; + + if (strat == BTLessEqualStrategyNumber) { - Assert ( entry->sk_procedure == NullValueRegProcedure ); - keyNull = true; - if ( isNull ) - compare = ( strat == BTEqualStrategyNumber ) ? 
true : false; - else - compare = ( strat == BTGreaterStrategyNumber ) ? true : false; - } - else if ( isNull ) /* key is NOT_NULL and item is NULL */ - { - keyNull = false; - compare = ( strat == BTLessStrategyNumber ) ? true : false; - } - else - { - keyNull = false; - compare = _bt_invokestrat(rel, i, strat, keyDatum, attrDatum); + useEqual = true; + strat = BTLessStrategyNumber; } - - if ( compare ) /* true for one of ">, <, =" */ + else if (strat == BTGreaterEqualStrategyNumber) { - if ( strat != BTEqualStrategyNumber ) - return (true); + useEqual = true; + strat = BTGreaterStrategyNumber; } - else /* false for one of ">, <, =" */ + + item = (BTItem) PageGetItem(page, itemid); + indexTuple = &(item->bti_itup); + + tupDes = RelationGetTupleDescriptor(rel); + + /* see if the comparison is true for all of the key attributes */ + for (i = 1; i <= keysz; i++) { - if ( strat == BTEqualStrategyNumber ) - return (false); - /* - * if original strat was "<=, >=" OR - * "<, >" but some attribute(s) left - * - need to test for Equality - */ - if ( useEqual || i < keysz ) - { - if ( keyNull || isNull ) - compare = ( keyNull && isNull ) ? true : false; - else - compare = _bt_invokestrat(rel, i, BTEqualStrategyNumber, - keyDatum, attrDatum); - if ( compare ) /* key' and item' attributes are equal */ - continue; /* - try to compare next attributes */ - } - return (false); + + entry = &scankey[i - 1]; + Assert(entry->sk_attno == i); + attrDatum = index_getattr(indexTuple, + entry->sk_attno, + tupDes, + &isNull); + keyDatum = entry->sk_argument; + + /* see comments about NULLs handling in btbuild */ + if (entry->sk_flags & SK_ISNULL) /* key is NULL */ + { + Assert(entry->sk_procedure == NullValueRegProcedure); + keyNull = true; + if (isNull) + compare = (strat == BTEqualStrategyNumber) ? true : false; + else + compare = (strat == BTGreaterStrategyNumber) ? 
true : false; + } + else if (isNull) /* key is NOT_NULL and item is NULL */ + { + keyNull = false; + compare = (strat == BTLessStrategyNumber) ? true : false; + } + else + { + keyNull = false; + compare = _bt_invokestrat(rel, i, strat, keyDatum, attrDatum); + } + + if (compare) /* true for one of ">, <, =" */ + { + if (strat != BTEqualStrategyNumber) + return (true); + } + else +/* false for one of ">, <, =" */ + { + if (strat == BTEqualStrategyNumber) + return (false); + + /* + * if original strat was "<=, >=" OR "<, >" but some + * attribute(s) left - need to test for Equality + */ + if (useEqual || i < keysz) + { + if (keyNull || isNull) + compare = (keyNull && isNull) ? true : false; + else + compare = _bt_invokestrat(rel, i, BTEqualStrategyNumber, + keyDatum, attrDatum); + if (compare) /* key' and item' attributes are equal */ + continue; /* - try to compare next attributes */ + } + return (false); + } } - } - - return (true); + + return (true); } /* - * _bt_binsrch() -- Do a binary search for a key on a particular page. + * _bt_binsrch() -- Do a binary search for a key on a particular page. * - * The scankey we get has the compare function stored in the procedure - * entry of each data struct. We invoke this regproc to do the - * comparison for every key in the scankey. _bt_binsrch() returns - * the OffsetNumber of the first matching key on the page, or the - * OffsetNumber at which the matching key would appear if it were - * on this page. + * The scankey we get has the compare function stored in the procedure + * entry of each data struct. We invoke this regproc to do the + * comparison for every key in the scankey. _bt_binsrch() returns + * the OffsetNumber of the first matching key on the page, or the + * OffsetNumber at which the matching key would appear if it were + * on this page. * - * By the time this procedure is called, we're sure we're looking - * at the right page -- don't need to walk right. 
_bt_binsrch() has - * no lock or refcount side effects on the buffer. + * By the time this procedure is called, we're sure we're looking + * at the right page -- don't need to walk right. _bt_binsrch() has + * no lock or refcount side effects on the buffer. */ OffsetNumber _bt_binsrch(Relation rel, - Buffer buf, - int keysz, - ScanKey scankey, - int srchtype) + Buffer buf, + int keysz, + ScanKey scankey, + int srchtype) { - TupleDesc itupdesc; - Page page; - BTPageOpaque opaque; - OffsetNumber low, mid, high; - int natts = rel->rd_rel->relnatts; - int result; - - itupdesc = RelationGetTupleDescriptor(rel); - page = BufferGetPage(buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - - /* by convention, item 1 on any non-rightmost page is the high key */ - low = mid = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - - high = PageGetMaxOffsetNumber(page); - - /* - * Since for non-rightmost pages, the first item on the page is the - * high key, there are two notions of emptiness. One is if nothing - * appears on the page. The other is if nothing but the high key does. - * The reason we test high <= low, rather than high == low, is that - * after vacuuming there may be nothing *but* the high key on a page. - * In that case, given the scheme above, low = 2 and high = 1. - */ - - if ( PageIsEmpty (page) ) - return (low); - if ( (! 
P_RIGHTMOST(opaque) && high <= low)) - { - if ( high < low || - (srchtype == BT_DESCENT && !(opaque->btpo_flags & BTP_LEAF)) ) - return (low); - /* It's insertion and high == low == 2 */ - result = _bt_compare(rel, itupdesc, page, keysz, scankey, low); - if ( result > 0 ) - return ( OffsetNumberNext (low) ); - return (low); - } - - while ((high - low) > 1) { - mid = low + ((high - low) / 2); - result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid); - - if (result > 0) - low = mid; - else if (result < 0) - high = mid - 1; - else + TupleDesc itupdesc; + Page page; + BTPageOpaque opaque; + OffsetNumber low, + mid, + high; + int natts = rel->rd_rel->relnatts; + int result; + + itupdesc = RelationGetTupleDescriptor(rel); + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* by convention, item 1 on any non-rightmost page is the high key */ + low = mid = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + + high = PageGetMaxOffsetNumber(page); + + /* + * Since for non-rightmost pages, the first item on the page is the + * high key, there are two notions of emptiness. One is if nothing + * appears on the page. The other is if nothing but the high key + * does. The reason we test high <= low, rather than high == low, is + * that after vacuuming there may be nothing *but* the high key on a + * page. In that case, given the scheme above, low = 2 and high = 1. + */ + + if (PageIsEmpty(page)) + return (low); + if ((!P_RIGHTMOST(opaque) && high <= low)) { - mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid); - /* - * NOTE for multi-column indices: we may do scan using - * keys not for all attrs. But we handle duplicates using - * all attrs in _bt_insert/_bt_spool code. 
And so while - * searching on internal pages having number of attrs > keysize - * we want to point at the last item < the scankey, not at the - * first item = the scankey (!!!), and let _bt_moveright - * decide later whether to move right or not (see comments and - * example there). Note also that INSERTions are not affected - * by this code (natts == keysz). - vadim 04/15/97 - */ - if ( natts == keysz || opaque->btpo_flags & BTP_LEAF ) - return (mid); - low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - if ( mid == low ) - return (mid); - return (OffsetNumberPrev(mid)); + if (high < low || + (srchtype == BT_DESCENT && !(opaque->btpo_flags & BTP_LEAF))) + return (low); + /* It's insertion and high == low == 2 */ + result = _bt_compare(rel, itupdesc, page, keysz, scankey, low); + if (result > 0) + return (OffsetNumberNext(low)); + return (low); } - } - - /* - * We terminated because the endpoints got too close together. There - * are two cases to take care of. - * - * For non-insertion searches on internal pages, we want to point at - * the last key <, or first key =, the scankey on the page. This - * guarantees that we'll descend the tree correctly. - * (NOTE comments above for multi-column indices). - * - * For all other cases, we want to point at the first key >= - * the scankey on the page. This guarantees that scans and - * insertions will happen correctly. - */ - - if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT) - { /* - * We want the last key <, or first key ==, the scan key. - */ - result = _bt_compare(rel, itupdesc, page, keysz, scankey, high); - - if (result == 0) + + while ((high - low) > 1) { - mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high); - /* - * If natts > keysz we want last item < the scan key. - * See comments above for multi-column indices. - */ - if ( natts == keysz ) - return (mid); - low = P_RIGHTMOST(opaque) ? 
P_HIKEY : P_FIRSTKEY; - if ( mid == low ) - return (mid); - return (OffsetNumberPrev(mid)); + mid = low + ((high - low) / 2); + result = _bt_compare(rel, itupdesc, page, keysz, scankey, mid); + + if (result > 0) + low = mid; + else if (result < 0) + high = mid - 1; + else + { + mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, mid); + + /* + * NOTE for multi-column indices: we may do scan using keys + * not for all attrs. But we handle duplicates using all attrs + * in _bt_insert/_bt_spool code. And so while searching on + * internal pages having number of attrs > keysize we want to + * point at the last item < the scankey, not at the first item + * = the scankey (!!!), and let _bt_moveright decide later + * whether to move right or not (see comments and example + * there). Note also that INSERTions are not affected by this + * code (natts == keysz). - vadim 04/15/97 + */ + if (natts == keysz || opaque->btpo_flags & BTP_LEAF) + return (mid); + low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + if (mid == low) + return (mid); + return (OffsetNumberPrev(mid)); + } + } + + /* + * We terminated because the endpoints got too close together. There + * are two cases to take care of. + * + * For non-insertion searches on internal pages, we want to point at the + * last key <, or first key =, the scankey on the page. This + * guarantees that we'll descend the tree correctly. (NOTE comments + * above for multi-column indices). + * + * For all other cases, we want to point at the first key >= the scankey + * on the page. This guarantees that scans and insertions will happen + * correctly. + */ + + if (!(opaque->btpo_flags & BTP_LEAF) && srchtype == BT_DESCENT) + { /* We want the last key <, or first key + * ==, the scan key. */ + result = _bt_compare(rel, itupdesc, page, keysz, scankey, high); + + if (result == 0) + { + mid = _bt_firsteq(rel, itupdesc, page, keysz, scankey, high); + + /* + * If natts > keysz we want last item < the scan key. 
See + * comments above for multi-column indices. + */ + if (natts == keysz) + return (mid); + low = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + if (mid == low) + return (mid); + return (OffsetNumberPrev(mid)); + } + else if (result > 0) + return (high); + else + return (low); } - else if (result > 0) - return (high); - else - return (low); - } - else /* we want the first key >= the scan key */ - { - result = _bt_compare(rel, itupdesc, page, keysz, scankey, low); - if (result <= 0) - return (low); else +/* we want the first key >= the scan key */ { - if (low == high) - return (OffsetNumberNext(low)); - - result = _bt_compare(rel, itupdesc, page, keysz, scankey, high); - if (result <= 0) - return (high); - else - return (OffsetNumberNext(high)); + result = _bt_compare(rel, itupdesc, page, keysz, scankey, low); + if (result <= 0) + return (low); + else + { + if (low == high) + return (OffsetNumberNext(low)); + + result = _bt_compare(rel, itupdesc, page, keysz, scankey, high); + if (result <= 0) + return (high); + else + return (OffsetNumberNext(high)); + } } - } } -static OffsetNumber +static OffsetNumber _bt_firsteq(Relation rel, - TupleDesc itupdesc, - Page page, - Size keysz, - ScanKey scankey, - OffsetNumber offnum) + TupleDesc itupdesc, + Page page, + Size keysz, + ScanKey scankey, + OffsetNumber offnum) { - BTPageOpaque opaque; - OffsetNumber limit; - - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - - /* skip the high key, if any */ - limit = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - - /* walk backwards looking for the first key in the chain of duplicates */ - while (offnum > limit - && _bt_compare(rel, itupdesc, page, - keysz, scankey, OffsetNumberPrev(offnum)) == 0) { - offnum = OffsetNumberPrev(offnum); - } - - return (offnum); + BTPageOpaque opaque; + OffsetNumber limit; + + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* skip the high key, if any */ + limit = P_RIGHTMOST(opaque) ? 
P_HIKEY : P_FIRSTKEY; + + /* walk backwards looking for the first key in the chain of duplicates */ + while (offnum > limit + && _bt_compare(rel, itupdesc, page, + keysz, scankey, OffsetNumberPrev(offnum)) == 0) + { + offnum = OffsetNumberPrev(offnum); + } + + return (offnum); } /* - * _bt_compare() -- Compare scankey to a particular tuple on the page. + * _bt_compare() -- Compare scankey to a particular tuple on the page. * - * This routine returns: - * -1 if scankey < tuple at offnum; - * 0 if scankey == tuple at offnum; - * +1 if scankey > tuple at offnum. + * This routine returns: + * -1 if scankey < tuple at offnum; + * 0 if scankey == tuple at offnum; + * +1 if scankey > tuple at offnum. * - * -- Old comments: - * In order to avoid having to propagate changes up the tree any time - * a new minimal key is inserted, the leftmost entry on the leftmost - * page is less than all possible keys, by definition. + * -- Old comments: + * In order to avoid having to propagate changes up the tree any time + * a new minimal key is inserted, the leftmost entry on the leftmost + * page is less than all possible keys, by definition. * - * -- New ones: - * New insertion code (fix against updating _in_place_ if new minimal - * key has bigger size than old one) may delete P_HIKEY entry on the - * root page in order to insert new minimal key - and so this definition - * does not work properly in this case and breaks key' order on root - * page. BTW, this propagation occures only while page' splitting, - * but not "any time a new min key is inserted" (see _bt_insertonpg). - * - vadim 12/05/96 + * -- New ones: + * New insertion code (fix against updating _in_place_ if new minimal + * key has bigger size than old one) may delete P_HIKEY entry on the + * root page in order to insert new minimal key - and so this definition + * does not work properly in this case and breaks key' order on root + * page. 
BTW, this propagation occures only while page' splitting, + * but not "any time a new min key is inserted" (see _bt_insertonpg). + * - vadim 12/05/96 */ static int _bt_compare(Relation rel, - TupleDesc itupdesc, - Page page, - int keysz, - ScanKey scankey, - OffsetNumber offnum) + TupleDesc itupdesc, + Page page, + int keysz, + ScanKey scankey, + OffsetNumber offnum) { - Datum datum; - BTItem btitem; - ItemId itemid; - IndexTuple itup; - BTPageOpaque opaque; - ScanKey entry; - AttrNumber attno; - int result; - int i; - bool null; - - /* - * If this is a leftmost internal page, and if our comparison is - * with the first key on the page, then the item at that position is - * by definition less than the scan key. - * - * - see new comments above... - */ - - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - if (!(opaque->btpo_flags & BTP_LEAF) - && P_LEFTMOST(opaque) - && offnum == P_HIKEY) { - itemid = PageGetItemId(page, offnum); - + Datum datum; + BTItem btitem; + ItemId itemid; + IndexTuple itup; + BTPageOpaque opaque; + ScanKey entry; + AttrNumber attno; + int result; + int i; + bool null; + /* - * we just have to believe that this will only be called with - * offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the - * first actual data key (i.e., this is also a rightmost - * page). there doesn't seem to be any code that implies - * that the leftmost page is normally missing a high key as - * well as the rightmost page. but that implies that this - * code path only applies to the root -- which seems - * unlikely.. + * If this is a leftmost internal page, and if our comparison is with + * the first key on the page, then the item at that position is by + * definition less than the scan key. * - * - see new comments above... + * - see new comments above... */ - if (! 
P_RIGHTMOST(opaque)) { - elog(WARN, "_bt_compare: invalid comparison to high key"); - } + + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (!(opaque->btpo_flags & BTP_LEAF) + && P_LEFTMOST(opaque) + && offnum == P_HIKEY) + { + itemid = PageGetItemId(page, offnum); + + /* + * we just have to believe that this will only be called with + * offnum == P_HIKEY when P_HIKEY is the OffsetNumber of the first + * actual data key (i.e., this is also a rightmost page). there + * doesn't seem to be any code that implies that the leftmost page + * is normally missing a high key as well as the rightmost page. + * but that implies that this code path only applies to the root + * -- which seems unlikely.. + * + * - see new comments above... + */ + if (!P_RIGHTMOST(opaque)) + { + elog(WARN, "_bt_compare: invalid comparison to high key"); + } #if 0 + + /* + * We just have to belive that right answer will not break + * anything. I've checked code and all seems to be ok. See new + * comments above... + * + * -- Old comments If the item on the page is equal to the scankey, + * that's okay to admit. We just can't claim that the first key + * on the page is greater than anything. + */ + + if (_bt_skeycmp(rel, keysz, scankey, page, itemid, + BTEqualStrategyNumber)) + { + return (0); + } + return (1); +#endif + } + + btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &(btitem->bti_itup); + /* - * We just have to belive that right answer will not - * break anything. I've checked code and all seems to be ok. - * See new comments above... + * The scan key is set up with the attribute number associated with + * each term in the key. It is important that, if the index is + * multi-key, the scan contain the first k key attributes, and that + * they be in order. If you think about how multi-key ordering works, + * you'll understand why this is. * - * -- Old comments - * If the item on the page is equal to the scankey, that's - * okay to admit. 
We just can't claim that the first key on - * the page is greater than anything. + * We don't test for violation of this condition here. */ - - if (_bt_skeycmp(rel, keysz, scankey, page, itemid, - BTEqualStrategyNumber)) { - return (0); - } - return (1); -#endif - } - - btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &(btitem->bti_itup); - - /* - * The scan key is set up with the attribute number associated with each - * term in the key. It is important that, if the index is multi-key, - * the scan contain the first k key attributes, and that they be in - * order. If you think about how multi-key ordering works, you'll - * understand why this is. - * - * We don't test for violation of this condition here. - */ - - for (i = 1; i <= keysz; i++) { - long tmpres; - - entry = &scankey[i - 1]; - attno = entry->sk_attno; - datum = index_getattr(itup, attno, itupdesc, &null); - - /* see comments about NULLs handling in btbuild */ - if ( entry->sk_flags & SK_ISNULL ) /* key is NULL */ + + for (i = 1; i <= keysz; i++) { - Assert ( entry->sk_procedure == NullValueRegProcedure ); - if ( null ) - tmpres = (long) 0; /* NULL "=" NULL */ - else - tmpres = (long) 1; /* NULL ">" NOT_NULL */ - } - else if ( null ) /* key is NOT_NULL and item is NULL */ - { - tmpres = (long) -1; /* NOT_NULL "<" NULL */ - } - else - { - tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure, - entry->sk_argument, datum); + long tmpres; + + entry = &scankey[i - 1]; + attno = entry->sk_attno; + datum = index_getattr(itup, attno, itupdesc, &null); + + /* see comments about NULLs handling in btbuild */ + if (entry->sk_flags & SK_ISNULL) /* key is NULL */ + { + Assert(entry->sk_procedure == NullValueRegProcedure); + if (null) + tmpres = (long) 0; /* NULL "=" NULL */ + else + tmpres = (long) 1; /* NULL ">" NOT_NULL */ + } + else if (null) /* key is NOT_NULL and item is NULL */ + { + tmpres = (long) -1; /* NOT_NULL "<" NULL */ + } + else + { + tmpres = (long) 
FMGR_PTR2(entry->sk_func, entry->sk_procedure, + entry->sk_argument, datum); + } + result = tmpres; + + /* if the keys are unequal, return the difference */ + if (result != 0) + return (result); } - result = tmpres; - - /* if the keys are unequal, return the difference */ - if (result != 0) - return (result); - } - - /* by here, the keys are equal */ - return (0); + + /* by here, the keys are equal */ + return (0); } /* - * _bt_next() -- Get the next item in a scan. + * _bt_next() -- Get the next item in a scan. * - * On entry, we have a valid currentItemData in the scan, and a - * read lock on the page that contains that item. We do not have - * the page pinned. We return the next item in the scan. On - * exit, we have the page containing the next item locked but not - * pinned. + * On entry, we have a valid currentItemData in the scan, and a + * read lock on the page that contains that item. We do not have + * the page pinned. We return the next item in the scan. On + * exit, we have the page containing the next item locked but not + * pinned. */ RetrieveIndexResult _bt_next(IndexScanDesc scan, ScanDirection dir) { - Relation rel; - Buffer buf; - Page page; - OffsetNumber offnum; - RetrieveIndexResult res; - ItemPointer current; - BTItem btitem; - IndexTuple itup; - BTScanOpaque so; - Size keysok; - - rel = scan->relation; - so = (BTScanOpaque) scan->opaque; - current = &(scan->currentItemData); - - /* - * XXX 10 may 91: somewhere there's a bug in our management of the - * cached buffer for this scan. wei discovered it. the following - * is a workaround so he can work until i figure out what's going on. 
- */ - - if (!BufferIsValid(so->btso_curbuf)) - so->btso_curbuf = _bt_getbuf(rel, ItemPointerGetBlockNumber(current), - BT_READ); - - /* we still have the buffer pinned and locked */ - buf = so->btso_curbuf; - - do - { - /* step one tuple in the appropriate direction */ - if (!_bt_step(scan, &buf, dir)) - return ((RetrieveIndexResult) NULL); - - /* by here, current is the tuple we want to return */ - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &btitem->bti_itup; - - if ( _bt_checkkeys (scan, itup, &keysok) ) - { - Assert (keysok == so->numberOfKeys); - res = FormRetrieveIndexResult(current, &(itup->t_tid)); - - /* remember which buffer we have pinned and locked */ - so->btso_curbuf = buf; - return (res); - } + Relation rel; + Buffer buf; + Page page; + OffsetNumber offnum; + RetrieveIndexResult res; + ItemPointer current; + BTItem btitem; + IndexTuple itup; + BTScanOpaque so; + Size keysok; + + rel = scan->relation; + so = (BTScanOpaque) scan->opaque; + current = &(scan->currentItemData); + + /* + * XXX 10 may 91: somewhere there's a bug in our management of the + * cached buffer for this scan. wei discovered it. the following is + * a workaround so he can work until i figure out what's going on. 
+ */ + + if (!BufferIsValid(so->btso_curbuf)) + so->btso_curbuf = _bt_getbuf(rel, ItemPointerGetBlockNumber(current), + BT_READ); + + /* we still have the buffer pinned and locked */ + buf = so->btso_curbuf; + + do + { + /* step one tuple in the appropriate direction */ + if (!_bt_step(scan, &buf, dir)) + return ((RetrieveIndexResult) NULL); - } while ( keysok >= so->numberOfFirstKeys ); + /* by here, current is the tuple we want to return */ + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &btitem->bti_itup; + + if (_bt_checkkeys(scan, itup, &keysok)) + { + Assert(keysok == so->numberOfKeys); + res = FormRetrieveIndexResult(current, &(itup->t_tid)); + + /* remember which buffer we have pinned and locked */ + so->btso_curbuf = buf; + return (res); + } + + } while (keysok >= so->numberOfFirstKeys); - ItemPointerSetInvalid(current); - so->btso_curbuf = InvalidBuffer; - _bt_relbuf(rel, buf, BT_READ); - - return ((RetrieveIndexResult) NULL); + ItemPointerSetInvalid(current); + so->btso_curbuf = InvalidBuffer; + _bt_relbuf(rel, buf, BT_READ); + + return ((RetrieveIndexResult) NULL); } /* - * _bt_first() -- Find the first item in a scan. + * _bt_first() -- Find the first item in a scan. * - * We need to be clever about the type of scan, the operation it's - * performing, and the tree ordering. We return the RetrieveIndexResult - * of the first item in the tree that satisfies the qualification - * associated with the scan descriptor. On exit, the page containing - * the current index tuple is read locked and pinned, and the scan's - * opaque data entry is updated to include the buffer. + * We need to be clever about the type of scan, the operation it's + * performing, and the tree ordering. We return the RetrieveIndexResult + * of the first item in the tree that satisfies the qualification + * associated with the scan descriptor. 
On exit, the page containing + * the current index tuple is read locked and pinned, and the scan's + * opaque data entry is updated to include the buffer. */ RetrieveIndexResult _bt_first(IndexScanDesc scan, ScanDirection dir) { - Relation rel; - TupleDesc itupdesc; - Buffer buf; - Page page; - BTPageOpaque pop; - BTStack stack; - OffsetNumber offnum, maxoff; - bool offGmax = false; - BTItem btitem; - IndexTuple itup; - ItemPointer current; - BlockNumber blkno; - StrategyNumber strat; - RetrieveIndexResult res; - RegProcedure proc; - int result; - BTScanOpaque so; - ScanKeyData skdata; - Size keysok; - - rel = scan->relation; - so = (BTScanOpaque) scan->opaque; - - /* - * Order the keys in the qualification and be sure - * that the scan exploits the tree order. - */ - so->numberOfFirstKeys = 0; /* may be changed by _bt_orderkeys */ - so->qual_ok = 1; /* may be changed by _bt_orderkeys */ - scan->scanFromEnd = false; - if ( so->numberOfKeys > 0 ) - { - _bt_orderkeys(rel, so); - - strat = _bt_getstrat(rel, 1, so->keyData[0].sk_procedure); + Relation rel; + TupleDesc itupdesc; + Buffer buf; + Page page; + BTPageOpaque pop; + BTStack stack; + OffsetNumber offnum, + maxoff; + bool offGmax = false; + BTItem btitem; + IndexTuple itup; + ItemPointer current; + BlockNumber blkno; + StrategyNumber strat; + RetrieveIndexResult res; + RegProcedure proc; + int result; + BTScanOpaque so; + ScanKeyData skdata; + Size keysok; - /* NOTE: it assumes ForwardScanDirection */ - if ( strat == BTLessStrategyNumber || - strat == BTLessEqualStrategyNumber ) - scan->scanFromEnd = true; - } - else - scan->scanFromEnd = true; - - if ( so->qual_ok == 0 ) - return ((RetrieveIndexResult) NULL); - - /* if we just need to walk down one edge of the tree, do that */ - if (scan->scanFromEnd) - return (_bt_endpoint(scan, dir)); - - itupdesc = RelationGetTupleDescriptor(rel); - current = &(scan->currentItemData); - - /* - * Okay, we want something more complicated. 
What we'll do is use - * the first item in the scan key passed in (which has been correctly - * ordered to take advantage of index ordering) to position ourselves - * at the right place in the scan. - */ - /* _bt_orderkeys disallows it, but it's place to add some code latter */ - if ( so->keyData[0].sk_flags & SK_ISNULL ) - { - elog (WARN, "_bt_first: btree doesn't support is(not)null, yet"); - return ((RetrieveIndexResult) NULL); - } - proc = index_getprocid(rel, 1, BTORDER_PROC); - ScanKeyEntryInitialize(&skdata, so->keyData[0].sk_flags, 1, proc, - so->keyData[0].sk_argument); - - stack = _bt_search(rel, 1, &skdata, &buf); - _bt_freestack(stack); - - blkno = BufferGetBlockNumber(buf); - page = BufferGetPage(buf); - - /* - * This will happen if the tree we're searching is entirely empty, - * or if we're doing a search for a key that would appear on an - * entirely empty internal page. In either case, there are no - * matching tuples in the index. - */ - - if (PageIsEmpty(page)) { - ItemPointerSetInvalid(current); - so->btso_curbuf = InvalidBuffer; - _bt_relbuf(rel, buf, BT_READ); - return ((RetrieveIndexResult) NULL); - } - maxoff = PageGetMaxOffsetNumber(page); - pop = (BTPageOpaque) PageGetSpecialPointer(page); - - /* - * Now _bt_moveright doesn't move from non-rightmost leaf page - * if scankey == hikey and there is only hikey there. It's - * good for insertion, but we need to do work for scan here. - * - vadim 05/27/97 - */ - - while ( maxoff == P_HIKEY && !P_RIGHTMOST(pop) && - _bt_skeycmp(rel, 1, &skdata, page, - PageGetItemId(page, P_HIKEY), - BTGreaterEqualStrategyNumber) ) - { - /* step right one page */ - blkno = pop->btpo_next; - _bt_relbuf(rel, buf, BT_READ); - buf = _bt_getbuf(rel, blkno, BT_READ); + rel = scan->relation; + so = (BTScanOpaque) scan->opaque; + + /* + * Order the keys in the qualification and be sure that the scan + * exploits the tree order. 
+ */ + so->numberOfFirstKeys = 0; /* may be changed by _bt_orderkeys */ + so->qual_ok = 1; /* may be changed by _bt_orderkeys */ + scan->scanFromEnd = false; + if (so->numberOfKeys > 0) + { + _bt_orderkeys(rel, so); + + strat = _bt_getstrat(rel, 1, so->keyData[0].sk_procedure); + + /* NOTE: it assumes ForwardScanDirection */ + if (strat == BTLessStrategyNumber || + strat == BTLessEqualStrategyNumber) + scan->scanFromEnd = true; + } + else + scan->scanFromEnd = true; + + if (so->qual_ok == 0) + return ((RetrieveIndexResult) NULL); + + /* if we just need to walk down one edge of the tree, do that */ + if (scan->scanFromEnd) + return (_bt_endpoint(scan, dir)); + + itupdesc = RelationGetTupleDescriptor(rel); + current = &(scan->currentItemData); + + /* + * Okay, we want something more complicated. What we'll do is use the + * first item in the scan key passed in (which has been correctly + * ordered to take advantage of index ordering) to position ourselves + * at the right place in the scan. + */ + /* _bt_orderkeys disallows it, but it's place to add some code latter */ + if (so->keyData[0].sk_flags & SK_ISNULL) + { + elog(WARN, "_bt_first: btree doesn't support is(not)null, yet"); + return ((RetrieveIndexResult) NULL); + } + proc = index_getprocid(rel, 1, BTORDER_PROC); + ScanKeyEntryInitialize(&skdata, so->keyData[0].sk_flags, 1, proc, + so->keyData[0].sk_argument); + + stack = _bt_search(rel, 1, &skdata, &buf); + _bt_freestack(stack); + + blkno = BufferGetBlockNumber(buf); page = BufferGetPage(buf); - if (PageIsEmpty(page)) { - ItemPointerSetInvalid(current); - so->btso_curbuf = InvalidBuffer; - _bt_relbuf(rel, buf, BT_READ); - return ((RetrieveIndexResult) NULL); + + /* + * This will happen if the tree we're searching is entirely empty, or + * if we're doing a search for a key that would appear on an entirely + * empty internal page. In either case, there are no matching tuples + * in the index. 
+ */ + + if (PageIsEmpty(page)) + { + ItemPointerSetInvalid(current); + so->btso_curbuf = InvalidBuffer; + _bt_relbuf(rel, buf, BT_READ); + return ((RetrieveIndexResult) NULL); } - maxoff = PageGetMaxOffsetNumber(page); + maxoff = PageGetMaxOffsetNumber(page); pop = (BTPageOpaque) PageGetSpecialPointer(page); - } - - - /* find the nearest match to the manufactured scan key on the page */ - offnum = _bt_binsrch(rel, buf, 1, &skdata, BT_DESCENT); - - if (offnum > maxoff) - { - offnum = maxoff; - offGmax = true; - } - - ItemPointerSet(current, blkno, offnum); - - /* - * Now find the right place to start the scan. Result is the - * value we're looking for minus the value we're looking at - * in the index. - */ - - result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); - - /* it's yet other place to add some code latter for is(not)null */ - - strat = _bt_getstrat(rel, 1, so->keyData[0].sk_procedure); - - switch (strat) { - case BTLessStrategyNumber: - if (result <= 0) { - do { - if (!_bt_twostep(scan, &buf, BackwardScanDirection)) - break; - - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); - } while (result <= 0); - - /* if this is true, the key we just looked at is gone */ - if (result > 0) - _bt_twostep(scan, &buf, ForwardScanDirection); - } - break; - - case BTLessEqualStrategyNumber: - if (result >= 0) { - do { - if (!_bt_twostep(scan, &buf, ForwardScanDirection)) - break; - - offnum = ItemPointerGetOffsetNumber(current); + + /* + * Now _bt_moveright doesn't move from non-rightmost leaf page if + * scankey == hikey and there is only hikey there. It's good for + * insertion, but we need to do work for scan here. 
- vadim 05/27/97 + */ + + while (maxoff == P_HIKEY && !P_RIGHTMOST(pop) && + _bt_skeycmp(rel, 1, &skdata, page, + PageGetItemId(page, P_HIKEY), + BTGreaterEqualStrategyNumber)) + { + /* step right one page */ + blkno = pop->btpo_next; + _bt_relbuf(rel, buf, BT_READ); + buf = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(buf); - result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); - } while (result >= 0); - - if (result < 0) - _bt_twostep(scan, &buf, BackwardScanDirection); + if (PageIsEmpty(page)) + { + ItemPointerSetInvalid(current); + so->btso_curbuf = InvalidBuffer; + _bt_relbuf(rel, buf, BT_READ); + return ((RetrieveIndexResult) NULL); + } + maxoff = PageGetMaxOffsetNumber(page); + pop = (BTPageOpaque) PageGetSpecialPointer(page); } - break; - - case BTEqualStrategyNumber: - if (result != 0) { - _bt_relbuf(scan->relation, buf, BT_READ); - so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(&(scan->currentItemData)); - return ((RetrieveIndexResult) NULL); + + + /* find the nearest match to the manufactured scan key on the page */ + offnum = _bt_binsrch(rel, buf, 1, &skdata, BT_DESCENT); + + if (offnum > maxoff) + { + offnum = maxoff; + offGmax = true; } - break; - - case BTGreaterEqualStrategyNumber: - if ( offGmax ) + + ItemPointerSet(current, blkno, offnum); + + /* + * Now find the right place to start the scan. Result is the value + * we're looking for minus the value we're looking at in the index. 
+ */ + + result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); + + /* it's yet other place to add some code latter for is(not)null */ + + strat = _bt_getstrat(rel, 1, so->keyData[0].sk_procedure); + + switch (strat) { - if (result < 0) - { - Assert ( !P_RIGHTMOST(pop) && maxoff == P_HIKEY ); - if ( !_bt_step(scan, &buf, ForwardScanDirection) ) - { - _bt_relbuf(scan->relation, buf, BT_READ); - so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(&(scan->currentItemData)); - return ((RetrieveIndexResult) NULL); + case BTLessStrategyNumber: + if (result <= 0) + { + do + { + if (!_bt_twostep(scan, &buf, BackwardScanDirection)) + break; + + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); + } while (result <= 0); + + /* if this is true, the key we just looked at is gone */ + if (result > 0) + _bt_twostep(scan, &buf, ForwardScanDirection); } - } - else if (result > 0) - { /* - * Just remember: _bt_binsrch() returns the OffsetNumber of - * the first matching key on the page, or the OffsetNumber at - * which the matching key WOULD APPEAR IF IT WERE on this page. - * No key on this page, but offnum from _bt_binsrch() greater - * maxoff - have to move right. 
- vadim 12/06/96 - */ - _bt_twostep(scan, &buf, ForwardScanDirection); - } + break; + + case BTLessEqualStrategyNumber: + if (result >= 0) + { + do + { + if (!_bt_twostep(scan, &buf, ForwardScanDirection)) + break; + + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); + } while (result >= 0); + + if (result < 0) + _bt_twostep(scan, &buf, BackwardScanDirection); + } + break; + + case BTEqualStrategyNumber: + if (result != 0) + { + _bt_relbuf(scan->relation, buf, BT_READ); + so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(&(scan->currentItemData)); + return ((RetrieveIndexResult) NULL); + } + break; + + case BTGreaterEqualStrategyNumber: + if (offGmax) + { + if (result < 0) + { + Assert(!P_RIGHTMOST(pop) && maxoff == P_HIKEY); + if (!_bt_step(scan, &buf, ForwardScanDirection)) + { + _bt_relbuf(scan->relation, buf, BT_READ); + so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(&(scan->currentItemData)); + return ((RetrieveIndexResult) NULL); + } + } + else if (result > 0) + { /* Just remember: _bt_binsrch() returns + * the OffsetNumber of the first matching + * key on the page, or the OffsetNumber at + * which the matching key WOULD APPEAR IF + * IT WERE on this page. No key on this + * page, but offnum from _bt_binsrch() + * greater maxoff - have to move right. 
- + * vadim 12/06/96 */ + _bt_twostep(scan, &buf, ForwardScanDirection); + } + } + else if (result < 0) + { + do + { + if (!_bt_twostep(scan, &buf, BackwardScanDirection)) + break; + + page = BufferGetPage(buf); + offnum = ItemPointerGetOffsetNumber(current); + result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); + } while (result < 0); + + if (result > 0) + _bt_twostep(scan, &buf, ForwardScanDirection); + } + break; + + case BTGreaterStrategyNumber: + /* offGmax helps as above */ + if (result >= 0 || offGmax) + { + do + { + if (!_bt_twostep(scan, &buf, ForwardScanDirection)) + break; + + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); + } while (result >= 0); + } + break; } - else if (result < 0) + + /* okay, current item pointer for the scan is right */ + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &btitem->bti_itup; + + if (_bt_checkkeys(scan, itup, &keysok)) { - do { - if (!_bt_twostep(scan, &buf, BackwardScanDirection)) - break; - - page = BufferGetPage(buf); - offnum = ItemPointerGetOffsetNumber(current); - result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); - } while (result < 0); - - if (result > 0) - _bt_twostep(scan, &buf, ForwardScanDirection); + res = FormRetrieveIndexResult(current, &(itup->t_tid)); + + /* remember which buffer we have pinned */ + so->btso_curbuf = buf; } - break; - - case BTGreaterStrategyNumber: - /* offGmax helps as above */ - if (result >= 0 || offGmax) { - do { - if (!_bt_twostep(scan, &buf, ForwardScanDirection)) - break; - - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - result = _bt_compare(rel, itupdesc, page, 1, &skdata, offnum); - } while (result >= 0); + else if (keysok >= so->numberOfFirstKeys) + { + so->btso_curbuf = buf; + return (_bt_next(scan, dir)); } - break; - } - - /* 
okay, current item pointer for the scan is right */ - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &btitem->bti_itup; - - if ( _bt_checkkeys (scan, itup, &keysok) ) - { - res = FormRetrieveIndexResult(current, &(itup->t_tid)); - - /* remember which buffer we have pinned */ - so->btso_curbuf = buf; - } - else if ( keysok >= so->numberOfFirstKeys ) - { - so->btso_curbuf = buf; - return (_bt_next (scan, dir)); - } - else - { - ItemPointerSetInvalid(current); - so->btso_curbuf = InvalidBuffer; - _bt_relbuf(rel, buf, BT_READ); - res = (RetrieveIndexResult) NULL; - } - - return (res); + else + { + ItemPointerSetInvalid(current); + so->btso_curbuf = InvalidBuffer; + _bt_relbuf(rel, buf, BT_READ); + res = (RetrieveIndexResult) NULL; + } + + return (res); } /* - * _bt_step() -- Step one item in the requested direction in a scan on - * the tree. + * _bt_step() -- Step one item in the requested direction in a scan on + * the tree. * - * If no adjacent record exists in the requested direction, return - * false. Else, return true and set the currentItemData for the - * scan to the right thing. + * If no adjacent record exists in the requested direction, return + * false. Else, return true and set the currentItemData for the + * scan to the right thing. 
*/ bool -_bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) +_bt_step(IndexScanDesc scan, Buffer * bufP, ScanDirection dir) { - Page page; - BTPageOpaque opaque; - OffsetNumber offnum, maxoff; - OffsetNumber start; - BlockNumber blkno; - BlockNumber obknum; - BTScanOpaque so; - ItemPointer current; - Relation rel; - - rel = scan->relation; - current = &(scan->currentItemData); - offnum = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(*bufP); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - so = (BTScanOpaque) scan->opaque; - maxoff = PageGetMaxOffsetNumber(page); - - /* get the next tuple */ - if (ScanDirectionIsForward(dir)) { - if (!PageIsEmpty(page) && offnum < maxoff) { - offnum = OffsetNumberNext(offnum); - } else { - - /* if we're at end of scan, release the buffer and return */ - blkno = opaque->btpo_next; - if (P_RIGHTMOST(opaque)) { - _bt_relbuf(rel, *bufP, BT_READ); - ItemPointerSetInvalid(current); - *bufP = so->btso_curbuf = InvalidBuffer; - return (false); - } else { - - /* walk right to the next page with data */ - _bt_relbuf(rel, *bufP, BT_READ); - for (;;) { - *bufP = _bt_getbuf(rel, blkno, BT_READ); - page = BufferGetPage(*bufP); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - start = P_RIGHTMOST(opaque) ? 
P_HIKEY : P_FIRSTKEY; - - if (!PageIsEmpty(page) && start <= maxoff) { - break; - } else { + Page page; + BTPageOpaque opaque; + OffsetNumber offnum, + maxoff; + OffsetNumber start; + BlockNumber blkno; + BlockNumber obknum; + BTScanOpaque so; + ItemPointer current; + Relation rel; + + rel = scan->relation; + current = &(scan->currentItemData); + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(*bufP); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + so = (BTScanOpaque) scan->opaque; + maxoff = PageGetMaxOffsetNumber(page); + + /* get the next tuple */ + if (ScanDirectionIsForward(dir)) + { + if (!PageIsEmpty(page) && offnum < maxoff) + { + offnum = OffsetNumberNext(offnum); + } + else + { + + /* if we're at end of scan, release the buffer and return */ blkno = opaque->btpo_next; - _bt_relbuf(rel, *bufP, BT_READ); - if (blkno == P_NONE) { - *bufP = so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(current); - return (false); + if (P_RIGHTMOST(opaque)) + { + _bt_relbuf(rel, *bufP, BT_READ); + ItemPointerSetInvalid(current); + *bufP = so->btso_curbuf = InvalidBuffer; + return (false); + } + else + { + + /* walk right to the next page with data */ + _bt_relbuf(rel, *bufP, BT_READ); + for (;;) + { + *bufP = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(*bufP); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + maxoff = PageGetMaxOffsetNumber(page); + start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + + if (!PageIsEmpty(page) && start <= maxoff) + { + break; + } + else + { + blkno = opaque->btpo_next; + _bt_relbuf(rel, *bufP, BT_READ); + if (blkno == P_NONE) + { + *bufP = so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(current); + return (false); + } + } + } + offnum = start; } - } } - offnum = start; - } } - } else if (ScanDirectionIsBackward(dir)) { - - /* remember that high key is item zero on non-rightmost pages */ - start = P_RIGHTMOST(opaque) ? 
P_HIKEY : P_FIRSTKEY; + else if (ScanDirectionIsBackward(dir)) + { - if (offnum > start) { - offnum = OffsetNumberPrev(offnum); - } else { - - /* if we're at end of scan, release the buffer and return */ - blkno = opaque->btpo_prev; - if (P_LEFTMOST(opaque)) { - _bt_relbuf(rel, *bufP, BT_READ); - *bufP = so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(current); - return (false); - } else { - - obknum = BufferGetBlockNumber(*bufP); - - /* walk right to the next page with data */ - _bt_relbuf(rel, *bufP, BT_READ); - for (;;) { - *bufP = _bt_getbuf(rel, blkno, BT_READ); - page = BufferGetPage(*bufP); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - - /* - * If the adjacent page just split, then we may have the - * wrong block. Handle this case. Because pages only - * split right, we don't have to worry about this failing - * to terminate. - */ - - while (opaque->btpo_next != obknum) { - blkno = opaque->btpo_next; - _bt_relbuf(rel, *bufP, BT_READ); - *bufP = _bt_getbuf(rel, blkno, BT_READ); - page = BufferGetPage(*bufP); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - } - - /* don't consider the high key */ - start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - - /* anything to look at here? */ - if (!PageIsEmpty(page) && maxoff >= start) { - break; - } else { + /* remember that high key is item zero on non-rightmost pages */ + start = P_RIGHTMOST(opaque) ? 
P_HIKEY : P_FIRSTKEY; + + if (offnum > start) + { + offnum = OffsetNumberPrev(offnum); + } + else + { + + /* if we're at end of scan, release the buffer and return */ blkno = opaque->btpo_prev; - obknum = BufferGetBlockNumber(*bufP); - _bt_relbuf(rel, *bufP, BT_READ); - if (blkno == P_NONE) { - *bufP = so->btso_curbuf = InvalidBuffer; - ItemPointerSetInvalid(current); - return (false); + if (P_LEFTMOST(opaque)) + { + _bt_relbuf(rel, *bufP, BT_READ); + *bufP = so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(current); + return (false); + } + else + { + + obknum = BufferGetBlockNumber(*bufP); + + /* walk right to the next page with data */ + _bt_relbuf(rel, *bufP, BT_READ); + for (;;) + { + *bufP = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(*bufP); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + maxoff = PageGetMaxOffsetNumber(page); + + /* + * If the adjacent page just split, then we may have + * the wrong block. Handle this case. Because pages + * only split right, we don't have to worry about this + * failing to terminate. + */ + + while (opaque->btpo_next != obknum) + { + blkno = opaque->btpo_next; + _bt_relbuf(rel, *bufP, BT_READ); + *bufP = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(*bufP); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + maxoff = PageGetMaxOffsetNumber(page); + } + + /* don't consider the high key */ + start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + + /* anything to look at here? */ + if (!PageIsEmpty(page) && maxoff >= start) + { + break; + } + else + { + blkno = opaque->btpo_prev; + obknum = BufferGetBlockNumber(*bufP); + _bt_relbuf(rel, *bufP, BT_READ); + if (blkno == P_NONE) + { + *bufP = so->btso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(current); + return (false); + } + } + } + offnum = maxoff;/* XXX PageIsEmpty? */ } - } } - offnum = maxoff; /* XXX PageIsEmpty? 
*/ - } } - } - blkno = BufferGetBlockNumber(*bufP); - so->btso_curbuf = *bufP; - ItemPointerSet(current, blkno, offnum); - - return (true); + blkno = BufferGetBlockNumber(*bufP); + so->btso_curbuf = *bufP; + ItemPointerSet(current, blkno, offnum); + + return (true); } /* - * _bt_twostep() -- Move to an adjacent record in a scan on the tree, - * if an adjacent record exists. + * _bt_twostep() -- Move to an adjacent record in a scan on the tree, + * if an adjacent record exists. * - * This is like _bt_step, except that if no adjacent record exists - * it restores us to where we were before trying the step. This is - * only hairy when you cross page boundaries, since the page you cross - * from could have records inserted or deleted, or could even split. - * This is unlikely, but we try to handle it correctly here anyway. + * This is like _bt_step, except that if no adjacent record exists + * it restores us to where we were before trying the step. This is + * only hairy when you cross page boundaries, since the page you cross + * from could have records inserted or deleted, or could even split. + * This is unlikely, but we try to handle it correctly here anyway. * - * This routine contains the only case in which our changes to Lehman - * and Yao's algorithm. + * This routine contains the only case in which our changes to Lehman + * and Yao's algorithm. * - * Like step, this routine leaves the scan's currentItemData in the - * proper state and acquires a lock and pin on *bufP. If the twostep - * succeeded, we return true; otherwise, we return false. + * Like step, this routine leaves the scan's currentItemData in the + * proper state and acquires a lock and pin on *bufP. If the twostep + * succeeded, we return true; otherwise, we return false. 
*/ -static bool -_bt_twostep(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) +static bool +_bt_twostep(IndexScanDesc scan, Buffer * bufP, ScanDirection dir) { - Page page; - BTPageOpaque opaque; - OffsetNumber offnum, maxoff; - OffsetNumber start; - ItemPointer current; - ItemId itemid; - int itemsz; - BTItem btitem; - BTItem svitem; - BlockNumber blkno; - - blkno = BufferGetBlockNumber(*bufP); - page = BufferGetPage(*bufP); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - current = &(scan->currentItemData); - offnum = ItemPointerGetOffsetNumber(current); - - start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - - /* if we're safe, just do it */ - if (ScanDirectionIsForward(dir) && offnum < maxoff) { /* XXX PageIsEmpty? */ - ItemPointerSet(current, blkno, OffsetNumberNext(offnum)); - return (true); - } else if (ScanDirectionIsBackward(dir) && offnum > start) { - ItemPointerSet(current, blkno, OffsetNumberPrev(offnum)); - return (true); - } - - /* if we've hit end of scan we don't have to do any work */ - if (ScanDirectionIsForward(dir) && P_RIGHTMOST(opaque)) { - return (false); - } else if (ScanDirectionIsBackward(dir) && P_LEFTMOST(opaque)) { - return (false); - } - - /* - * Okay, it's off the page; let _bt_step() do the hard work, and we'll - * try to remember where we were. This is not guaranteed to work; this - * is the only place in the code where concurrency can screw us up, - * and it's because we want to be able to move in two directions in - * the scan. 
- */ - - itemid = PageGetItemId(page, offnum); - itemsz = ItemIdGetLength(itemid); - btitem = (BTItem) PageGetItem(page, itemid); - svitem = (BTItem) palloc(itemsz); - memmove((char *) svitem, (char *) btitem, itemsz); - - if (_bt_step(scan, bufP, dir)) { - pfree(svitem); - return (true); - } - - /* try to find our place again */ - *bufP = _bt_getbuf(scan->relation, blkno, BT_READ); - page = BufferGetPage(*bufP); - maxoff = PageGetMaxOffsetNumber(page); - - while (offnum <= maxoff) { + Page page; + BTPageOpaque opaque; + OffsetNumber offnum, + maxoff; + OffsetNumber start; + ItemPointer current; + ItemId itemid; + int itemsz; + BTItem btitem; + BTItem svitem; + BlockNumber blkno; + + blkno = BufferGetBlockNumber(*bufP); + page = BufferGetPage(*bufP); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + maxoff = PageGetMaxOffsetNumber(page); + current = &(scan->currentItemData); + offnum = ItemPointerGetOffsetNumber(current); + + start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + + /* if we're safe, just do it */ + if (ScanDirectionIsForward(dir) && offnum < maxoff) + { /* XXX PageIsEmpty? */ + ItemPointerSet(current, blkno, OffsetNumberNext(offnum)); + return (true); + } + else if (ScanDirectionIsBackward(dir) && offnum > start) + { + ItemPointerSet(current, blkno, OffsetNumberPrev(offnum)); + return (true); + } + + /* if we've hit end of scan we don't have to do any work */ + if (ScanDirectionIsForward(dir) && P_RIGHTMOST(opaque)) + { + return (false); + } + else if (ScanDirectionIsBackward(dir) && P_LEFTMOST(opaque)) + { + return (false); + } + + /* + * Okay, it's off the page; let _bt_step() do the hard work, and we'll + * try to remember where we were. This is not guaranteed to work; + * this is the only place in the code where concurrency can screw us + * up, and it's because we want to be able to move in two directions + * in the scan. 
+ */ + itemid = PageGetItemId(page, offnum); + itemsz = ItemIdGetLength(itemid); btitem = (BTItem) PageGetItem(page, itemid); - if ( BTItemSame (btitem, svitem) ) { - pfree(svitem); - ItemPointerSet(current, blkno, offnum); - return (false); + svitem = (BTItem) palloc(itemsz); + memmove((char *) svitem, (char *) btitem, itemsz); + + if (_bt_step(scan, bufP, dir)) + { + pfree(svitem); + return (true); + } + + /* try to find our place again */ + *bufP = _bt_getbuf(scan->relation, blkno, BT_READ); + page = BufferGetPage(*bufP); + maxoff = PageGetMaxOffsetNumber(page); + + while (offnum <= maxoff) + { + itemid = PageGetItemId(page, offnum); + btitem = (BTItem) PageGetItem(page, itemid); + if (BTItemSame(btitem, svitem)) + { + pfree(svitem); + ItemPointerSet(current, blkno, offnum); + return (false); + } } - } - - /* - * XXX crash and burn -- can't find our place. We can be a little - * smarter -- walk to the next page to the right, for example, since - * that's the only direction that splits happen in. Deletions screw - * us up less often since they're only done by the vacuum daemon. - */ - - elog(WARN, "btree synchronization error: concurrent update botched scan"); - - return (false); + + /* + * XXX crash and burn -- can't find our place. We can be a little + * smarter -- walk to the next page to the right, for example, since + * that's the only direction that splits happen in. Deletions screw + * us up less often since they're only done by the vacuum daemon. + */ + + elog(WARN, "btree synchronization error: concurrent update botched scan"); + + return (false); } /* - * _bt_endpoint() -- Find the first or last key in the index. + * _bt_endpoint() -- Find the first or last key in the index. 
*/ -static RetrieveIndexResult +static RetrieveIndexResult _bt_endpoint(IndexScanDesc scan, ScanDirection dir) { - Relation rel; - Buffer buf; - Page page; - BTPageOpaque opaque; - ItemPointer current; - OffsetNumber offnum, maxoff; - OffsetNumber start = 0; - BlockNumber blkno; - BTItem btitem; - IndexTuple itup; - BTScanOpaque so; - RetrieveIndexResult res; - Size keysok; - - rel = scan->relation; - current = &(scan->currentItemData); - so = (BTScanOpaque) scan->opaque; - - buf = _bt_getroot(rel, BT_READ); - blkno = BufferGetBlockNumber(buf); - page = BufferGetPage(buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - - for (;;) { - if (opaque->btpo_flags & BTP_LEAF) - break; - - if (ScanDirectionIsForward(dir)) { - offnum = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - } else { - offnum = PageGetMaxOffsetNumber(page); - } - - btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); - itup = &(btitem->bti_itup); - - blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); - - _bt_relbuf(rel, buf, BT_READ); - buf = _bt_getbuf(rel, blkno, BT_READ); + Relation rel; + Buffer buf; + Page page; + BTPageOpaque opaque; + ItemPointer current; + OffsetNumber offnum, + maxoff; + OffsetNumber start = 0; + BlockNumber blkno; + BTItem btitem; + IndexTuple itup; + BTScanOpaque so; + RetrieveIndexResult res; + Size keysok; + + rel = scan->relation; + current = &(scan->currentItemData); + so = (BTScanOpaque) scan->opaque; + + buf = _bt_getroot(rel, BT_READ); + blkno = BufferGetBlockNumber(buf); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); - - /* - * Race condition: If the child page we just stepped onto is - * in the process of being split, we need to make sure we're - * all the way at the right edge of the tree. See the paper - * by Lehman and Yao. - */ - - if (ScanDirectionIsBackward(dir) && ! 
P_RIGHTMOST(opaque)) { - do { - blkno = opaque->btpo_next; + + for (;;) + { + if (opaque->btpo_flags & BTP_LEAF) + break; + + if (ScanDirectionIsForward(dir)) + { + offnum = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + } + else + { + offnum = PageGetMaxOffsetNumber(page); + } + + btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &(btitem->bti_itup); + + blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); + _bt_relbuf(rel, buf, BT_READ); buf = _bt_getbuf(rel, blkno, BT_READ); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); - } while (! P_RIGHTMOST(opaque)); + + /* + * Race condition: If the child page we just stepped onto is in + * the process of being split, we need to make sure we're all the + * way at the right edge of the tree. See the paper by Lehman and + * Yao. + */ + + if (ScanDirectionIsBackward(dir) && !P_RIGHTMOST(opaque)) + { + do + { + blkno = opaque->btpo_next; + _bt_relbuf(rel, buf, BT_READ); + buf = _bt_getbuf(rel, blkno, BT_READ); + page = BufferGetPage(buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + } while (!P_RIGHTMOST(opaque)); + } } - } - - /* okay, we've got the {left,right}-most page in the tree */ - maxoff = PageGetMaxOffsetNumber(page); - - if (ScanDirectionIsForward(dir)) { - if ( !P_LEFTMOST(opaque) ) /* non-leftmost page ? */ - elog (WARN, "_bt_endpoint: leftmost page (%u) has not leftmost flag", blkno); - start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; - /* - * I don't understand this stuff! It doesn't work for non-rightmost - * pages with only one element (P_HIKEY) which we have after - * deletion itups by vacuum (it's case of start > maxoff). - * Scanning in BackwardScanDirection is not understandable at all. - * Well - new stuff. - vadim 12/06/96 - */ + + /* okay, we've got the {left,right}-most page in the tree */ + maxoff = PageGetMaxOffsetNumber(page); + + if (ScanDirectionIsForward(dir)) + { + if (!P_LEFTMOST(opaque))/* non-leftmost page ? 
*/ + elog(WARN, "_bt_endpoint: leftmost page (%u) has not leftmost flag", blkno); + start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY; + + /* + * I don't understand this stuff! It doesn't work for + * non-rightmost pages with only one element (P_HIKEY) which we + * have after deletion itups by vacuum (it's case of start > + * maxoff). Scanning in BackwardScanDirection is not + * understandable at all. Well - new stuff. - vadim 12/06/96 + */ #if 0 - if (PageIsEmpty(page) || start > maxoff) { - ItemPointerSet(current, blkno, maxoff); - if (!_bt_step(scan, &buf, BackwardScanDirection)) - return ((RetrieveIndexResult) NULL); - - start = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - } + if (PageIsEmpty(page) || start > maxoff) + { + ItemPointerSet(current, blkno, maxoff); + if (!_bt_step(scan, &buf, BackwardScanDirection)) + return ((RetrieveIndexResult) NULL); + + start = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + } #endif - if ( PageIsEmpty (page) ) + if (PageIsEmpty(page)) + { + if (start != P_HIKEY) /* non-rightmost page */ + elog(WARN, "_bt_endpoint: non-rightmost page (%u) is empty", blkno); + + /* + * It's left- & right- most page - root page, - and it's + * empty... + */ + _bt_relbuf(rel, buf, BT_READ); + ItemPointerSetInvalid(current); + so->btso_curbuf = InvalidBuffer; + return ((RetrieveIndexResult) NULL); + } + if (start > maxoff) /* start == 2 && maxoff == 1 */ + { + ItemPointerSet(current, blkno, maxoff); + if (!_bt_step(scan, &buf, ForwardScanDirection)) + return ((RetrieveIndexResult) NULL); + + start = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + } + /* new stuff ends here */ + else + { + ItemPointerSet(current, blkno, start); + } + } + else if (ScanDirectionIsBackward(dir)) { - if ( start != P_HIKEY ) /* non-rightmost page */ - elog (WARN, "_bt_endpoint: non-rightmost page (%u) is empty", blkno); - /* It's left- & right- most page - root page, - and it's empty... 
*/ - _bt_relbuf(rel, buf, BT_READ); - ItemPointerSetInvalid(current); - so->btso_curbuf = InvalidBuffer; - return ((RetrieveIndexResult) NULL); + + /* + * I don't understand this stuff too! If RIGHT-most leaf page is + * empty why do scanning in ForwardScanDirection ??? Well - new + * stuff. - vadim 12/06/96 + */ +#if 0 + if (PageIsEmpty(page)) + { + ItemPointerSet(current, blkno, FirstOffsetNumber); + if (!_bt_step(scan, &buf, ForwardScanDirection)) + return ((RetrieveIndexResult) NULL); + + start = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + } +#endif + if (PageIsEmpty(page)) + { + /* If it's leftmost page too - it's empty root page... */ + if (P_LEFTMOST(opaque)) + { + _bt_relbuf(rel, buf, BT_READ); + ItemPointerSetInvalid(current); + so->btso_curbuf = InvalidBuffer; + return ((RetrieveIndexResult) NULL); + } + /* Go back ! */ + ItemPointerSet(current, blkno, FirstOffsetNumber); + if (!_bt_step(scan, &buf, BackwardScanDirection)) + return ((RetrieveIndexResult) NULL); + + start = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + } + /* new stuff ends here */ + else + { + start = PageGetMaxOffsetNumber(page); + ItemPointerSet(current, blkno, start); + } } - if ( start > maxoff ) /* start == 2 && maxoff == 1 */ + else { - ItemPointerSet(current, blkno, maxoff); - if (!_bt_step(scan, &buf, ForwardScanDirection)) - return ((RetrieveIndexResult) NULL); - - start = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); + elog(WARN, "Illegal scan direction %d", dir); } - /* new stuff ends here */ - else { - ItemPointerSet(current, blkno, start); + + btitem = (BTItem) PageGetItem(page, PageGetItemId(page, start)); + itup = &(btitem->bti_itup); + + /* see if we picked a winner */ + if (_bt_checkkeys(scan, itup, &keysok)) + { + res = FormRetrieveIndexResult(current, &(itup->t_tid)); + + /* remember which buffer we have pinned */ + so->btso_curbuf = buf; } - } else if (ScanDirectionIsBackward(dir)) { - /* - * I don't 
understand this stuff too! If RIGHT-most leaf page is - * empty why do scanning in ForwardScanDirection ??? - * Well - new stuff. - vadim 12/06/96 - */ -#if 0 - if (PageIsEmpty(page)) { - ItemPointerSet(current, blkno, FirstOffsetNumber); - if (!_bt_step(scan, &buf, ForwardScanDirection)) - return ((RetrieveIndexResult) NULL); - - start = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); + else if (keysok >= so->numberOfFirstKeys) + { + so->btso_curbuf = buf; + return (_bt_next(scan, dir)); } -#endif - if (PageIsEmpty(page)) + else { - /* If it's leftmost page too - it's empty root page... */ - if ( P_LEFTMOST(opaque) ) - { - _bt_relbuf(rel, buf, BT_READ); ItemPointerSetInvalid(current); so->btso_curbuf = InvalidBuffer; - return ((RetrieveIndexResult) NULL); - } - /* Go back ! */ - ItemPointerSet(current, blkno, FirstOffsetNumber); - if (!_bt_step(scan, &buf, BackwardScanDirection)) - return ((RetrieveIndexResult) NULL); - - start = ItemPointerGetOffsetNumber(current); - page = BufferGetPage(buf); - } - /* new stuff ends here */ - else { - start = PageGetMaxOffsetNumber(page); - ItemPointerSet(current, blkno, start); + _bt_relbuf(rel, buf, BT_READ); + res = (RetrieveIndexResult) NULL; } - } else { - elog(WARN, "Illegal scan direction %d", dir); - } - - btitem = (BTItem) PageGetItem(page, PageGetItemId(page, start)); - itup = &(btitem->bti_itup); - - /* see if we picked a winner */ - if ( _bt_checkkeys (scan, itup, &keysok) ) - { - res = FormRetrieveIndexResult(current, &(itup->t_tid)); - - /* remember which buffer we have pinned */ - so->btso_curbuf = buf; - } - else if ( keysok >= so->numberOfFirstKeys ) - { - so->btso_curbuf = buf; - return (_bt_next (scan, dir)); - } - else - { - ItemPointerSetInvalid(current); - so->btso_curbuf = InvalidBuffer; - _bt_relbuf(rel, buf, BT_READ); - res = (RetrieveIndexResult) NULL; - } - - return (res); + + return (res); } diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 
8e054d24ab..09cb43769f 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -5,30 +5,30 @@ * * * IDENTIFICATION - * $Id: nbtsort.c,v 1.19 1997/08/19 21:29:46 momjian Exp $ + * $Id: nbtsort.c,v 1.20 1997/09/07 04:39:02 momjian Exp $ * * NOTES * * what we do is: * - generate a set of initial one-block runs, distributed round-robin - * between the output tapes. + * between the output tapes. * - for each pass, - * - swap input and output tape sets, rewinding both and truncating - * the output tapes. - * - merge the current run in each input tape to the current output - * tape. - * - when each input run has been exhausted, switch to another output - * tape and start processing another run. + * - swap input and output tape sets, rewinding both and truncating + * the output tapes. + * - merge the current run in each input tape to the current output + * tape. + * - when each input run has been exhausted, switch to another output + * tape and start processing another run. * - when we have fewer runs than tapes, we know we are ready to start - * merging into the btree leaf pages. (i.e., we do not have to wait - * until we have exactly one tape.) + * merging into the btree leaf pages. (i.e., we do not have to wait + * until we have exactly one tape.) * - as we extract tuples from the final runs, we build the pages for - * each level. when we have only one page on a level, it must be the - * root -- it can be attached to the btree metapage and we are done. + * each level. when we have only one page on a level, it must be the + * root -- it can be attached to the btree metapage and we are done. * * conventions: * - external interface routines take in and return "void *" for their - * opaque handles. this is for modularity reasons. + * opaque handles. this is for modularity reasons. * * this code is moderately slow (~10% slower) compared to the regular * btree (insertion) build code on sorted or well-clustered data. 
on @@ -58,20 +58,21 @@ #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif #ifdef BTREE_BUILD_STATS #include <tcop/tcopprot.h> -extern int ShowExecutorStats; +extern int ShowExecutorStats; + #endif -static BTItem _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags); -static BTItem _bt_minitem(Page opage, BlockNumber oblkno, int atend); -static void *_bt_pagestate(Relation index, int flags, int level, bool doupper); -static void _bt_uppershutdown(Relation index, BTPageState *state); +static BTItem _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags); +static BTItem _bt_minitem(Page opage, BlockNumber oblkno, int atend); +static void *_bt_pagestate(Relation index, int flags, int level, bool doupper); +static void _bt_uppershutdown(Relation index, BTPageState * state); /* * turn on debugging output. @@ -83,18 +84,18 @@ static void _bt_uppershutdown(Relation index, BTPageState *state); #define FASTBUILD_SPOOL #define FASTBUILD_MERGE -#define MAXTAPES (7) -#define TAPEBLCKSZ (MAXBLCKSZ << 2) -#define TAPETEMP "pg_btsortXXXXXX" +#define MAXTAPES (7) +#define TAPEBLCKSZ (MAXBLCKSZ << 2) +#define TAPETEMP "pg_btsortXXXXXX" -extern int NDirectFileRead; -extern int NDirectFileWrite; -extern char *mktemp(char *template); +extern int NDirectFileRead; +extern int NDirectFileWrite; +extern char *mktemp(char *template); /* - * this is what we use to shovel BTItems in and out of memory. it's + * this is what we use to shovel BTItems in and out of memory. it's * bigger than a standard block because we are doing a lot of strictly - * sequential i/o. this is obviously something of a tradeoff since we + * sequential i/o. this is obviously something of a tradeoff since we * are potentially reading a bunch of zeroes off of disk in many * cases. * @@ -104,14 +105,15 @@ extern char *mktemp(char *template); * the only thing like that so i'm not going to worry about wasting a * few bytes. 
*/ -typedef struct { - int bttb_magic; /* magic number */ - int bttb_fd; /* file descriptor */ - int bttb_top; /* top of free space within bttb_data */ - short bttb_ntup; /* number of tuples in this block */ - short bttb_eor; /* End-Of-Run marker */ - char bttb_data[TAPEBLCKSZ - 2 * sizeof(double)]; -} BTTapeBlock; +typedef struct +{ + int bttb_magic; /* magic number */ + int bttb_fd; /* file descriptor */ + int bttb_top; /* top of free space within bttb_data */ + short bttb_ntup; /* number of tuples in this block */ + short bttb_eor; /* End-Of-Run marker */ + char bttb_data[TAPEBLCKSZ - 2 * sizeof(double)]; +} BTTapeBlock; /* * this structure holds the bookkeeping for a simple balanced multiway @@ -120,13 +122,14 @@ typedef struct { * right now. though if psort was in a condition that i could hack it * to do this, you bet i would.) */ -typedef struct { - int bts_ntapes; - int bts_tape; - BTTapeBlock **bts_itape; /* input tape blocks */ - BTTapeBlock **bts_otape; /* output tape blocks */ - bool isunique; -} BTSpool; +typedef struct +{ + int bts_ntapes; + int bts_tape; + BTTapeBlock **bts_itape; /* input tape blocks */ + BTTapeBlock **bts_otape; /* output tape blocks */ + bool isunique; +} BTSpool; /*------------------------------------------------------------------------- * sorting comparison routine - returns {-1,0,1} depending on whether @@ -146,101 +149,102 @@ typedef struct { * what the heck. 
* *------------------------------------------------------------------------- */ -typedef struct { - Datum *btsk_datum; - char *btsk_nulls; - BTItem btsk_item; -} BTSortKey; +typedef struct +{ + Datum *btsk_datum; + char *btsk_nulls; + BTItem btsk_item; +} BTSortKey; static Relation _bt_sortrel; -static int _bt_nattr; -static BTSpool * _bt_inspool; +static int _bt_nattr; +static BTSpool *_bt_inspool; static void -_bt_isortcmpinit(Relation index, BTSpool *spool) +_bt_isortcmpinit(Relation index, BTSpool * spool) { - _bt_sortrel = index; - _bt_inspool = spool; - _bt_nattr = index->rd_att->natts; + _bt_sortrel = index; + _bt_inspool = spool; + _bt_nattr = index->rd_att->natts; } static int -_bt_isortcmp(BTSortKey *k1, BTSortKey *k2) +_bt_isortcmp(BTSortKey * k1, BTSortKey * k2) { - Datum *k1_datum = k1->btsk_datum; - Datum *k2_datum = k2->btsk_datum; - char *k1_nulls = k1->btsk_nulls; - char *k2_nulls = k2->btsk_nulls; - bool equal_isnull = false; - int i; - - if (k1->btsk_item == (BTItem) NULL) - { - if (k2->btsk_item == (BTItem) NULL) - return(0); /* 1 = 2 */ - return(1); /* 1 > 2 */ - } - else if (k2->btsk_item == (BTItem) NULL) - return(-1); /* 1 < 2 */ - - for (i = 0; i < _bt_nattr; i++) - { - if ( k1_nulls[i] != ' ' ) /* k1 attr is NULL */ + Datum *k1_datum = k1->btsk_datum; + Datum *k2_datum = k2->btsk_datum; + char *k1_nulls = k1->btsk_nulls; + char *k2_nulls = k2->btsk_nulls; + bool equal_isnull = false; + int i; + + if (k1->btsk_item == (BTItem) NULL) { - if ( k2_nulls[i] != ' ' ) /* the same for k2 */ - { - equal_isnull = true; - continue; - } - return (1); /* NULL ">" NOT_NULL */ + if (k2->btsk_item == (BTItem) NULL) + return (0); /* 1 = 2 */ + return (1); /* 1 > 2 */ } - else if ( k2_nulls[i] != ' ' ) /* k2 attr is NULL */ - return (-1); /* NOT_NULL "<" NULL */ - - if (_bt_invokestrat(_bt_sortrel, i+1, BTGreaterStrategyNumber, - k1_datum[i], k2_datum[i])) - return(1); /* 1 > 2 */ - else if (_bt_invokestrat(_bt_sortrel, i+1, BTGreaterStrategyNumber, - 
k2_datum[i], k1_datum[i])) - return(-1); /* 1 < 2 */ - } - - if ( _bt_inspool->isunique && !equal_isnull ) - { - _bt_spooldestroy ((void*)_bt_inspool); - elog (WARN, "Cannot create unique index. Table contains non-unique values"); - } - return(0); /* 1 = 2 */ + else if (k2->btsk_item == (BTItem) NULL) + return (-1); /* 1 < 2 */ + + for (i = 0; i < _bt_nattr; i++) + { + if (k1_nulls[i] != ' ') /* k1 attr is NULL */ + { + if (k2_nulls[i] != ' ') /* the same for k2 */ + { + equal_isnull = true; + continue; + } + return (1); /* NULL ">" NOT_NULL */ + } + else if (k2_nulls[i] != ' ') /* k2 attr is NULL */ + return (-1); /* NOT_NULL "<" NULL */ + + if (_bt_invokestrat(_bt_sortrel, i + 1, BTGreaterStrategyNumber, + k1_datum[i], k2_datum[i])) + return (1); /* 1 > 2 */ + else if (_bt_invokestrat(_bt_sortrel, i + 1, BTGreaterStrategyNumber, + k2_datum[i], k1_datum[i])) + return (-1); /* 1 < 2 */ + } + + if (_bt_inspool->isunique && !equal_isnull) + { + _bt_spooldestroy((void *) _bt_inspool); + elog(WARN, "Cannot create unique index. 
Table contains non-unique values"); + } + return (0); /* 1 = 2 */ } static void -_bt_setsortkey(Relation index, BTItem bti, BTSortKey *sk) +_bt_setsortkey(Relation index, BTItem bti, BTSortKey * sk) { - sk->btsk_item = (BTItem) NULL; - sk->btsk_datum = (Datum*) NULL; - sk->btsk_nulls = (char*) NULL; - - if (bti != (BTItem) NULL) - { - IndexTuple it = &(bti->bti_itup); - TupleDesc itdesc = index->rd_att; - Datum *dp = (Datum*) palloc (_bt_nattr * sizeof (Datum)); - char *np = (char*) palloc (_bt_nattr * sizeof (char)); - bool isnull; - int i; - - for (i = 0; i < _bt_nattr; i++) - { - dp[i] = index_getattr(it, i+1, itdesc, &isnull); - if ( isnull ) - np[i] = 'n'; - else - np[i] = ' '; + sk->btsk_item = (BTItem) NULL; + sk->btsk_datum = (Datum *) NULL; + sk->btsk_nulls = (char *) NULL; + + if (bti != (BTItem) NULL) + { + IndexTuple it = &(bti->bti_itup); + TupleDesc itdesc = index->rd_att; + Datum *dp = (Datum *) palloc(_bt_nattr * sizeof(Datum)); + char *np = (char *) palloc(_bt_nattr * sizeof(char)); + bool isnull; + int i; + + for (i = 0; i < _bt_nattr; i++) + { + dp[i] = index_getattr(it, i + 1, itdesc, &isnull); + if (isnull) + np[i] = 'n'; + else + np[i] = ' '; + } + sk->btsk_item = bti; + sk->btsk_datum = dp; + sk->btsk_nulls = np; } - sk->btsk_item = bti; - sk->btsk_datum = dp; - sk->btsk_nulls = np; - } } /*------------------------------------------------------------------------- @@ -254,84 +258,100 @@ _bt_setsortkey(Relation index, BTItem bti, BTSortKey *sk) * XXX these probably ought to be generic library functions. 
*------------------------------------------------------------------------- */ -typedef struct { - int btpqe_tape; /* tape identifier */ - BTSortKey btpqe_item; /* pointer to BTItem in tape buffer */ -} BTPriQueueElem; - -#define MAXELEM MAXTAPES -typedef struct { - int btpq_nelem; - BTPriQueueElem btpq_queue[MAXELEM]; - Relation btpq_rel; -} BTPriQueue; +typedef struct +{ + int btpqe_tape; /* tape identifier */ + BTSortKey btpqe_item; /* pointer to BTItem in tape buffer */ +} BTPriQueueElem; + +#define MAXELEM MAXTAPES +typedef struct +{ + int btpq_nelem; + BTPriQueueElem btpq_queue[MAXELEM]; + Relation btpq_rel; +} BTPriQueue; /* be sure to call _bt_isortcmpinit first */ #define GREATER(a, b) \ - (_bt_isortcmp(&((a)->btpqe_item), &((b)->btpqe_item)) > 0) + (_bt_isortcmp(&((a)->btpqe_item), &((b)->btpqe_item)) > 0) static void -_bt_pqsift(BTPriQueue *q, int parent) +_bt_pqsift(BTPriQueue * q, int parent) { - int child; - BTPriQueueElem e; - - for (child = parent * 2 + 1; - child < q->btpq_nelem; - child = parent * 2 + 1) { - if (child < q->btpq_nelem - 1) { - if (GREATER(&(q->btpq_queue[child]), &(q->btpq_queue[child+1]))) { - ++child; - } - } - if (GREATER(&(q->btpq_queue[parent]), &(q->btpq_queue[child]))) { - e = q->btpq_queue[child]; /* struct = */ - q->btpq_queue[child] = q->btpq_queue[parent]; /* struct = */ - q->btpq_queue[parent] = e; /* struct = */ - parent = child; - } else { - parent = child + 1; + int child; + BTPriQueueElem e; + + for (child = parent * 2 + 1; + child < q->btpq_nelem; + child = parent * 2 + 1) + { + if (child < q->btpq_nelem - 1) + { + if (GREATER(&(q->btpq_queue[child]), &(q->btpq_queue[child + 1]))) + { + ++child; + } + } + if (GREATER(&(q->btpq_queue[parent]), &(q->btpq_queue[child]))) + { + e = q->btpq_queue[child]; /* struct = */ + q->btpq_queue[child] = q->btpq_queue[parent]; /* struct = */ + q->btpq_queue[parent] = e; /* struct = */ + parent = child; + } + else + { + parent = child + 1; + } } - } } static int 
-_bt_pqnext(BTPriQueue *q, BTPriQueueElem *e) +_bt_pqnext(BTPriQueue * q, BTPriQueueElem * e) { - if (q->btpq_nelem < 1) { /* already empty */ - return(-1); - } - *e = q->btpq_queue[0]; /* struct = */ - - if (--q->btpq_nelem < 1) { /* now empty, don't sift */ - return(0); - } - q->btpq_queue[0] = q->btpq_queue[q->btpq_nelem]; /* struct = */ - _bt_pqsift(q, 0); - return(0); + if (q->btpq_nelem < 1) + { /* already empty */ + return (-1); + } + *e = q->btpq_queue[0]; /* struct = */ + + if (--q->btpq_nelem < 1) + { /* now empty, don't sift */ + return (0); + } + q->btpq_queue[0] = q->btpq_queue[q->btpq_nelem]; /* struct = */ + _bt_pqsift(q, 0); + return (0); } static void -_bt_pqadd(BTPriQueue *q, BTPriQueueElem *e) +_bt_pqadd(BTPriQueue * q, BTPriQueueElem * e) { - int child, parent; - - if (q->btpq_nelem >= MAXELEM) { - elog(WARN, "_bt_pqadd: queue overflow"); - } - - child = q->btpq_nelem++; - while (child > 0) { - parent = child / 2; - if (GREATER(e, &(q->btpq_queue[parent]))) { - break; - } else { - q->btpq_queue[child] = q->btpq_queue[parent]; /* struct = */ - child = parent; + int child, + parent; + + if (q->btpq_nelem >= MAXELEM) + { + elog(WARN, "_bt_pqadd: queue overflow"); + } + + child = q->btpq_nelem++; + while (child > 0) + { + parent = child / 2; + if (GREATER(e, &(q->btpq_queue[parent]))) + { + break; + } + else + { + q->btpq_queue[child] = q->btpq_queue[parent]; /* struct = */ + child = parent; + } } - } - q->btpq_queue[child] = *e; /* struct = */ + q->btpq_queue[child] = *e; /* struct = */ } /*------------------------------------------------------------------------- @@ -339,37 +359,37 @@ _bt_pqadd(BTPriQueue *q, BTPriQueueElem *e) *------------------------------------------------------------------------- */ -#define BTITEMSZ(btitem) \ - ((btitem) ? 
\ - (IndexTupleDSize((btitem)->bti_itup) + \ - (sizeof(BTItemData) - sizeof(IndexTupleData))) : \ - 0) -#define SPCLEFT(tape) \ - (sizeof((tape)->bttb_data) - (tape)->bttb_top) -#define EMPTYTAPE(tape) \ - ((tape)->bttb_ntup <= 0) -#define BTTAPEMAGIC 0x19660226 +#define BTITEMSZ(btitem) \ + ((btitem) ? \ + (IndexTupleDSize((btitem)->bti_itup) + \ + (sizeof(BTItemData) - sizeof(IndexTupleData))) : \ + 0) +#define SPCLEFT(tape) \ + (sizeof((tape)->bttb_data) - (tape)->bttb_top) +#define EMPTYTAPE(tape) \ + ((tape)->bttb_ntup <= 0) +#define BTTAPEMAGIC 0x19660226 /* * reset the tape header for its next use without doing anything to - * the physical tape file. (setting bttb_top to 0 makes the block + * the physical tape file. (setting bttb_top to 0 makes the block * empty.) */ static void -_bt_tapereset(BTTapeBlock *tape) +_bt_tapereset(BTTapeBlock * tape) { - tape->bttb_eor = 0; - tape->bttb_top = 0; - tape->bttb_ntup = 0; + tape->bttb_eor = 0; + tape->bttb_top = 0; + tape->bttb_ntup = 0; } /* * rewind the physical tape file. */ static void -_bt_taperewind(BTTapeBlock *tape) +_bt_taperewind(BTTapeBlock * tape) { - FileSeek(tape->bttb_fd, 0, SEEK_SET); + FileSeek(tape->bttb_fd, 0, SEEK_SET); } /* @@ -382,17 +402,17 @@ _bt_taperewind(BTTapeBlock *tape) * least you don't have to delete and reinsert the directory entries. 
*/ static void -_bt_tapeclear(BTTapeBlock *tape) +_bt_tapeclear(BTTapeBlock * tape) { - /* blow away the contents of the old file */ - _bt_taperewind(tape); + /* blow away the contents of the old file */ + _bt_taperewind(tape); #if 0 - FileSync(tape->bttb_fd); + FileSync(tape->bttb_fd); #endif - FileTruncate(tape->bttb_fd, 0); + FileTruncate(tape->bttb_fd, 0); - /* reset the buffer */ - _bt_tapereset(tape); + /* reset the buffer */ + _bt_tapereset(tape); } /* @@ -402,43 +422,44 @@ _bt_tapeclear(BTTapeBlock *tape) static BTTapeBlock * _bt_tapecreate(char *fname) { - BTTapeBlock *tape = (BTTapeBlock *) palloc(sizeof(BTTapeBlock)); + BTTapeBlock *tape = (BTTapeBlock *) palloc(sizeof(BTTapeBlock)); - if (tape == (BTTapeBlock *) NULL) { - elog(WARN, "_bt_tapecreate: out of memory"); - } + if (tape == (BTTapeBlock *) NULL) + { + elog(WARN, "_bt_tapecreate: out of memory"); + } - tape->bttb_magic = BTTAPEMAGIC; + tape->bttb_magic = BTTAPEMAGIC; - tape->bttb_fd = FileNameOpenFile(fname, O_RDWR|O_CREAT|O_TRUNC, 0600); - Assert(tape->bttb_fd >= 0); + tape->bttb_fd = FileNameOpenFile(fname, O_RDWR | O_CREAT | O_TRUNC, 0600); + Assert(tape->bttb_fd >= 0); - /* initialize the buffer */ - _bt_tapereset(tape); + /* initialize the buffer */ + _bt_tapereset(tape); - return(tape); + return (tape); } /* * destroy the BTTapeBlock structure and its physical tape file. */ static void -_bt_tapedestroy(BTTapeBlock *tape) +_bt_tapedestroy(BTTapeBlock * tape) { - FileUnlink(tape->bttb_fd); - pfree((void *) tape); + FileUnlink(tape->bttb_fd); + pfree((void *) tape); } /* * flush the tape block to the file, marking End-Of-Run if requested. 
*/ static void -_bt_tapewrite(BTTapeBlock *tape, int eor) +_bt_tapewrite(BTTapeBlock * tape, int eor) { - tape->bttb_eor = eor; - FileWrite(tape->bttb_fd, (char *) tape, TAPEBLCKSZ); - NDirectFileWrite += TAPEBLCKSZ/MAXBLCKSZ; - _bt_tapereset(tape); + tape->bttb_eor = eor; + FileWrite(tape->bttb_fd, (char *) tape, TAPEBLCKSZ); + NDirectFileWrite += TAPEBLCKSZ / MAXBLCKSZ; + _bt_tapereset(tape); } /* @@ -447,34 +468,36 @@ _bt_tapewrite(BTTapeBlock *tape, int eor) * * returns: * - 0 if there are no more blocks in the tape or in this run (call - * _bt_tapereset to clear the End-Of-Run marker) + * _bt_tapereset to clear the End-Of-Run marker) * - 1 if a valid block was read */ static int -_bt_taperead(BTTapeBlock *tape) +_bt_taperead(BTTapeBlock * tape) { - int fd; - int nread; - - if (tape->bttb_eor) { - return(0); /* we are already at End-Of-Run */ - } - - /* - * we're clobbering the old tape block, but we do need to save the - * VFD (the one in the block we're reading is bogus). - */ - fd = tape->bttb_fd; - nread = FileRead(fd, (char *) tape, TAPEBLCKSZ); - tape->bttb_fd = fd; - - if (nread != TAPEBLCKSZ) { - Assert(nread == 0); /* we are at EOF */ - return(0); - } - Assert(tape->bttb_magic == BTTAPEMAGIC); - NDirectFileRead += TAPEBLCKSZ/MAXBLCKSZ; - return(1); + int fd; + int nread; + + if (tape->bttb_eor) + { + return (0); /* we are already at End-Of-Run */ + } + + /* + * we're clobbering the old tape block, but we do need to save the VFD + * (the one in the block we're reading is bogus). + */ + fd = tape->bttb_fd; + nread = FileRead(fd, (char *) tape, TAPEBLCKSZ); + tape->bttb_fd = fd; + + if (nread != TAPEBLCKSZ) + { + Assert(nread == 0); /* we are at EOF */ + return (0); + } + Assert(tape->bttb_magic == BTTAPEMAGIC); + NDirectFileRead += TAPEBLCKSZ / MAXBLCKSZ; + return (1); } /* @@ -487,19 +510,20 @@ _bt_taperead(BTTapeBlock *tape) * side effects: * - sets 'pos' to the current position within the block. 
*/ -static BTItem -_bt_tapenext(BTTapeBlock *tape, char **pos) +static BTItem +_bt_tapenext(BTTapeBlock * tape, char **pos) { - Size itemsz; - BTItem bti; - - if (*pos >= tape->bttb_data + tape->bttb_top) { - return((BTItem) NULL); - } - bti = (BTItem) *pos; - itemsz = BTITEMSZ(bti); - *pos += DOUBLEALIGN(itemsz); - return(bti); + Size itemsz; + BTItem bti; + + if (*pos >= tape->bttb_data + tape->bttb_top) + { + return ((BTItem) NULL); + } + bti = (BTItem) * pos; + itemsz = BTITEMSZ(bti); + *pos += DOUBLEALIGN(itemsz); + return (bti); } /* @@ -514,11 +538,11 @@ _bt_tapenext(BTTapeBlock *tape, char **pos) * the beginning of free space. */ static void -_bt_tapeadd(BTTapeBlock *tape, BTItem item, int itemsz) +_bt_tapeadd(BTTapeBlock * tape, BTItem item, int itemsz) { - memcpy(tape->bttb_data + tape->bttb_top, item, itemsz); - ++tape->bttb_ntup; - tape->bttb_top += DOUBLEALIGN(itemsz); + memcpy(tape->bttb_data + tape->bttb_top, item, itemsz); + ++tape->bttb_ntup; + tape->bttb_top += DOUBLEALIGN(itemsz); } /*------------------------------------------------------------------------- @@ -530,41 +554,44 @@ _bt_tapeadd(BTTapeBlock *tape, BTItem item, int itemsz) * create and initialize a spool structure, including the underlying * files. 
*/ -void * +void * _bt_spoolinit(Relation index, int ntapes, bool isunique) { - BTSpool *btspool = (BTSpool *) palloc(sizeof(BTSpool)); - int i; - char *fname = (char *) palloc(sizeof(TAPETEMP) + 1); - - if (btspool == (BTSpool *) NULL || fname == (char *) NULL) { - elog(WARN, "_bt_spoolinit: out of memory"); - } - memset((char *) btspool, 0, sizeof(BTSpool)); - btspool->bts_ntapes = ntapes; - btspool->bts_tape = 0; - btspool->isunique = isunique; - - btspool->bts_itape = - (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes); - btspool->bts_otape = - (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes); - if (btspool->bts_itape == (BTTapeBlock **) NULL || - btspool->bts_otape == (BTTapeBlock **) NULL) { - elog(WARN, "_bt_spoolinit: out of memory"); - } - - for (i = 0; i < ntapes; ++i) { - btspool->bts_itape[i] = - _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP))); - btspool->bts_otape[i] = - _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP))); - } - pfree((void *) fname); - - _bt_isortcmpinit(index, btspool); - - return((void *) btspool); + BTSpool *btspool = (BTSpool *) palloc(sizeof(BTSpool)); + int i; + char *fname = (char *) palloc(sizeof(TAPETEMP) + 1); + + if (btspool == (BTSpool *) NULL || fname == (char *) NULL) + { + elog(WARN, "_bt_spoolinit: out of memory"); + } + memset((char *) btspool, 0, sizeof(BTSpool)); + btspool->bts_ntapes = ntapes; + btspool->bts_tape = 0; + btspool->isunique = isunique; + + btspool->bts_itape = + (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes); + btspool->bts_otape = + (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes); + if (btspool->bts_itape == (BTTapeBlock **) NULL || + btspool->bts_otape == (BTTapeBlock **) NULL) + { + elog(WARN, "_bt_spoolinit: out of memory"); + } + + for (i = 0; i < ntapes; ++i) + { + btspool->bts_itape[i] = + _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP))); + btspool->bts_otape[i] = + _bt_tapecreate(mktemp(strcpy(fname, TAPETEMP))); + } + pfree((void *) fname); + + _bt_isortcmpinit(index, 
btspool); + + return ((void *) btspool); } /* @@ -573,29 +600,32 @@ _bt_spoolinit(Relation index, int ntapes, bool isunique) void _bt_spooldestroy(void *spool) { - BTSpool *btspool = (BTSpool *) spool; - int i; - - for (i = 0; i < btspool->bts_ntapes; ++i) { - _bt_tapedestroy(btspool->bts_otape[i]); - _bt_tapedestroy(btspool->bts_itape[i]); - } - pfree((void *) btspool); + BTSpool *btspool = (BTSpool *) spool; + int i; + + for (i = 0; i < btspool->bts_ntapes; ++i) + { + _bt_tapedestroy(btspool->bts_otape[i]); + _bt_tapedestroy(btspool->bts_itape[i]); + } + pfree((void *) btspool); } /* * flush out any dirty output tape blocks */ static void -_bt_spoolflush(BTSpool *btspool) +_bt_spoolflush(BTSpool * btspool) { - int i; + int i; - for (i = 0; i < btspool->bts_ntapes; ++i) { - if (!EMPTYTAPE(btspool->bts_otape[i])) { - _bt_tapewrite(btspool->bts_otape[i], 1); + for (i = 0; i < btspool->bts_ntapes; ++i) + { + if (!EMPTYTAPE(btspool->bts_otape[i])) + { + _bt_tapewrite(btspool->bts_otape[i], 1); + } } - } } /* @@ -605,36 +635,37 @@ _bt_spoolflush(BTSpool *btspool) * output tapes. */ static void -_bt_spoolswap(BTSpool *btspool) +_bt_spoolswap(BTSpool * btspool) { - File tmpfd; - BTTapeBlock *itape; - BTTapeBlock *otape; - int i; + File tmpfd; + BTTapeBlock *itape; + BTTapeBlock *otape; + int i; - for (i = 0; i < btspool->bts_ntapes; ++i) { - itape = btspool->bts_itape[i]; - otape = btspool->bts_otape[i]; + for (i = 0; i < btspool->bts_ntapes; ++i) + { + itape = btspool->bts_itape[i]; + otape = btspool->bts_otape[i]; - /* - * swap the input and output VFDs. - */ - tmpfd = itape->bttb_fd; - itape->bttb_fd = otape->bttb_fd; - otape->bttb_fd = tmpfd; + /* + * swap the input and output VFDs. + */ + tmpfd = itape->bttb_fd; + itape->bttb_fd = otape->bttb_fd; + otape->bttb_fd = tmpfd; - /* - * rewind the new input tape. - */ - _bt_taperewind(itape); - _bt_tapereset(itape); + /* + * rewind the new input tape. 
+ */ + _bt_taperewind(itape); + _bt_tapereset(itape); - /* - * clear the new output tape -- it's ok to throw away the old - * inputs. - */ - _bt_tapeclear(otape); - } + /* + * clear the new output tape -- it's ok to throw away the old + * inputs. + */ + _bt_tapeclear(otape); + } } /*------------------------------------------------------------------------- @@ -643,7 +674,7 @@ _bt_spoolswap(BTSpool *btspool) */ /* - * spool 'btitem' into an initial run. as tape blocks are filled, the + * spool 'btitem' into an initial run. as tape blocks are filled, the * block BTItems are qsorted and written into some output tape (it * doesn't matter which; we go round-robin for simplicity). the * initial runs are therefore always just one block. @@ -651,134 +682,137 @@ _bt_spoolswap(BTSpool *btspool) void _bt_spool(Relation index, BTItem btitem, void *spool) { - BTSpool *btspool = (BTSpool *) spool; - BTTapeBlock *itape; - Size itemsz; - - _bt_isortcmpinit (index, btspool); - - itape = btspool->bts_itape[btspool->bts_tape]; - itemsz = BTITEMSZ(btitem); - itemsz = DOUBLEALIGN(itemsz); - - /* - * if this buffer is too full for this BTItemData, or if we have - * run out of BTItems, we need to sort the buffer and write it - * out. in this case, the BTItemData will go into the next tape's - * buffer. - */ - if (btitem == (BTItem) NULL || SPCLEFT(itape) < itemsz) { - BTSortKey *parray = (BTSortKey *) NULL; - BTTapeBlock *otape; - BTItem bti; - char *pos; - int btisz; - int it_ntup = itape->bttb_ntup; - int i; + BTSpool *btspool = (BTSpool *) spool; + BTTapeBlock *itape; + Size itemsz; - /* - * build an array of pointers to the BTItemDatas on the input - * block. - */ - if (it_ntup > 0) { - parray = - (BTSortKey *) palloc(it_ntup * sizeof(BTSortKey)); - pos = itape->bttb_data; - for (i = 0; i < it_ntup; ++i) { - _bt_setsortkey(index, _bt_tapenext(itape, &pos), &(parray[i])); - } - - /* - * qsort the pointer array. 
- */ - qsort((void *) parray, it_ntup, sizeof(BTSortKey), - (int (*)(const void *,const void *))_bt_isortcmp); - } + _bt_isortcmpinit(index, btspool); + + itape = btspool->bts_itape[btspool->bts_tape]; + itemsz = BTITEMSZ(btitem); + itemsz = DOUBLEALIGN(itemsz); /* - * write the spooled run into the output tape. we copy the - * BTItemDatas in the order dictated by the sorted array of - * BTItems, not the original order. - * - * (since everything was DOUBLEALIGN'd and is all on a single - * tape block, everything had *better* still fit on one tape - * block..) + * if this buffer is too full for this BTItemData, or if we have run + * out of BTItems, we need to sort the buffer and write it out. in + * this case, the BTItemData will go into the next tape's buffer. */ - otape = btspool->bts_otape[btspool->bts_tape]; - for (i = 0; i < it_ntup; ++i) { - bti = parray[i].btsk_item; - btisz = BTITEMSZ(bti); - btisz = DOUBLEALIGN(btisz); - _bt_tapeadd(otape, bti, btisz); + if (btitem == (BTItem) NULL || SPCLEFT(itape) < itemsz) + { + BTSortKey *parray = (BTSortKey *) NULL; + BTTapeBlock *otape; + BTItem bti; + char *pos; + int btisz; + int it_ntup = itape->bttb_ntup; + int i; + + /* + * build an array of pointers to the BTItemDatas on the input + * block. + */ + if (it_ntup > 0) + { + parray = + (BTSortKey *) palloc(it_ntup * sizeof(BTSortKey)); + pos = itape->bttb_data; + for (i = 0; i < it_ntup; ++i) + { + _bt_setsortkey(index, _bt_tapenext(itape, &pos), &(parray[i])); + } + + /* + * qsort the pointer array. + */ + qsort((void *) parray, it_ntup, sizeof(BTSortKey), + (int (*) (const void *, const void *)) _bt_isortcmp); + } + + /* + * write the spooled run into the output tape. we copy the + * BTItemDatas in the order dictated by the sorted array of + * BTItems, not the original order. + * + * (since everything was DOUBLEALIGN'd and is all on a single tape + * block, everything had *better* still fit on one tape block..) 
+ */ + otape = btspool->bts_otape[btspool->bts_tape]; + for (i = 0; i < it_ntup; ++i) + { + bti = parray[i].btsk_item; + btisz = BTITEMSZ(bti); + btisz = DOUBLEALIGN(btisz); + _bt_tapeadd(otape, bti, btisz); #if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_SPOOL) - { - bool isnull; - Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, - &isnull); - printf("_bt_spool: inserted <%x> into output tape %d\n", - d, btspool->bts_tape); - } -#endif /* FASTBUILD_DEBUG && FASTBUILD_SPOOL */ - } + { + bool isnull; + Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, + &isnull); - /* - * the initial runs are always single tape blocks. flush the - * output block, marking End-Of-Run. - */ - _bt_tapewrite(otape, 1); + printf("_bt_spool: inserted <%x> into output tape %d\n", + d, btspool->bts_tape); + } +#endif /* FASTBUILD_DEBUG && FASTBUILD_SPOOL */ + } - /* - * reset the input buffer for the next run. we don't have to - * write it out or anything -- we only use it to hold the - * unsorted BTItemDatas, the output tape contains all the - * sorted stuff. - * - * changing bts_tape changes the output tape and input tape; - * we change itape for the code below. - */ - _bt_tapereset(itape); - btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes; - itape = btspool->bts_itape[btspool->bts_tape]; + /* + * the initial runs are always single tape blocks. flush the + * output block, marking End-Of-Run. + */ + _bt_tapewrite(otape, 1); - /* - * destroy the pointer array. - */ - if (parray != (BTSortKey *) NULL) - { - for (i = 0; i < it_ntup; i++) - { - if ( parray[i].btsk_datum != (Datum*) NULL ) - pfree ((void*)(parray[i].btsk_datum)); - if ( parray[i].btsk_nulls != (char*) NULL ) - pfree ((void*)(parray[i].btsk_nulls)); - } - pfree((void *) parray); + /* + * reset the input buffer for the next run. we don't have to + * write it out or anything -- we only use it to hold the unsorted + * BTItemDatas, the output tape contains all the sorted stuff. 
+ * + * changing bts_tape changes the output tape and input tape; we + * change itape for the code below. + */ + _bt_tapereset(itape); + btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes; + itape = btspool->bts_itape[btspool->bts_tape]; + + /* + * destroy the pointer array. + */ + if (parray != (BTSortKey *) NULL) + { + for (i = 0; i < it_ntup; i++) + { + if (parray[i].btsk_datum != (Datum *) NULL) + pfree((void *) (parray[i].btsk_datum)); + if (parray[i].btsk_nulls != (char *) NULL) + pfree((void *) (parray[i].btsk_nulls)); + } + pfree((void *) parray); + } } - } - /* insert this item into the current buffer */ - if (btitem != (BTItem) NULL) { - _bt_tapeadd(itape, btitem, itemsz); - } + /* insert this item into the current buffer */ + if (btitem != (BTItem) NULL) + { + _bt_tapeadd(itape, btitem, itemsz); + } } /* * allocate a new, clean btree page, not linked to any siblings. */ static void -_bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags) +_bt_blnewpage(Relation index, Buffer * buf, Page * page, int flags) { - BTPageOpaque opaque; + BTPageOpaque opaque; - *buf = _bt_getbuf(index, P_NEW, BT_WRITE); + *buf = _bt_getbuf(index, P_NEW, BT_WRITE); #if 0 - printf("\tblk=%d\n", BufferGetBlockNumber(*buf)); + printf("\tblk=%d\n", BufferGetBlockNumber(*buf)); #endif - *page = BufferGetPage(*buf); - _bt_pageinit(*page, BufferGetPageSize(*buf)); - opaque = (BTPageOpaque) PageGetSpecialPointer(*page); - opaque->btpo_prev = opaque->btpo_next = P_NONE; - opaque->btpo_flags = flags; + *page = BufferGetPage(*buf); + _bt_pageinit(*page, BufferGetPageSize(*buf)); + opaque = (BTPageOpaque) PageGetSpecialPointer(*page); + opaque->btpo_prev = opaque->btpo_next = P_NONE; + opaque->btpo_flags = flags; } /* @@ -790,42 +824,44 @@ _bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags) static void _bt_slideleft(Relation index, Buffer buf, Page page) { - OffsetNumber off; - OffsetNumber maxoff; - ItemId previi; - ItemId thisii; - - if 
(!PageIsEmpty(page)) { - maxoff = PageGetMaxOffsetNumber(page); - previi = PageGetItemId(page, P_HIKEY); - for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off)) { - thisii = PageGetItemId(page, off); - *previi = *thisii; - previi = thisii; + OffsetNumber off; + OffsetNumber maxoff; + ItemId previi; + ItemId thisii; + + if (!PageIsEmpty(page)) + { + maxoff = PageGetMaxOffsetNumber(page); + previi = PageGetItemId(page, P_HIKEY); + for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off)) + { + thisii = PageGetItemId(page, off); + *previi = *thisii; + previi = thisii; + } + ((PageHeader) page)->pd_lower -= sizeof(ItemIdData); } - ((PageHeader) page)->pd_lower -= sizeof(ItemIdData); - } } /* * allocate and initialize a new BTPageState. the returned structure * is suitable for immediate use by _bt_buildadd. */ -static void * +static void * _bt_pagestate(Relation index, int flags, int level, bool doupper) { - BTPageState *state = (BTPageState *) palloc(sizeof(BTPageState)); - - memset((char *) state, 0, sizeof(BTPageState)); - _bt_blnewpage(index, &(state->btps_buf), &(state->btps_page), flags); - state->btps_firstoff = InvalidOffsetNumber; - state->btps_lastoff = P_HIKEY; - state->btps_lastbti = (BTItem) NULL; - state->btps_next = (BTPageState *) NULL; - state->btps_level = level; - state->btps_doupper = doupper; - - return((void *) state); + BTPageState *state = (BTPageState *) palloc(sizeof(BTPageState)); + + memset((char *) state, 0, sizeof(BTPageState)); + _bt_blnewpage(index, &(state->btps_buf), &(state->btps_page), flags); + state->btps_firstoff = InvalidOffsetNumber; + state->btps_lastoff = P_HIKEY; + state->btps_lastbti = (BTItem) NULL; + state->btps_next = (BTPageState *) NULL; + state->btps_level = level; + state->btps_doupper = doupper; + + return ((void *) state); } /* @@ -834,19 +870,19 @@ _bt_pagestate(Relation index, int flags, int level, bool doupper) * the page to which the item used to point, e.g., a heap page if * 'opage' is a 
leaf page). */ -static BTItem +static BTItem _bt_minitem(Page opage, BlockNumber oblkno, int atend) { - OffsetNumber off; - BTItem obti; - BTItem nbti; + OffsetNumber off; + BTItem obti; + BTItem nbti; - off = atend ? P_HIKEY : P_FIRSTKEY; - obti = (BTItem) PageGetItem(opage, PageGetItemId(opage, off)); - nbti = _bt_formitem(&(obti->bti_itup)); - ItemPointerSet(&(nbti->bti_itup.t_tid), oblkno, P_HIKEY); + off = atend ? P_HIKEY : P_FIRSTKEY; + obti = (BTItem) PageGetItem(opage, PageGetItemId(opage, off)); + nbti = _bt_formitem(&(obti->bti_itup)); + ItemPointerSet(&(nbti->bti_itup.t_tid), oblkno, P_HIKEY); - return(nbti); + return (nbti); } /* @@ -855,26 +891,26 @@ _bt_minitem(Page opage, BlockNumber oblkno, int atend) * we must be careful to observe the following restrictions, placed * upon us by the conventions in nbtsearch.c: * - rightmost pages start data items at P_HIKEY instead of at - * P_FIRSTKEY. + * P_FIRSTKEY. * - duplicates cannot be split among pages unless the chain of - * duplicates starts at the first data item. + * duplicates starts at the first data item. * * a leaf page being built looks like: * * +----------------+---------------------------------+ - * | PageHeaderData | linp0 linp1 linp2 ... | + * | PageHeaderData | linp0 linp1 linp2 ... | * +-----------+----+---------------------------------+ - * | ... linpN | ^ first | + * | ... linpN | ^ first | * +-----------+--------------------------------------+ - * | ^ last | - * | | - * | v last | + * | ^ last | + * | | + * | v last | * +-------------+------------------------------------+ - * | | itemN ... | + * | | itemN ... | * +-------------+------------------+-----------------+ - * | ... item3 item2 item1 | "special space" | + * | ... item3 item2 item1 | "special space" | * +--------------------------------+-----------------+ - * ^ first + * ^ first * * contrast this with the diagram in bufpage.h; note the mismatch * between linps and items. 
this is because we reserve linp0 as a @@ -888,216 +924,230 @@ _bt_minitem(Page opage, BlockNumber oblkno, int atend) * * if all keys are unique, 'first' will always be the same as 'last'. */ -static BTItem +static BTItem _bt_buildadd(Relation index, void *pstate, BTItem bti, int flags) { - BTPageState *state = (BTPageState *) pstate; - Buffer nbuf; - Page npage; - BTItem last_bti; - OffsetNumber first_off; - OffsetNumber last_off; - OffsetNumber off; - Size pgspc; - Size btisz; - - nbuf = state->btps_buf; - npage = state->btps_page; - first_off = state->btps_firstoff; - last_off = state->btps_lastoff; - last_bti = state->btps_lastbti; - - pgspc = PageGetFreeSpace(npage); - btisz = BTITEMSZ(bti); - btisz = DOUBLEALIGN(btisz); - if (pgspc < btisz) { - Buffer obuf = nbuf; - Page opage = npage; - OffsetNumber o, n; - ItemId ii; - ItemId hii; - - _bt_blnewpage(index, &nbuf, &npage, flags); + BTPageState *state = (BTPageState *) pstate; + Buffer nbuf; + Page npage; + BTItem last_bti; + OffsetNumber first_off; + OffsetNumber last_off; + OffsetNumber off; + Size pgspc; + Size btisz; + + nbuf = state->btps_buf; + npage = state->btps_page; + first_off = state->btps_firstoff; + last_off = state->btps_lastoff; + last_bti = state->btps_lastbti; + + pgspc = PageGetFreeSpace(npage); + btisz = BTITEMSZ(bti); + btisz = DOUBLEALIGN(btisz); + if (pgspc < btisz) + { + Buffer obuf = nbuf; + Page opage = npage; + OffsetNumber o, + n; + ItemId ii; + ItemId hii; - /* - * if 'last' is part of a chain of duplicates that does not - * start at the beginning of the old page, the entire chain is - * copied to the new page; we delete all of the duplicates - * from the old page except the first, which becomes the high - * key item of the old page. - * - * if the chain starts at the beginning of the page or there - * is no chain ('first' == 'last'), we need only copy 'last' - * to the new page. again, 'first' (== 'last') becomes the - * high key of the old page. 
- * - * note that in either case, we copy at least one item to the - * new page, so 'last_bti' will always be valid. 'bti' will - * never be the first data item on the new page. - */ - if (first_off == P_FIRSTKEY) { - Assert(last_off != P_FIRSTKEY); - first_off = last_off; - } - for (o = first_off, n = P_FIRSTKEY; - o <= last_off; - o = OffsetNumberNext(o), n = OffsetNumberNext(n)) { - ii = PageGetItemId(opage, o); - if ( PageAddItem(npage, PageGetItem(opage, ii), - ii->lp_len, n, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add item to the page in _bt_sort (1)"); + _bt_blnewpage(index, &nbuf, &npage, flags); + + /* + * if 'last' is part of a chain of duplicates that does not start + * at the beginning of the old page, the entire chain is copied to + * the new page; we delete all of the duplicates from the old page + * except the first, which becomes the high key item of the old + * page. + * + * if the chain starts at the beginning of the page or there is no + * chain ('first' == 'last'), we need only copy 'last' to the new + * page. again, 'first' (== 'last') becomes the high key of the + * old page. + * + * note that in either case, we copy at least one item to the new + * page, so 'last_bti' will always be valid. 'bti' will never be + * the first data item on the new page. 
+ */ + if (first_off == P_FIRSTKEY) + { + Assert(last_off != P_FIRSTKEY); + first_off = last_off; + } + for (o = first_off, n = P_FIRSTKEY; + o <= last_off; + o = OffsetNumberNext(o), n = OffsetNumberNext(n)) + { + ii = PageGetItemId(opage, o); + if (PageAddItem(npage, PageGetItem(opage, ii), + ii->lp_len, n, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add item to the page in _bt_sort (1)"); #if 0 #if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) - { - bool isnull; - BTItem tmpbti = - (BTItem) PageGetItem(npage, PageGetItemId(npage, n)); - Datum d = index_getattr(&(tmpbti->bti_itup), 1, - index->rd_att, &isnull); - printf("_bt_buildadd: moved <%x> to offset %d at level %d\n", - d, n, state->btps_level); - } -#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ + { + bool isnull; + BTItem tmpbti = + (BTItem) PageGetItem(npage, PageGetItemId(npage, n)); + Datum d = index_getattr(&(tmpbti->bti_itup), 1, + index->rd_att, &isnull); + + printf("_bt_buildadd: moved <%x> to offset %d at level %d\n", + d, n, state->btps_level); + } +#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ #endif - } - /* - * this loop is backward because PageIndexTupleDelete shuffles - * the tuples to fill holes in the page -- by starting at the - * end and working back, we won't create holes (and thereby - * avoid shuffling). - */ - for (o = last_off; o > first_off; o = OffsetNumberPrev(o)) { - PageIndexTupleDelete(opage, o); - } - hii = PageGetItemId(opage, P_HIKEY); - ii = PageGetItemId(opage, first_off); - *hii = *ii; - ii->lp_flags &= ~LP_USED; - ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData); + } - first_off = P_FIRSTKEY; - last_off = PageGetMaxOffsetNumber(npage); - last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, last_off)); + /* + * this loop is backward because PageIndexTupleDelete shuffles the + * tuples to fill holes in the page -- by starting at the end and + * working back, we won't create holes (and thereby avoid + * shuffling). 
+ */ + for (o = last_off; o > first_off; o = OffsetNumberPrev(o)) + { + PageIndexTupleDelete(opage, o); + } + hii = PageGetItemId(opage, P_HIKEY); + ii = PageGetItemId(opage, first_off); + *hii = *ii; + ii->lp_flags &= ~LP_USED; + ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData); - /* - * set the page (side link) pointers. - */ - { - BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage); - BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage); - - oopaque->btpo_next = BufferGetBlockNumber(nbuf); - nopaque->btpo_prev = BufferGetBlockNumber(obuf); - nopaque->btpo_next = P_NONE; - - if ( _bt_itemcmp(index, _bt_nattr, - (BTItem) PageGetItem(opage, PageGetItemId(opage, P_HIKEY)), - (BTItem) PageGetItem(opage, PageGetItemId(opage, P_FIRSTKEY)), - BTEqualStrategyNumber) ) - oopaque->btpo_flags |= BTP_CHAIN; - } + first_off = P_FIRSTKEY; + last_off = PageGetMaxOffsetNumber(npage); + last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, last_off)); - /* - * copy the old buffer's minimum key to its parent. if we - * don't have a parent, we have to create one; this adds a new - * btree level. - */ - if (state->btps_doupper) { - BTItem nbti; - - if (state->btps_next == (BTPageState *) NULL) { - state->btps_next = - _bt_pagestate(index, 0, state->btps_level + 1, true); - } - nbti = _bt_minitem(opage, BufferGetBlockNumber(obuf), 0); - _bt_buildadd(index, state->btps_next, nbti, 0); - pfree((void *) nbti); + /* + * set the page (side link) pointers. 
+ */ + { + BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage); + BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage); + + oopaque->btpo_next = BufferGetBlockNumber(nbuf); + nopaque->btpo_prev = BufferGetBlockNumber(obuf); + nopaque->btpo_next = P_NONE; + + if (_bt_itemcmp(index, _bt_nattr, + (BTItem) PageGetItem(opage, PageGetItemId(opage, P_HIKEY)), + (BTItem) PageGetItem(opage, PageGetItemId(opage, P_FIRSTKEY)), + BTEqualStrategyNumber)) + oopaque->btpo_flags |= BTP_CHAIN; + } + + /* + * copy the old buffer's minimum key to its parent. if we don't + * have a parent, we have to create one; this adds a new btree + * level. + */ + if (state->btps_doupper) + { + BTItem nbti; + + if (state->btps_next == (BTPageState *) NULL) + { + state->btps_next = + _bt_pagestate(index, 0, state->btps_level + 1, true); + } + nbti = _bt_minitem(opage, BufferGetBlockNumber(obuf), 0); + _bt_buildadd(index, state->btps_next, nbti, 0); + pfree((void *) nbti); + } + + /* + * write out the old stuff. we never want to see it again, so we + * can give up our lock (if we had one; BuildingBtree is set, so + * we aren't locking). + */ + _bt_wrtbuf(index, obuf); } /* - * write out the old stuff. we never want to see it again, so - * we can give up our lock (if we had one; BuildingBtree is - * set, so we aren't locking). + * if this item is different from the last item added, we start a new + * chain of duplicates. */ - _bt_wrtbuf(index, obuf); - } - - /* - * if this item is different from the last item added, we start a - * new chain of duplicates. 
- */ - off = OffsetNumberNext(last_off); - if ( PageAddItem(npage, (Item) bti, btisz, off, LP_USED) == InvalidOffsetNumber ) - elog (FATAL, "btree: failed to add item to the page in _bt_sort (2)"); + off = OffsetNumberNext(last_off); + if (PageAddItem(npage, (Item) bti, btisz, off, LP_USED) == InvalidOffsetNumber) + elog(FATAL, "btree: failed to add item to the page in _bt_sort (2)"); #if 0 #if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) - { - bool isnull; - Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull); - printf("_bt_buildadd: inserted <%x> at offset %d at level %d\n", - d, off, state->btps_level); - } -#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ + { + bool isnull; + Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull); + + printf("_bt_buildadd: inserted <%x> at offset %d at level %d\n", + d, off, state->btps_level); + } +#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ #endif - if (last_bti == (BTItem) NULL) - { - first_off = P_FIRSTKEY; - } - else if ( !_bt_itemcmp(index, _bt_nattr, - bti, last_bti, BTEqualStrategyNumber) ) - { - first_off = off; - } - last_off = off; - last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, off)); - - state->btps_buf = nbuf; - state->btps_page = npage; - state->btps_lastbti = last_bti; - state->btps_lastoff = last_off; - state->btps_firstoff = first_off; - - return(last_bti); + if (last_bti == (BTItem) NULL) + { + first_off = P_FIRSTKEY; + } + else if (!_bt_itemcmp(index, _bt_nattr, + bti, last_bti, BTEqualStrategyNumber)) + { + first_off = off; + } + last_off = off; + last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, off)); + + state->btps_buf = nbuf; + state->btps_page = npage; + state->btps_lastbti = last_bti; + state->btps_lastoff = last_off; + state->btps_firstoff = first_off; + + return (last_bti); } static void -_bt_uppershutdown(Relation index, BTPageState *state) +_bt_uppershutdown(Relation index, BTPageState * state) { - BTPageState *s; - BlockNumber 
blkno; - BTPageOpaque opaque; - BTItem bti; + BTPageState *s; + BlockNumber blkno; + BTPageOpaque opaque; + BTItem bti; - for (s = state; s != (BTPageState *) NULL; s = s->btps_next) { - blkno = BufferGetBlockNumber(s->btps_buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page); + for (s = state; s != (BTPageState *) NULL; s = s->btps_next) + { + blkno = BufferGetBlockNumber(s->btps_buf); + opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page); - /* - * if this is the root, attach it to the metapage. otherwise, - * stick the minimum key of the last page on this level (which - * has not been split, or else it wouldn't be the last page) - * into its parent. this may cause the last page of upper - * levels to split, but that's not a problem -- we haven't - * gotten to them yet. - */ - if (s->btps_doupper) { - if (s->btps_next == (BTPageState *) NULL) { - opaque->btpo_flags |= BTP_ROOT; - _bt_metaproot(index, blkno, s->btps_level + 1); - } else { - bti = _bt_minitem(s->btps_page, blkno, 0); - _bt_buildadd(index, s->btps_next, bti, 0); - pfree((void *) bti); - } - } + /* + * if this is the root, attach it to the metapage. otherwise, + * stick the minimum key of the last page on this level (which has + * not been split, or else it wouldn't be the last page) into its + * parent. this may cause the last page of upper levels to split, + * but that's not a problem -- we haven't gotten to them yet. + */ + if (s->btps_doupper) + { + if (s->btps_next == (BTPageState *) NULL) + { + opaque->btpo_flags |= BTP_ROOT; + _bt_metaproot(index, blkno, s->btps_level + 1); + } + else + { + bti = _bt_minitem(s->btps_page, blkno, 0); + _bt_buildadd(index, s->btps_next, bti, 0); + pfree((void *) bti); + } + } - /* - * this is the rightmost page, so the ItemId array needs to be - * slid back one slot. 
- */ - _bt_slideleft(index, s->btps_buf, s->btps_page); - _bt_wrtbuf(index, s->btps_buf); - } + /* + * this is the rightmost page, so the ItemId array needs to be + * slid back one slot. + */ + _bt_slideleft(index, s->btps_buf, s->btps_page); + _bt_wrtbuf(index, s->btps_buf); + } } /* @@ -1105,203 +1155,230 @@ _bt_uppershutdown(Relation index, BTPageState *state) * merging passes until at most one run is left in each tape. at that * point, merge the final tape runs into a set of btree leaves. * - * XXX three nested loops? gross. cut me up into smaller routines. + * XXX three nested loops? gross. cut me up into smaller routines. */ static void -_bt_merge(Relation index, BTSpool *btspool) +_bt_merge(Relation index, BTSpool * btspool) { - BTPageState *state; - BTPriQueue q; - BTPriQueueElem e; - BTSortKey btsk; - BTItem bti; - BTTapeBlock *itape; - BTTapeBlock *otape; - char *tapepos[MAXTAPES]; - int tapedone[MAXTAPES]; - int t; - int goodtapes; - int npass; - int nruns; - Size btisz; - bool doleaf = false; - - /* - * initialize state needed for the merge into the btree leaf pages. - */ - state = (BTPageState *) _bt_pagestate(index, BTP_LEAF, 0, true); - - npass = 0; - do { /* pass */ + BTPageState *state; + BTPriQueue q; + BTPriQueueElem e; + BTSortKey btsk; + BTItem bti; + BTTapeBlock *itape; + BTTapeBlock *otape; + char *tapepos[MAXTAPES]; + int tapedone[MAXTAPES]; + int t; + int goodtapes; + int npass; + int nruns; + Size btisz; + bool doleaf = false; + /* - * each pass starts by flushing the previous outputs and - * swapping inputs and outputs. flushing sets End-of-Run for - * any dirty output tapes. swapping clears the new output - * tapes and rewinds the new input tapes. + * initialize state needed for the merge into the btree leaf pages. */ - btspool->bts_tape = btspool->bts_ntapes - 1; - _bt_spoolflush(btspool); - _bt_spoolswap(btspool); - - ++npass; - nruns = 0; - - for (;;) { /* run */ - /* - * each run starts by selecting a new output tape. 
the - * merged results of a given run are always sent to this - * one tape. - */ - btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes; - otape = btspool->bts_otape[btspool->bts_tape]; - - /* - * initialize the priority queue by loading it with the - * first element of the given run in each tape. since we - * are starting a new run, we reset the tape (clearing the - * End-Of-Run marker) before reading it. this means that - * _bt_taperead will return 0 only if the tape is actually - * at EOF. - */ - memset((char *) &q, 0, sizeof(BTPriQueue)); - goodtapes = 0; - for (t = 0; t < btspool->bts_ntapes; ++t) { - itape = btspool->bts_itape[t]; - tapepos[t] = itape->bttb_data; - tapedone[t] = 0; - _bt_tapereset(itape); - do { - if (_bt_taperead(itape) == 0) { - tapedone[t] = 1; - } - } while (!tapedone[t] && EMPTYTAPE(itape)); - if (!tapedone[t]) { - ++goodtapes; - e.btpqe_tape = t; - _bt_setsortkey(index, _bt_tapenext(itape, &tapepos[t]), - &(e.btpqe_item)); - if (e.btpqe_item.btsk_item != (BTItem) NULL) { - _bt_pqadd(&q, &e); - } - } - } - /* - * if we don't have any tapes with any input (i.e., they - * are all at EOF), there is no work to do in this run -- - * we must be done with this pass. - */ - if (goodtapes == 0) { - break; /* for */ - } - ++nruns; - - /* - * output the smallest element from the queue until there - * are no more. - */ - while (_bt_pqnext(&q, &e) >= 0) { /* item */ + state = (BTPageState *) _bt_pagestate(index, BTP_LEAF, 0, true); + + npass = 0; + do + { /* pass */ + /* - * replace the element taken from priority queue, - * fetching a new block if needed. a tape can run out - * if it hits either End-Of-Run or EOF. + * each pass starts by flushing the previous outputs and swapping + * inputs and outputs. flushing sets End-of-Run for any dirty + * output tapes. swapping clears the new output tapes and rewinds + * the new input tapes. 
*/ - t = e.btpqe_tape; - btsk = e.btpqe_item; - bti = btsk.btsk_item; - if (bti != (BTItem) NULL) { - btisz = BTITEMSZ(bti); - btisz = DOUBLEALIGN(btisz); - if (doleaf) { - _bt_buildadd(index, state, bti, BTP_LEAF); -#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) + btspool->bts_tape = btspool->bts_ntapes - 1; + _bt_spoolflush(btspool); + _bt_spoolswap(btspool); + + ++npass; + nruns = 0; + + for (;;) + { /* run */ + + /* + * each run starts by selecting a new output tape. the merged + * results of a given run are always sent to this one tape. + */ + btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes; + otape = btspool->bts_otape[btspool->bts_tape]; + + /* + * initialize the priority queue by loading it with the first + * element of the given run in each tape. since we are + * starting a new run, we reset the tape (clearing the + * End-Of-Run marker) before reading it. this means that + * _bt_taperead will return 0 only if the tape is actually at + * EOF. + */ + memset((char *) &q, 0, sizeof(BTPriQueue)); + goodtapes = 0; + for (t = 0; t < btspool->bts_ntapes; ++t) { - bool isnull; - Datum d = index_getattr(&(bti->bti_itup), 1, - index->rd_att, &isnull); - printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into block %d\n", - npass, nruns, d, t, - BufferGetBlockNumber(state->btps_buf)); + itape = btspool->bts_itape[t]; + tapepos[t] = itape->bttb_data; + tapedone[t] = 0; + _bt_tapereset(itape); + do + { + if (_bt_taperead(itape) == 0) + { + tapedone[t] = 1; + } + } while (!tapedone[t] && EMPTYTAPE(itape)); + if (!tapedone[t]) + { + ++goodtapes; + e.btpqe_tape = t; + _bt_setsortkey(index, _bt_tapenext(itape, &tapepos[t]), + &(e.btpqe_item)); + if (e.btpqe_item.btsk_item != (BTItem) NULL) + { + _bt_pqadd(&q, &e); + } + } } -#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ - } else { - if (SPCLEFT(otape) < btisz) { - /* - * if it's full, write it out and add the - * item to the next block. 
(since we will - * be adding another tuple immediately - * after this, we can be sure that there - * will be at least one more block in this - * run and so we know we do *not* want to - * set End-Of-Run here.) - */ - _bt_tapewrite(otape, 0); - } - _bt_tapeadd(otape, bti, btisz); -#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) + + /* + * if we don't have any tapes with any input (i.e., they are + * all at EOF), there is no work to do in this run -- we must + * be done with this pass. + */ + if (goodtapes == 0) { - bool isnull; - Datum d = index_getattr(&(bti->bti_itup), 1, - index->rd_att, &isnull); - printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into output tape %d\n", - npass, nruns, d, t, - btspool->bts_tape); - } -#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ - } - - if ( btsk.btsk_datum != (Datum*) NULL ) - pfree ((void*)(btsk.btsk_datum)); - if ( btsk.btsk_nulls != (char*) NULL ) - pfree ((void*)(btsk.btsk_nulls)); - - } - itape = btspool->bts_itape[t]; - if (!tapedone[t]) { - BTItem newbti = _bt_tapenext(itape, &tapepos[t]); - - if (newbti == (BTItem) NULL) { - do { - if (_bt_taperead(itape) == 0) { - tapedone[t] = 1; - } - } while (!tapedone[t] && EMPTYTAPE(itape)); - if (!tapedone[t]) { - tapepos[t] = itape->bttb_data; - newbti = _bt_tapenext(itape, &tapepos[t]); + break; /* for */ } - } - if (newbti != (BTItem) NULL) { - BTPriQueueElem nexte; - - nexte.btpqe_tape = t; - _bt_setsortkey(index, newbti, &(nexte.btpqe_item)); - _bt_pqadd(&q, &nexte); - } + ++nruns; + + /* + * output the smallest element from the queue until there are + * no more. + */ + while (_bt_pqnext(&q, &e) >= 0) + { /* item */ + + /* + * replace the element taken from priority queue, fetching + * a new block if needed. a tape can run out if it hits + * either End-Of-Run or EOF. 
+ */ + t = e.btpqe_tape; + btsk = e.btpqe_item; + bti = btsk.btsk_item; + if (bti != (BTItem) NULL) + { + btisz = BTITEMSZ(bti); + btisz = DOUBLEALIGN(btisz); + if (doleaf) + { + _bt_buildadd(index, state, bti, BTP_LEAF); +#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) + { + bool isnull; + Datum d = index_getattr(&(bti->bti_itup), 1, + index->rd_att, &isnull); + + printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into block %d\n", + npass, nruns, d, t, + BufferGetBlockNumber(state->btps_buf)); + } +#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ + } + else + { + if (SPCLEFT(otape) < btisz) + { + + /* + * if it's full, write it out and add the item + * to the next block. (since we will be + * adding another tuple immediately after + * this, we can be sure that there will be at + * least one more block in this run and so we + * know we do *not* want to set End-Of-Run + * here.) + */ + _bt_tapewrite(otape, 0); + } + _bt_tapeadd(otape, bti, btisz); +#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) + { + bool isnull; + Datum d = index_getattr(&(bti->bti_itup), 1, + index->rd_att, &isnull); + + printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into output tape %d\n", + npass, nruns, d, t, + btspool->bts_tape); + } +#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ + } + + if (btsk.btsk_datum != (Datum *) NULL) + pfree((void *) (btsk.btsk_datum)); + if (btsk.btsk_nulls != (char *) NULL) + pfree((void *) (btsk.btsk_nulls)); + + } + itape = btspool->bts_itape[t]; + if (!tapedone[t]) + { + BTItem newbti = _bt_tapenext(itape, &tapepos[t]); + + if (newbti == (BTItem) NULL) + { + do + { + if (_bt_taperead(itape) == 0) + { + tapedone[t] = 1; + } + } while (!tapedone[t] && EMPTYTAPE(itape)); + if (!tapedone[t]) + { + tapepos[t] = itape->bttb_data; + newbti = _bt_tapenext(itape, &tapepos[t]); + } + } + if (newbti != (BTItem) NULL) + { + BTPriQueueElem nexte; + + nexte.btpqe_tape = t; + _bt_setsortkey(index, newbti, &(nexte.btpqe_item)); + 
_bt_pqadd(&q, &nexte); + } + } + } /* item */ + + /* + * that's it for this run. flush the output tape, marking + * End-of-Run. + */ + _bt_tapewrite(otape, 1); + } /* run */ + + /* + * we are here because we ran out of input on all of the input + * tapes. + * + * if this pass did not generate more actual output runs than we have + * tapes, we know we have at most one run in each tape. this + * means that we are ready to merge into the final btree leaf + * pages instead of merging into a tape file. + */ + if (nruns <= btspool->bts_ntapes) + { + doleaf = true; } - } /* item */ - - /* - * that's it for this run. flush the output tape, marking - * End-of-Run. - */ - _bt_tapewrite(otape, 1); - } /* run */ - - /* - * we are here because we ran out of input on all of the input - * tapes. - * - * if this pass did not generate more actual output runs than - * we have tapes, we know we have at most one run in each - * tape. this means that we are ready to merge into the final - * btree leaf pages instead of merging into a tape file. - */ - if (nruns <= btspool->bts_ntapes) { - doleaf = true; - } - } while (nruns > 0); /* pass */ + } while (nruns > 0); /* pass */ - _bt_uppershutdown(index, state); + _bt_uppershutdown(index, state); } @@ -1320,62 +1397,65 @@ _bt_merge(Relation index, BTSpool *btspool) void _bt_upperbuild(Relation index) { - Buffer rbuf; - BlockNumber blk; - Page rpage; - BTPageOpaque ropaque; - BTPageState *state; - BTItem nbti; - - /* - * find the first leaf block. while we're at it, clear the - * BTP_ROOT flag that we set while building it (so we could find - * it later). - */ - rbuf = _bt_getroot(index, BT_WRITE); - blk = BufferGetBlockNumber(rbuf); - rpage = BufferGetPage(rbuf); - ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); - ropaque->btpo_flags &= ~BTP_ROOT; - _bt_wrtbuf(index, rbuf); - - state = (BTPageState *) _bt_pagestate(index, 0, 0, true); - - /* for each page... 
*/ - do { -#if 0 - printf("\t\tblk=%d\n", blk); -#endif - rbuf = _bt_getbuf(index, blk, BT_READ); + Buffer rbuf; + BlockNumber blk; + Page rpage; + BTPageOpaque ropaque; + BTPageState *state; + BTItem nbti; + + /* + * find the first leaf block. while we're at it, clear the BTP_ROOT + * flag that we set while building it (so we could find it later). + */ + rbuf = _bt_getroot(index, BT_WRITE); + blk = BufferGetBlockNumber(rbuf); rpage = BufferGetPage(rbuf); ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); - - /* for each item... */ - if (!PageIsEmpty(rpage)) { - /* - * form a new index tuple corresponding to the minimum key - * of the lower page and insert it into a page at this - * level. - */ - nbti = _bt_minitem(rpage, blk, P_RIGHTMOST(ropaque)); + ropaque->btpo_flags &= ~BTP_ROOT; + _bt_wrtbuf(index, rbuf); + + state = (BTPageState *) _bt_pagestate(index, 0, 0, true); + + /* for each page... */ + do + { +#if 0 + printf("\t\tblk=%d\n", blk); +#endif + rbuf = _bt_getbuf(index, blk, BT_READ); + rpage = BufferGetPage(rbuf); + ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); + + /* for each item... */ + if (!PageIsEmpty(rpage)) + { + + /* + * form a new index tuple corresponding to the minimum key of + * the lower page and insert it into a page at this level. 
+ */ + nbti = _bt_minitem(rpage, blk, P_RIGHTMOST(ropaque)); #if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) - { - bool isnull; - Datum d = index_getattr(&(nbti->bti_itup), 1, index->rd_att, - &isnull); - printf("_bt_upperbuild: inserting <%x> at %d\n", - d, state->btps_level); - } -#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ - _bt_buildadd(index, state, nbti, 0); - pfree((void *) nbti); - } - blk = ropaque->btpo_next; - _bt_relbuf(index, rbuf, BT_READ); - } while (blk != P_NONE); - - _bt_uppershutdown(index, state); + { + bool isnull; + Datum d = index_getattr(&(nbti->bti_itup), 1, index->rd_att, + &isnull); + + printf("_bt_upperbuild: inserting <%x> at %d\n", + d, state->btps_level); + } +#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ + _bt_buildadd(index, state, nbti, 0); + pfree((void *) nbti); + } + blk = ropaque->btpo_next; + _bt_relbuf(index, rbuf, BT_READ); + } while (blk != P_NONE); + + _bt_uppershutdown(index, state); } + #endif /* @@ -1385,17 +1465,17 @@ _bt_upperbuild(Relation index) void _bt_leafbuild(Relation index, void *spool) { - _bt_isortcmpinit (index, (BTSpool *) spool); + _bt_isortcmpinit(index, (BTSpool *) spool); #ifdef BTREE_BUILD_STATS - if ( ShowExecutorStats ) - { - fprintf(stderr, "! BtreeBuild (Spool) Stats:\n"); - ShowUsage (); - ResetUsage (); - } + if (ShowExecutorStats) + { + fprintf(stderr, "! 
BtreeBuild (Spool) Stats:\n"); + ShowUsage(); + ResetUsage(); + } #endif - _bt_merge(index, (BTSpool *) spool); + _bt_merge(index, (BTSpool *) spool); } diff --git a/src/backend/access/nbtree/nbtstrat.c b/src/backend/access/nbtree/nbtstrat.c index 6de003c06a..5215d2000d 100644 --- a/src/backend/access/nbtree/nbtstrat.c +++ b/src/backend/access/nbtree/nbtstrat.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * btstrat.c-- - * Srategy map entries for the btree indexed access method + * Srategy map entries for the btree indexed access method * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtstrat.c,v 1.4 1996/11/05 10:35:37 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtstrat.c,v 1.5 1997/09/07 04:39:04 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -20,111 +20,111 @@ /* * Note: - * StrategyNegate, StrategyCommute, and StrategyNegateCommute - * assume <, <=, ==, >=, > ordering. + * StrategyNegate, StrategyCommute, and StrategyNegateCommute + * assume <, <=, ==, >=, > ordering. 
*/ -static StrategyNumber BTNegate[5] = { - BTGreaterEqualStrategyNumber, - BTGreaterStrategyNumber, - InvalidStrategy, - BTLessStrategyNumber, - BTLessEqualStrategyNumber +static StrategyNumber BTNegate[5] = { + BTGreaterEqualStrategyNumber, + BTGreaterStrategyNumber, + InvalidStrategy, + BTLessStrategyNumber, + BTLessEqualStrategyNumber }; -static StrategyNumber BTCommute[5] = { - BTGreaterStrategyNumber, - BTGreaterEqualStrategyNumber, - InvalidStrategy, - BTLessEqualStrategyNumber, - BTLessStrategyNumber +static StrategyNumber BTCommute[5] = { + BTGreaterStrategyNumber, + BTGreaterEqualStrategyNumber, + InvalidStrategy, + BTLessEqualStrategyNumber, + BTLessStrategyNumber }; -static StrategyNumber BTNegateCommute[5] = { - BTLessEqualStrategyNumber, - BTLessStrategyNumber, - InvalidStrategy, - BTGreaterStrategyNumber, - BTGreaterEqualStrategyNumber +static StrategyNumber BTNegateCommute[5] = { + BTLessEqualStrategyNumber, + BTLessStrategyNumber, + InvalidStrategy, + BTGreaterStrategyNumber, + BTGreaterEqualStrategyNumber }; -static uint16 BTLessTermData[] = { /* XXX type clash */ - 2, - BTLessStrategyNumber, - SK_NEGATE, - BTLessStrategyNumber, - SK_NEGATE | SK_COMMUTE +static uint16 BTLessTermData[] = { /* XXX type clash */ + 2, + BTLessStrategyNumber, + SK_NEGATE, + BTLessStrategyNumber, + SK_NEGATE | SK_COMMUTE }; -static uint16 BTLessEqualTermData[] = { /* XXX type clash */ - 2, - BTLessEqualStrategyNumber, - 0x0, - BTLessEqualStrategyNumber, - SK_COMMUTE +static uint16 BTLessEqualTermData[] = { /* XXX type clash */ + 2, + BTLessEqualStrategyNumber, + 0x0, + BTLessEqualStrategyNumber, + SK_COMMUTE }; static uint16 BTGreaterEqualTermData[] = { /* XXX type clash */ - 2, - BTGreaterEqualStrategyNumber, - 0x0, - BTGreaterEqualStrategyNumber, - SK_COMMUTE - }; - -static uint16 BTGreaterTermData[] = { /* XXX type clash */ - 2, - BTGreaterStrategyNumber, - SK_NEGATE, - BTGreaterStrategyNumber, - SK_NEGATE | SK_COMMUTE + 2, + BTGreaterEqualStrategyNumber, + 0x0, + 
BTGreaterEqualStrategyNumber, + SK_COMMUTE }; -static StrategyTerm BTEqualExpressionData[] = { - (StrategyTerm)BTLessTermData, /* XXX */ - (StrategyTerm)BTLessEqualTermData, /* XXX */ - (StrategyTerm)BTGreaterEqualTermData, /* XXX */ - (StrategyTerm)BTGreaterTermData, /* XXX */ - NULL +static uint16 BTGreaterTermData[] = { /* XXX type clash */ + 2, + BTGreaterStrategyNumber, + SK_NEGATE, + BTGreaterStrategyNumber, + SK_NEGATE | SK_COMMUTE }; -static StrategyEvaluationData BTEvaluationData = { - /* XXX static for simplicity */ - - BTMaxStrategyNumber, - (StrategyTransformMap)BTNegate, /* XXX */ - (StrategyTransformMap)BTCommute, /* XXX */ - (StrategyTransformMap)BTNegateCommute, /* XXX */ +static StrategyTerm BTEqualExpressionData[] = { + (StrategyTerm) BTLessTermData, /* XXX */ + (StrategyTerm) BTLessEqualTermData, /* XXX */ + (StrategyTerm) BTGreaterEqualTermData, /* XXX */ + (StrategyTerm) BTGreaterTermData, /* XXX */ + NULL +}; + +static StrategyEvaluationData BTEvaluationData = { + /* XXX static for simplicity */ + + BTMaxStrategyNumber, + (StrategyTransformMap) BTNegate, /* XXX */ + (StrategyTransformMap) BTCommute, /* XXX */ + (StrategyTransformMap) BTNegateCommute, /* XXX */ - { NULL, NULL, (StrategyExpression)BTEqualExpressionData, NULL, NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL} + {NULL, NULL, (StrategyExpression) BTEqualExpressionData, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL} }; /* ---------------------------------------------------------------- - * RelationGetBTStrategy + * RelationGetBTStrategy * ---------------------------------------------------------------- */ StrategyNumber _bt_getstrat(Relation rel, - AttrNumber attno, - RegProcedure proc) + AttrNumber attno, + RegProcedure proc) { - StrategyNumber strat; - - strat = RelationGetStrategy(rel, attno, &BTEvaluationData, proc); - - Assert(StrategyNumberIsValid(strat)); - - return (strat); + StrategyNumber strat; + + strat = RelationGetStrategy(rel, attno, &BTEvaluationData, proc); + + 
Assert(StrategyNumberIsValid(strat)); + + return (strat); } bool _bt_invokestrat(Relation rel, - AttrNumber attno, - StrategyNumber strat, - Datum left, - Datum right) + AttrNumber attno, + StrategyNumber strat, + Datum left, + Datum right) { - return (RelationInvokeStrategy(rel, &BTEvaluationData, attno, strat, - left, right)); + return (RelationInvokeStrategy(rel, &BTEvaluationData, attno, strat, + left, right)); } diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 738e55dbcc..096f1d2691 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * btutils.c-- - * Utility code for Postgres btree implementation. + * Utility code for Postgres btree implementation. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.11 1997/08/19 21:29:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.12 1997/09/07 04:39:05 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -23,367 +23,384 @@ #include <catalog/pg_proc.h> #include <executor/execdebug.h> -extern int NIndexTupleProcessed; +extern int NIndexTupleProcessed; #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -ScanKey +ScanKey _bt_mkscankey(Relation rel, IndexTuple itup) -{ - ScanKey skey; - TupleDesc itupdesc; - int natts; - int i; - Datum arg; - RegProcedure proc; - bool null; - bits16 flag; - - natts = rel->rd_rel->relnatts; - itupdesc = RelationGetTupleDescriptor(rel); - - skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); - - for (i = 0; i < natts; i++) { - arg = index_getattr(itup, i + 1, itupdesc, &null); - if ( null ) - { - proc = NullValueRegProcedure; - flag = SK_ISNULL; - } - else 
+{ + ScanKey skey; + TupleDesc itupdesc; + int natts; + int i; + Datum arg; + RegProcedure proc; + bool null; + bits16 flag; + + natts = rel->rd_rel->relnatts; + itupdesc = RelationGetTupleDescriptor(rel); + + skey = (ScanKey) palloc(natts * sizeof(ScanKeyData)); + + for (i = 0; i < natts; i++) { - proc = index_getprocid(rel, i + 1, BTORDER_PROC); - flag = 0x0; + arg = index_getattr(itup, i + 1, itupdesc, &null); + if (null) + { + proc = NullValueRegProcedure; + flag = SK_ISNULL; + } + else + { + proc = index_getprocid(rel, i + 1, BTORDER_PROC); + flag = 0x0; + } + ScanKeyEntryInitialize(&skey[i], + flag, (AttrNumber) (i + 1), proc, arg); } - ScanKeyEntryInitialize(&skey[i], - flag, (AttrNumber) (i + 1), proc, arg); - } - - return (skey); + + return (skey); } void _bt_freeskey(ScanKey skey) { - pfree(skey); + pfree(skey); } void _bt_freestack(BTStack stack) { - BTStack ostack; - - while (stack != (BTStack) NULL) { - ostack = stack; - stack = stack->bts_parent; - pfree(ostack->bts_btitem); - pfree(ostack); - } + BTStack ostack; + + while (stack != (BTStack) NULL) + { + ostack = stack; + stack = stack->bts_parent; + pfree(ostack->bts_btitem); + pfree(ostack); + } } /* - * _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals. + * _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals. * - * The order of the keys in the qual match the ordering imposed by - * the index. This routine only needs to be called if there are - * more than one qual clauses using this index. + * The order of the keys in the qual match the ordering imposed by + * the index. This routine only needs to be called if there are + * more than one qual clauses using this index. 
*/ void _bt_orderkeys(Relation relation, BTScanOpaque so) { - ScanKey xform; - ScanKeyData *cur; - StrategyMap map; - int nbytes; - long test; - int i, j; - int init[BTMaxStrategyNumber+1]; - ScanKey key; - uint16 numberOfKeys = so->numberOfKeys; - uint16 new_numberOfKeys = 0; - AttrNumber attno = 1; - - if ( numberOfKeys < 1 ) - return; - - key = so->keyData; - - cur = &key[0]; - if ( cur->sk_attno != 1 ) - elog (WARN, "_bt_orderkeys: key(s) for attribute 1 missed"); - - if ( numberOfKeys == 1 ) - { - /* - * We don't use indices for 'A is null' and 'A is not null' - * currently and 'A < = > <> NULL' is non-sense' - so - * qual is not Ok. - vadim 03/21/97 - */ - if ( cur->sk_flags & SK_ISNULL ) - so->qual_ok = 0; - so->numberOfFirstKeys = 1; - return; - } - - /* get space for the modified array of keys */ - nbytes = BTMaxStrategyNumber * sizeof(ScanKeyData); - xform = (ScanKey) palloc(nbytes); - - memset(xform, 0, nbytes); - map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), - BTMaxStrategyNumber, - attno); - for (j = 0; j <= BTMaxStrategyNumber; j++) - init[j] = 0; - - /* check each key passed in */ - for (i = 0; ; ) - { - if ( i < numberOfKeys ) - cur = &key[i]; - - if ( cur->sk_flags & SK_ISNULL ) /* see comments above */ - so->qual_ok = 0; - - if ( i == numberOfKeys || cur->sk_attno != attno ) + ScanKey xform; + ScanKeyData *cur; + StrategyMap map; + int nbytes; + long test; + int i, + j; + int init[BTMaxStrategyNumber + 1]; + ScanKey key; + uint16 numberOfKeys = so->numberOfKeys; + uint16 new_numberOfKeys = 0; + AttrNumber attno = 1; + + if (numberOfKeys < 1) + return; + + key = so->keyData; + + cur = &key[0]; + if (cur->sk_attno != 1) + elog(WARN, "_bt_orderkeys: key(s) for attribute 1 missed"); + + if (numberOfKeys == 1) { - if ( cur->sk_attno != attno + 1 && i < numberOfKeys ) - { - elog (WARN, "_bt_orderkeys: key(s) for attribute %d missed", attno + 1); - } - /* - * If = has been specified, no other key will be used. 
- * In case of key < 2 && key == 1 and so on - * we have to set qual_ok to 0 - */ - if (init[BTEqualStrategyNumber - 1]) - { - ScanKeyData *eq, *chk; - - eq = &xform[BTEqualStrategyNumber - 1]; - for (j = BTMaxStrategyNumber; --j >= 0; ) - { - if ( j == (BTEqualStrategyNumber - 1) || init[j] == 0 ) - continue; - chk = &xform[j]; - test = (long) fmgr(chk->sk_procedure, eq->sk_argument, chk->sk_argument); - if (!test) - so->qual_ok = 0; - } - init[BTLessStrategyNumber - 1] = 0; - init[BTLessEqualStrategyNumber - 1] = 0; - init[BTGreaterEqualStrategyNumber - 1] = 0; - init[BTGreaterStrategyNumber - 1] = 0; - } - - /* only one of <, <= */ - if (init[BTLessStrategyNumber - 1] - && init[BTLessEqualStrategyNumber - 1]) - { - ScanKeyData *lt, *le; - - lt = &xform[BTLessStrategyNumber - 1]; - le = &xform[BTLessEqualStrategyNumber - 1]; + /* - * DO NOT use the cached function stuff here -- this is key - * ordering, happens only when the user expresses a hokey - * qualification, and gets executed only once, anyway. The - * transform maps are hard-coded, and can't be initialized - * in the correct way. + * We don't use indices for 'A is null' and 'A is not null' + * currently and 'A < = > <> NULL' is non-sense' - so qual is not + * Ok. 
- vadim 03/21/97 */ - test = (long) fmgr(le->sk_procedure, lt->sk_argument, le->sk_argument); - if (test) - init[BTLessEqualStrategyNumber - 1] = 0; - else - init[BTLessStrategyNumber - 1] = 0; - } - - /* only one of >, >= */ - if (init[BTGreaterStrategyNumber - 1] - && init[BTGreaterEqualStrategyNumber - 1]) - { - ScanKeyData *gt, *ge; - - gt = &xform[BTGreaterStrategyNumber - 1]; - ge = &xform[BTGreaterEqualStrategyNumber - 1]; - - /* see note above on function cache */ - test = (long) fmgr(ge->sk_procedure, gt->sk_argument, ge->sk_argument); - if (test) - init[BTGreaterEqualStrategyNumber - 1] = 0; - else - init[BTGreaterStrategyNumber - 1] = 0; - } - - /* okay, reorder and count */ - for (j = BTMaxStrategyNumber; --j >= 0; ) - if (init[j]) - key[new_numberOfKeys++] = xform[j]; - - if ( attno == 1 ) - so->numberOfFirstKeys = new_numberOfKeys; - - if ( i == numberOfKeys ) - break; - - /* initialization for new attno */ - attno = cur->sk_attno; - memset(xform, 0, nbytes); - map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), - BTMaxStrategyNumber, - attno); - /* haven't looked at any strategies yet */ - for (j = 0; j <= BTMaxStrategyNumber; j++) - init[j] = 0; + if (cur->sk_flags & SK_ISNULL) + so->qual_ok = 0; + so->numberOfFirstKeys = 1; + return; } - for (j = BTMaxStrategyNumber; --j >= 0; ) - { - if (cur->sk_procedure == map->entry[j].sk_procedure) - break; - } - - /* have we seen one of these before? 
*/ - if (init[j]) - { - /* yup, use the appropriate value */ - test = - (long) FMGR_PTR2(cur->sk_func, cur->sk_procedure, - cur->sk_argument, xform[j].sk_argument); - if (test) - xform[j].sk_argument = cur->sk_argument; - else if ( j == (BTEqualStrategyNumber - 1) ) - so->qual_ok = 0; /* key == a && key == b, but a != b */ - } else + /* get space for the modified array of keys */ + nbytes = BTMaxStrategyNumber * sizeof(ScanKeyData); + xform = (ScanKey) palloc(nbytes); + + memset(xform, 0, nbytes); + map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), + BTMaxStrategyNumber, + attno); + for (j = 0; j <= BTMaxStrategyNumber; j++) + init[j] = 0; + + /* check each key passed in */ + for (i = 0;;) { - /* nope, use this value */ - memmove(&xform[j], cur, sizeof(*cur)); - init[j] = 1; + if (i < numberOfKeys) + cur = &key[i]; + + if (cur->sk_flags & SK_ISNULL) /* see comments above */ + so->qual_ok = 0; + + if (i == numberOfKeys || cur->sk_attno != attno) + { + if (cur->sk_attno != attno + 1 && i < numberOfKeys) + { + elog(WARN, "_bt_orderkeys: key(s) for attribute %d missed", attno + 1); + } + + /* + * If = has been specified, no other key will be used. 
In case + * of key < 2 && key == 1 and so on we have to set qual_ok to + * 0 + */ + if (init[BTEqualStrategyNumber - 1]) + { + ScanKeyData *eq, + *chk; + + eq = &xform[BTEqualStrategyNumber - 1]; + for (j = BTMaxStrategyNumber; --j >= 0;) + { + if (j == (BTEqualStrategyNumber - 1) || init[j] == 0) + continue; + chk = &xform[j]; + test = (long) fmgr(chk->sk_procedure, eq->sk_argument, chk->sk_argument); + if (!test) + so->qual_ok = 0; + } + init[BTLessStrategyNumber - 1] = 0; + init[BTLessEqualStrategyNumber - 1] = 0; + init[BTGreaterEqualStrategyNumber - 1] = 0; + init[BTGreaterStrategyNumber - 1] = 0; + } + + /* only one of <, <= */ + if (init[BTLessStrategyNumber - 1] + && init[BTLessEqualStrategyNumber - 1]) + { + ScanKeyData *lt, + *le; + + lt = &xform[BTLessStrategyNumber - 1]; + le = &xform[BTLessEqualStrategyNumber - 1]; + + /* + * DO NOT use the cached function stuff here -- this is + * key ordering, happens only when the user expresses a + * hokey qualification, and gets executed only once, + * anyway. The transform maps are hard-coded, and can't + * be initialized in the correct way. 
+ */ + test = (long) fmgr(le->sk_procedure, lt->sk_argument, le->sk_argument); + if (test) + init[BTLessEqualStrategyNumber - 1] = 0; + else + init[BTLessStrategyNumber - 1] = 0; + } + + /* only one of >, >= */ + if (init[BTGreaterStrategyNumber - 1] + && init[BTGreaterEqualStrategyNumber - 1]) + { + ScanKeyData *gt, + *ge; + + gt = &xform[BTGreaterStrategyNumber - 1]; + ge = &xform[BTGreaterEqualStrategyNumber - 1]; + + /* see note above on function cache */ + test = (long) fmgr(ge->sk_procedure, gt->sk_argument, ge->sk_argument); + if (test) + init[BTGreaterEqualStrategyNumber - 1] = 0; + else + init[BTGreaterStrategyNumber - 1] = 0; + } + + /* okay, reorder and count */ + for (j = BTMaxStrategyNumber; --j >= 0;) + if (init[j]) + key[new_numberOfKeys++] = xform[j]; + + if (attno == 1) + so->numberOfFirstKeys = new_numberOfKeys; + + if (i == numberOfKeys) + break; + + /* initialization for new attno */ + attno = cur->sk_attno; + memset(xform, 0, nbytes); + map = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation), + BTMaxStrategyNumber, + attno); + /* haven't looked at any strategies yet */ + for (j = 0; j <= BTMaxStrategyNumber; j++) + init[j] = 0; + } + + for (j = BTMaxStrategyNumber; --j >= 0;) + { + if (cur->sk_procedure == map->entry[j].sk_procedure) + break; + } + + /* have we seen one of these before? 
*/ + if (init[j]) + { + /* yup, use the appropriate value */ + test = + (long) FMGR_PTR2(cur->sk_func, cur->sk_procedure, + cur->sk_argument, xform[j].sk_argument); + if (test) + xform[j].sk_argument = cur->sk_argument; + else if (j == (BTEqualStrategyNumber - 1)) + so->qual_ok = 0;/* key == a && key == b, but a != b */ + } + else + { + /* nope, use this value */ + memmove(&xform[j], cur, sizeof(*cur)); + init[j] = 1; + } + + i++; } - - i++; - } - - so->numberOfKeys = new_numberOfKeys; - - pfree(xform); + + so->numberOfKeys = new_numberOfKeys; + + pfree(xform); } BTItem _bt_formitem(IndexTuple itup) { - int nbytes_btitem; - BTItem btitem; - Size tuplen; - extern Oid newoid(); - - /* see comments in btbuild - - if (itup->t_info & INDEX_NULL_MASK) - elog(WARN, "btree indices cannot include null keys"); - */ - - /* make a copy of the index tuple with room for the sequence number */ - tuplen = IndexTupleSize(itup); - nbytes_btitem = tuplen + - (sizeof(BTItemData) - sizeof(IndexTupleData)); - - btitem = (BTItem) palloc(nbytes_btitem); - memmove((char *) &(btitem->bti_itup), (char *) itup, tuplen); - + int nbytes_btitem; + BTItem btitem; + Size tuplen; + extern Oid newoid(); + + /* + * see comments in btbuild + * + * if (itup->t_info & INDEX_NULL_MASK) elog(WARN, "btree indices cannot + * include null keys"); + */ + + /* make a copy of the index tuple with room for the sequence number */ + tuplen = IndexTupleSize(itup); + nbytes_btitem = tuplen + + (sizeof(BTItemData) - sizeof(IndexTupleData)); + + btitem = (BTItem) palloc(nbytes_btitem); + memmove((char *) &(btitem->bti_itup), (char *) itup, tuplen); + #ifndef BTREE_VERSION_1 - btitem->bti_oid = newoid(); + btitem->bti_oid = newoid(); #endif - return (btitem); + return (btitem); } #ifdef NOT_USED bool _bt_checkqual(IndexScanDesc scan, IndexTuple itup) { - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - if (so->numberOfKeys > 0) - return (index_keytest(itup, RelationGetTupleDescriptor(scan->relation), - 
so->numberOfKeys, so->keyData)); - else - return (true); + BTScanOpaque so; + + so = (BTScanOpaque) scan->opaque; + if (so->numberOfKeys > 0) + return (index_keytest(itup, RelationGetTupleDescriptor(scan->relation), + so->numberOfKeys, so->keyData)); + else + return (true); } + #endif #ifdef NOT_USED bool _bt_checkforkeys(IndexScanDesc scan, IndexTuple itup, Size keysz) { - BTScanOpaque so; - - so = (BTScanOpaque) scan->opaque; - if ( keysz > 0 && so->numberOfKeys >= keysz ) - return (index_keytest(itup, RelationGetTupleDescriptor(scan->relation), - keysz, so->keyData)); - else - return (true); + BTScanOpaque so; + + so = (BTScanOpaque) scan->opaque; + if (keysz > 0 && so->numberOfKeys >= keysz) + return (index_keytest(itup, RelationGetTupleDescriptor(scan->relation), + keysz, so->keyData)); + else + return (true); } + #endif bool -_bt_checkkeys (IndexScanDesc scan, IndexTuple tuple, Size *keysok) +_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size * keysok) { - BTScanOpaque so = (BTScanOpaque) scan->opaque; - Size keysz = so->numberOfKeys; - TupleDesc tupdesc; - ScanKey key; - Datum datum; - bool isNull; - int test; - - *keysok = 0; - if ( keysz == 0 ) - return (true); - - key = so->keyData; - tupdesc = RelationGetTupleDescriptor(scan->relation); - - IncrIndexProcessed(); - - while (keysz > 0) - { - datum = index_getattr(tuple, - key[0].sk_attno, - tupdesc, - &isNull); - - /* btree doesn't support 'A is null' clauses, yet */ - if ( isNull || key[0].sk_flags & SK_ISNULL ) + BTScanOpaque so = (BTScanOpaque) scan->opaque; + Size keysz = so->numberOfKeys; + TupleDesc tupdesc; + ScanKey key; + Datum datum; + bool isNull; + int test; + + *keysok = 0; + if (keysz == 0) + return (true); + + key = so->keyData; + tupdesc = RelationGetTupleDescriptor(scan->relation); + + IncrIndexProcessed(); + + while (keysz > 0) { - return (false); - } + datum = index_getattr(tuple, + key[0].sk_attno, + tupdesc, + &isNull); - if (key[0].sk_flags & SK_COMMUTE) { - test = (int) 
(*(key[0].sk_func)) - (DatumGetPointer(key[0].sk_argument), - datum); - } else { - test = (int) (*(key[0].sk_func)) - (datum, - DatumGetPointer(key[0].sk_argument)); - } - - if (!test == !(key[0].sk_flags & SK_NEGATE)) { - return (false); + /* btree doesn't support 'A is null' clauses, yet */ + if (isNull || key[0].sk_flags & SK_ISNULL) + { + return (false); + } + + if (key[0].sk_flags & SK_COMMUTE) + { + test = (int) (*(key[0].sk_func)) + (DatumGetPointer(key[0].sk_argument), + datum); + } + else + { + test = (int) (*(key[0].sk_func)) + (datum, + DatumGetPointer(key[0].sk_argument)); + } + + if (!test == !(key[0].sk_flags & SK_NEGATE)) + { + return (false); + } + + keysz -= 1; + key++; + (*keysok)++; } - - keysz -= 1; - key++; - (*keysok)++; - } - - return (true); + + return (true); } diff --git a/src/backend/access/rtree/rtget.c b/src/backend/access/rtree/rtget.c index 09f10f1aa9..eaf16c1ae9 100644 --- a/src/backend/access/rtree/rtget.c +++ b/src/backend/access/rtree/rtget.c @@ -1,19 +1,19 @@ /*------------------------------------------------------------------------- * * rtget.c-- - * fetch tuples from an rtree scan. + * fetch tuples from an rtree scan. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtget.c,v 1.7 1996/11/21 06:13:43 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtget.c,v 1.8 1997/09/07 04:39:11 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <storage/bufmgr.h> #include <access/sdir.h> #include <access/relscan.h> @@ -21,14 +21,15 @@ #include <access/rtree.h> #include <storage/bufpage.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -static OffsetNumber findnext(IndexScanDesc s, Page p, OffsetNumber n, - ScanDirection dir); +static OffsetNumber +findnext(IndexScanDesc s, Page p, OffsetNumber n, + ScanDirection dir); static RetrieveIndexResult rtscancache(IndexScanDesc s, ScanDirection dir); static RetrieveIndexResult rtfirst(IndexScanDesc s, ScanDirection dir); static RetrieveIndexResult rtnext(IndexScanDesc s, ScanDirection dir); @@ -38,278 +39,315 @@ static ItemPointer rtheapptr(Relation r, ItemPointer itemp); RetrieveIndexResult rtgettuple(IndexScanDesc s, ScanDirection dir) { - RetrieveIndexResult res; - - /* if we have it cached in the scan desc, just return the value */ - if ((res = rtscancache(s, dir)) != (RetrieveIndexResult) NULL) + RetrieveIndexResult res; + + /* if we have it cached in the scan desc, just return the value */ + if ((res = rtscancache(s, dir)) != (RetrieveIndexResult) NULL) + return (res); + + /* not cached, so we'll have to do some work */ + if (ItemPointerIsValid(&(s->currentItemData))) + { + res = rtnext(s, dir); + } + else + { + res = rtfirst(s, dir); + } return (res); - - /* not cached, so we'll have to do some work */ - if (ItemPointerIsValid(&(s->currentItemData))) { - res = rtnext(s, dir); - } else { - res = rtfirst(s, dir); - } - return (res); } -static RetrieveIndexResult +static 
RetrieveIndexResult rtfirst(IndexScanDesc s, ScanDirection dir) { - Buffer b; - Page p; - OffsetNumber n; - OffsetNumber maxoff; - RetrieveIndexResult res; - RTreePageOpaque po; - RTreeScanOpaque so; - RTSTACK *stk; - BlockNumber blk; - IndexTuple it; - - b = ReadBuffer(s->relation, P_ROOT); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); - so = (RTreeScanOpaque) s->opaque; - - for (;;) { - maxoff = PageGetMaxOffsetNumber(p); - if (ScanDirectionIsBackward(dir)) - n = findnext(s, p, maxoff, dir); - else - n = findnext(s, p, FirstOffsetNumber, dir); - - while (n < FirstOffsetNumber || n > maxoff) { - - ReleaseBuffer(b); - if (so->s_stack == (RTSTACK *) NULL) - return ((RetrieveIndexResult) NULL); - - stk = so->s_stack; - b = ReadBuffer(s->relation, stk->rts_blk); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); - maxoff = PageGetMaxOffsetNumber(p); - - if (ScanDirectionIsBackward(dir)) { - n = OffsetNumberPrev(stk->rts_child); - } else { - n = OffsetNumberNext(stk->rts_child); - } - so->s_stack = stk->rts_parent; - pfree(stk); - - n = findnext(s, p, n, dir); - } - if (po->flags & F_LEAF) { - ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - - res = FormRetrieveIndexResult(&(s->currentItemData), &(it->t_tid)); - - ReleaseBuffer(b); - return (res); - } else { - stk = (RTSTACK *) palloc(sizeof(RTSTACK)); - stk->rts_child = n; - stk->rts_blk = BufferGetBlockNumber(b); - stk->rts_parent = so->s_stack; - so->s_stack = stk; - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - blk = ItemPointerGetBlockNumber(&(it->t_tid)); - - ReleaseBuffer(b); - b = ReadBuffer(s->relation, blk); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); + Buffer b; + Page p; + OffsetNumber n; + OffsetNumber maxoff; + RetrieveIndexResult res; + RTreePageOpaque po; + RTreeScanOpaque so; + RTSTACK *stk; + BlockNumber blk; + IndexTuple it; + 
+ b = ReadBuffer(s->relation, P_ROOT); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + so = (RTreeScanOpaque) s->opaque; + + for (;;) + { + maxoff = PageGetMaxOffsetNumber(p); + if (ScanDirectionIsBackward(dir)) + n = findnext(s, p, maxoff, dir); + else + n = findnext(s, p, FirstOffsetNumber, dir); + + while (n < FirstOffsetNumber || n > maxoff) + { + + ReleaseBuffer(b); + if (so->s_stack == (RTSTACK *) NULL) + return ((RetrieveIndexResult) NULL); + + stk = so->s_stack; + b = ReadBuffer(s->relation, stk->rts_blk); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + maxoff = PageGetMaxOffsetNumber(p); + + if (ScanDirectionIsBackward(dir)) + { + n = OffsetNumberPrev(stk->rts_child); + } + else + { + n = OffsetNumberNext(stk->rts_child); + } + so->s_stack = stk->rts_parent; + pfree(stk); + + n = findnext(s, p, n, dir); + } + if (po->flags & F_LEAF) + { + ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + + res = FormRetrieveIndexResult(&(s->currentItemData), &(it->t_tid)); + + ReleaseBuffer(b); + return (res); + } + else + { + stk = (RTSTACK *) palloc(sizeof(RTSTACK)); + stk->rts_child = n; + stk->rts_blk = BufferGetBlockNumber(b); + stk->rts_parent = so->s_stack; + so->s_stack = stk; + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + blk = ItemPointerGetBlockNumber(&(it->t_tid)); + + ReleaseBuffer(b); + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + } } - } } -static RetrieveIndexResult +static RetrieveIndexResult rtnext(IndexScanDesc s, ScanDirection dir) { - Buffer b; - Page p; - OffsetNumber n; - OffsetNumber maxoff; - RetrieveIndexResult res; - RTreePageOpaque po; - RTreeScanOpaque so; - RTSTACK *stk; - BlockNumber blk; - IndexTuple it; - - blk = ItemPointerGetBlockNumber(&(s->currentItemData)); - n = ItemPointerGetOffsetNumber(&(s->currentItemData)); - - if 
(ScanDirectionIsForward(dir)) { - n = OffsetNumberNext(n); - } else { - n = OffsetNumberPrev(n); - } - - b = ReadBuffer(s->relation, blk); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); - so = (RTreeScanOpaque) s->opaque; - - for (;;) { - maxoff = PageGetMaxOffsetNumber(p); - n = findnext(s, p, n, dir); - - while (n < FirstOffsetNumber || n > maxoff) { - - ReleaseBuffer(b); - if (so->s_stack == (RTSTACK *) NULL) - return ((RetrieveIndexResult) NULL); - - stk = so->s_stack; - b = ReadBuffer(s->relation, stk->rts_blk); - p = BufferGetPage(b); - maxoff = PageGetMaxOffsetNumber(p); - po = (RTreePageOpaque) PageGetSpecialPointer(p); - - if (ScanDirectionIsBackward(dir)) { - n = OffsetNumberPrev(stk->rts_child); - } else { - n = OffsetNumberNext(stk->rts_child); - } - so->s_stack = stk->rts_parent; - pfree(stk); - - n = findnext(s, p, n, dir); + Buffer b; + Page p; + OffsetNumber n; + OffsetNumber maxoff; + RetrieveIndexResult res; + RTreePageOpaque po; + RTreeScanOpaque so; + RTSTACK *stk; + BlockNumber blk; + IndexTuple it; + + blk = ItemPointerGetBlockNumber(&(s->currentItemData)); + n = ItemPointerGetOffsetNumber(&(s->currentItemData)); + + if (ScanDirectionIsForward(dir)) + { + n = OffsetNumberNext(n); + } + else + { + n = OffsetNumberPrev(n); } - if (po->flags & F_LEAF) { - ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - - res = FormRetrieveIndexResult(&(s->currentItemData), &(it->t_tid)); - - ReleaseBuffer(b); - return (res); - } else { - stk = (RTSTACK *) palloc(sizeof(RTSTACK)); - stk->rts_child = n; - stk->rts_blk = BufferGetBlockNumber(b); - stk->rts_parent = so->s_stack; - so->s_stack = stk; - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - blk = ItemPointerGetBlockNumber(&(it->t_tid)); - - ReleaseBuffer(b); - b = ReadBuffer(s->relation, blk); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); - - if 
(ScanDirectionIsBackward(dir)) { - n = PageGetMaxOffsetNumber(p); - } else { - n = FirstOffsetNumber; - } + + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + so = (RTreeScanOpaque) s->opaque; + + for (;;) + { + maxoff = PageGetMaxOffsetNumber(p); + n = findnext(s, p, n, dir); + + while (n < FirstOffsetNumber || n > maxoff) + { + + ReleaseBuffer(b); + if (so->s_stack == (RTSTACK *) NULL) + return ((RetrieveIndexResult) NULL); + + stk = so->s_stack; + b = ReadBuffer(s->relation, stk->rts_blk); + p = BufferGetPage(b); + maxoff = PageGetMaxOffsetNumber(p); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + + if (ScanDirectionIsBackward(dir)) + { + n = OffsetNumberPrev(stk->rts_child); + } + else + { + n = OffsetNumberNext(stk->rts_child); + } + so->s_stack = stk->rts_parent; + pfree(stk); + + n = findnext(s, p, n, dir); + } + if (po->flags & F_LEAF) + { + ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + + res = FormRetrieveIndexResult(&(s->currentItemData), &(it->t_tid)); + + ReleaseBuffer(b); + return (res); + } + else + { + stk = (RTSTACK *) palloc(sizeof(RTSTACK)); + stk->rts_child = n; + stk->rts_blk = BufferGetBlockNumber(b); + stk->rts_parent = so->s_stack; + so->s_stack = stk; + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + blk = ItemPointerGetBlockNumber(&(it->t_tid)); + + ReleaseBuffer(b); + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + + if (ScanDirectionIsBackward(dir)) + { + n = PageGetMaxOffsetNumber(p); + } + else + { + n = FirstOffsetNumber; + } + } } - } } -static OffsetNumber +static OffsetNumber findnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir) { - OffsetNumber maxoff; - IndexTuple it; - RTreePageOpaque po; - RTreeScanOpaque so; - - maxoff = PageGetMaxOffsetNumber(p); - po = (RTreePageOpaque) 
PageGetSpecialPointer(p); - so = (RTreeScanOpaque) s->opaque; - - /* - * If we modified the index during the scan, we may have a pointer to - * a ghost tuple, before the scan. If this is the case, back up one. - */ - - if (so->s_flags & RTS_CURBEFORE) { - so->s_flags &= ~RTS_CURBEFORE; - n = OffsetNumberPrev(n); - } - - while (n >= FirstOffsetNumber && n <= maxoff) { - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - if (po->flags & F_LEAF) { - if (index_keytest(it, - RelationGetTupleDescriptor(s->relation), - s->numberOfKeys, s->keyData)) - break; - } else { - if (index_keytest(it, - RelationGetTupleDescriptor(s->relation), - so->s_internalNKey, so->s_internalKey)) - break; + OffsetNumber maxoff; + IndexTuple it; + RTreePageOpaque po; + RTreeScanOpaque so; + + maxoff = PageGetMaxOffsetNumber(p); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + so = (RTreeScanOpaque) s->opaque; + + /* + * If we modified the index during the scan, we may have a pointer to + * a ghost tuple, before the scan. If this is the case, back up one. 
+ */ + + if (so->s_flags & RTS_CURBEFORE) + { + so->s_flags &= ~RTS_CURBEFORE; + n = OffsetNumberPrev(n); } - - if (ScanDirectionIsBackward(dir)) { - n = OffsetNumberPrev(n); - } else { - n = OffsetNumberNext(n); + + while (n >= FirstOffsetNumber && n <= maxoff) + { + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + if (po->flags & F_LEAF) + { + if (index_keytest(it, + RelationGetTupleDescriptor(s->relation), + s->numberOfKeys, s->keyData)) + break; + } + else + { + if (index_keytest(it, + RelationGetTupleDescriptor(s->relation), + so->s_internalNKey, so->s_internalKey)) + break; + } + + if (ScanDirectionIsBackward(dir)) + { + n = OffsetNumberPrev(n); + } + else + { + n = OffsetNumberNext(n); + } } - } - - return (n); + + return (n); } -static RetrieveIndexResult +static RetrieveIndexResult rtscancache(IndexScanDesc s, ScanDirection dir) { - RetrieveIndexResult res; - ItemPointer ip; - - if (!(ScanDirectionIsNoMovement(dir) - && ItemPointerIsValid(&(s->currentItemData)))) { - - return ((RetrieveIndexResult) NULL); - } - - ip = rtheapptr(s->relation, &(s->currentItemData)); - - if (ItemPointerIsValid(ip)) - res = FormRetrieveIndexResult(&(s->currentItemData), ip); - else - res = (RetrieveIndexResult) NULL; - - pfree (ip); - - return (res); + RetrieveIndexResult res; + ItemPointer ip; + + if (!(ScanDirectionIsNoMovement(dir) + && ItemPointerIsValid(&(s->currentItemData)))) + { + + return ((RetrieveIndexResult) NULL); + } + + ip = rtheapptr(s->relation, &(s->currentItemData)); + + if (ItemPointerIsValid(ip)) + res = FormRetrieveIndexResult(&(s->currentItemData), ip); + else + res = (RetrieveIndexResult) NULL; + + pfree(ip); + + return (res); } /* - * rtheapptr returns the item pointer to the tuple in the heap relation - * for which itemp is the index relation item pointer. + * rtheapptr returns the item pointer to the tuple in the heap relation + * for which itemp is the index relation item pointer. 
*/ -static ItemPointer +static ItemPointer rtheapptr(Relation r, ItemPointer itemp) { - Buffer b; - Page p; - IndexTuple it; - ItemPointer ip; - OffsetNumber n; - - ip = (ItemPointer) palloc(sizeof(ItemPointerData)); - if (ItemPointerIsValid(itemp)) { - b = ReadBuffer(r, ItemPointerGetBlockNumber(itemp)); - p = BufferGetPage(b); - n = ItemPointerGetOffsetNumber(itemp); - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - memmove((char *) ip, (char *) &(it->t_tid), - sizeof(ItemPointerData)); - ReleaseBuffer(b); - } else { - ItemPointerSetInvalid(ip); - } - - return (ip); + Buffer b; + Page p; + IndexTuple it; + ItemPointer ip; + OffsetNumber n; + + ip = (ItemPointer) palloc(sizeof(ItemPointerData)); + if (ItemPointerIsValid(itemp)) + { + b = ReadBuffer(r, ItemPointerGetBlockNumber(itemp)); + p = BufferGetPage(b); + n = ItemPointerGetOffsetNumber(itemp); + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + memmove((char *) ip, (char *) &(it->t_tid), + sizeof(ItemPointerData)); + ReleaseBuffer(b); + } + else + { + ItemPointerSetInvalid(ip); + } + + return (ip); } diff --git a/src/backend/access/rtree/rtproc.c b/src/backend/access/rtree/rtproc.c index ac7a3abfec..4b7a9f2a26 100644 --- a/src/backend/access/rtree/rtproc.c +++ b/src/backend/access/rtree/rtproc.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * rtproc.c-- - * pg_amproc entries for rtrees. + * pg_amproc entries for rtrees. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtproc.c,v 1.7 1997/04/22 17:31:23 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtproc.c,v 1.8 1997/09/07 04:39:16 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -17,136 +17,139 @@ #include <utils/builtins.h> #include <utils/geo_decls.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif BOX -*rt_box_union(BOX *a, BOX *b) +* rt_box_union(BOX * a, BOX * b) { - BOX *n; - - if ((n = (BOX *) palloc(sizeof (*n))) == (BOX *) NULL) - elog(WARN, "Cannot allocate box for union"); - - n->high.x = Max(a->high.x, b->high.x); - n->high.y = Max(a->high.y, b->high.y); - n->low.x = Min(a->low.x, b->low.x); - n->low.y = Min(a->low.y, b->low.y); - - return (n); + BOX *n; + + if ((n = (BOX *) palloc(sizeof(*n))) == (BOX *) NULL) + elog(WARN, "Cannot allocate box for union"); + + n->high.x = Max(a->high.x, b->high.x); + n->high.y = Max(a->high.y, b->high.y); + n->low.x = Min(a->low.x, b->low.x); + n->low.y = Min(a->low.y, b->low.y); + + return (n); } -BOX * -rt_box_inter(BOX *a, BOX *b) +BOX * +rt_box_inter(BOX * a, BOX * b) { - BOX *n; - - if ((n = (BOX *) palloc(sizeof (*n))) == (BOX *) NULL) - elog(WARN, "Cannot allocate box for union"); - - n->high.x = Min(a->high.x, b->high.x); - n->high.y = Min(a->high.y, b->high.y); - n->low.x = Max(a->low.x, b->low.x); - n->low.y = Max(a->low.y, b->low.y); - - if (n->high.x < n->low.x || n->high.y < n->low.y) { - pfree(n); - return ((BOX *) NULL); - } - - return (n); + BOX *n; + + if ((n = (BOX *) palloc(sizeof(*n))) == (BOX *) NULL) + elog(WARN, "Cannot allocate box for union"); + + n->high.x = Min(a->high.x, b->high.x); + n->high.y = Min(a->high.y, b->high.y); + n->low.x = Max(a->low.x, b->low.x); + n->low.y = Max(a->low.y, b->low.y); + + if 
(n->high.x < n->low.x || n->high.y < n->low.y) + { + pfree(n); + return ((BOX *) NULL); + } + + return (n); } void -rt_box_size(BOX *a, float *size) +rt_box_size(BOX * a, float *size) { - if (a == (BOX *) NULL || a->high.x <= a->low.x || a->high.y <= a->low.y) - *size = 0.0; - else - *size = (float) ((a->high.x - a->low.x) * (a->high.y - a->low.y)); - - return; + if (a == (BOX *) NULL || a->high.x <= a->low.x || a->high.y <= a->low.y) + *size = 0.0; + else + *size = (float) ((a->high.x - a->low.x) * (a->high.y - a->low.y)); + + return; } /* - * rt_bigbox_size() -- Compute a size for big boxes. + * rt_bigbox_size() -- Compute a size for big boxes. * - * In an earlier release of the system, this routine did something - * different from rt_box_size. We now use floats, rather than ints, - * as the return type for the size routine, so we no longer need to - * have a special return type for big boxes. + * In an earlier release of the system, this routine did something + * different from rt_box_size. We now use floats, rather than ints, + * as the return type for the size routine, so we no longer need to + * have a special return type for big boxes. 
*/ void -rt_bigbox_size(BOX *a, float *size) +rt_bigbox_size(BOX * a, float *size) { - rt_box_size(a, size); + rt_box_size(a, size); } -POLYGON * -rt_poly_union(POLYGON *a, POLYGON *b) +POLYGON * +rt_poly_union(POLYGON * a, POLYGON * b) { - POLYGON *p; - - p = (POLYGON *)PALLOCTYPE(POLYGON); - - if (!PointerIsValid(p)) - elog(WARN, "Cannot allocate polygon for union"); - - memset((char *) p, 0, sizeof(POLYGON)); /* zero any holes */ - p->size = sizeof(POLYGON); - p->npts = 0; - p->boundbox.high.x = Max(a->boundbox.high.x, b->boundbox.high.x); - p->boundbox.high.y = Max(a->boundbox.high.y, b->boundbox.high.y); - p->boundbox.low.x = Min(a->boundbox.low.x, b->boundbox.low.x); - p->boundbox.low.y = Min(a->boundbox.low.y, b->boundbox.low.y); - return p; + POLYGON *p; + + p = (POLYGON *) PALLOCTYPE(POLYGON); + + if (!PointerIsValid(p)) + elog(WARN, "Cannot allocate polygon for union"); + + memset((char *) p, 0, sizeof(POLYGON)); /* zero any holes */ + p->size = sizeof(POLYGON); + p->npts = 0; + p->boundbox.high.x = Max(a->boundbox.high.x, b->boundbox.high.x); + p->boundbox.high.y = Max(a->boundbox.high.y, b->boundbox.high.y); + p->boundbox.low.x = Min(a->boundbox.low.x, b->boundbox.low.x); + p->boundbox.low.y = Min(a->boundbox.low.y, b->boundbox.low.y); + return p; } void -rt_poly_size(POLYGON *a, float *size) +rt_poly_size(POLYGON * a, float *size) { - double xdim, ydim; - - size = (float *) palloc(sizeof(float)); - if (a == (POLYGON *) NULL || - a->boundbox.high.x <= a->boundbox.low.x || - a->boundbox.high.y <= a->boundbox.low.y) - *size = 0.0; - else { - xdim = (a->boundbox.high.x - a->boundbox.low.x); - ydim = (a->boundbox.high.y - a->boundbox.low.y); - - *size = (float) (xdim * ydim); - } - - return; + double xdim, + ydim; + + size = (float *) palloc(sizeof(float)); + if (a == (POLYGON *) NULL || + a->boundbox.high.x <= a->boundbox.low.x || + a->boundbox.high.y <= a->boundbox.low.y) + *size = 0.0; + else + { + xdim = (a->boundbox.high.x - a->boundbox.low.x); + ydim 
= (a->boundbox.high.y - a->boundbox.low.y); + + *size = (float) (xdim * ydim); + } + + return; } -POLYGON * -rt_poly_inter(POLYGON *a, POLYGON *b) +POLYGON * +rt_poly_inter(POLYGON * a, POLYGON * b) { - POLYGON *p; - - p = (POLYGON *) PALLOCTYPE(POLYGON); - - if (!PointerIsValid(p)) - elog(WARN, "Cannot allocate polygon for intersection"); - - memset((char *) p, 0, sizeof(POLYGON)); /* zero any holes */ - p->size = sizeof(POLYGON); - p->npts = 0; - p->boundbox.high.x = Min(a->boundbox.high.x, b->boundbox.high.x); - p->boundbox.high.y = Min(a->boundbox.high.y, b->boundbox.high.y); - p->boundbox.low.x = Max(a->boundbox.low.x, b->boundbox.low.x); - p->boundbox.low.y = Max(a->boundbox.low.y, b->boundbox.low.y); - - if (p->boundbox.high.x < p->boundbox.low.x || p->boundbox.high.y < p->boundbox.low.y) + POLYGON *p; + + p = (POLYGON *) PALLOCTYPE(POLYGON); + + if (!PointerIsValid(p)) + elog(WARN, "Cannot allocate polygon for intersection"); + + memset((char *) p, 0, sizeof(POLYGON)); /* zero any holes */ + p->size = sizeof(POLYGON); + p->npts = 0; + p->boundbox.high.x = Min(a->boundbox.high.x, b->boundbox.high.x); + p->boundbox.high.y = Min(a->boundbox.high.y, b->boundbox.high.y); + p->boundbox.low.x = Max(a->boundbox.low.x, b->boundbox.low.x); + p->boundbox.low.y = Max(a->boundbox.low.y, b->boundbox.low.y); + + if (p->boundbox.high.x < p->boundbox.low.x || p->boundbox.high.y < p->boundbox.low.y) { - pfree(p); - return ((POLYGON *) NULL); + pfree(p); + return ((POLYGON *) NULL); } - - return (p); + + return (p); } diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index 4cd0580c97..ae92ea2013 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * rtree.c-- - * interface routines for the postgres rtree indexed access method. + * interface routines for the postgres rtree indexed access method. 
* * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.13 1997/08/12 22:51:54 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.14 1997/09/07 04:39:22 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -27,886 +27,983 @@ #include <storage/bufpage.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif -typedef struct SPLITVEC { - OffsetNumber *spl_left; - int spl_nleft; - char *spl_ldatum; - OffsetNumber *spl_right; - int spl_nright; - char *spl_rdatum; -} SPLITVEC; - -typedef struct RTSTATE { - func_ptr unionFn; /* union function */ - func_ptr sizeFn; /* size function */ - func_ptr interFn; /* intersection function */ -} RTSTATE; +typedef struct SPLITVEC +{ + OffsetNumber *spl_left; + int spl_nleft; + char *spl_ldatum; + OffsetNumber *spl_right; + int spl_nright; + char *spl_rdatum; +} SPLITVEC; + +typedef struct RTSTATE +{ + func_ptr unionFn; /* union function */ + func_ptr sizeFn; /* size function */ + func_ptr interFn; /* intersection function */ +} RTSTATE; /* non-export function prototypes */ -static InsertIndexResult rtdoinsert(Relation r, IndexTuple itup, - RTSTATE *rtstate); -static void rttighten(Relation r, RTSTACK *stk, char *datum, int att_size, - RTSTATE *rtstate); -static InsertIndexResult dosplit(Relation r, Buffer buffer, RTSTACK *stack, - IndexTuple itup, RTSTATE *rtstate); -static void rtintinsert(Relation r, RTSTACK *stk, IndexTuple ltup, - IndexTuple rtup, RTSTATE *rtstate); -static void rtnewroot(Relation r, IndexTuple lt, IndexTuple rt); -static void picksplit(Relation r, Page page, SPLITVEC *v, IndexTuple itup, - RTSTATE *rtstate); -static void RTInitBuffer(Buffer b, uint32 f); -static OffsetNumber choose(Relation r, Page p, IndexTuple it, - RTSTATE *rtstate); -static int nospace(Page p, 
IndexTuple it); -static void initRtstate(RTSTATE *rtstate, Relation index); +static InsertIndexResult +rtdoinsert(Relation r, IndexTuple itup, + RTSTATE * rtstate); +static void +rttighten(Relation r, RTSTACK * stk, char *datum, int att_size, + RTSTATE * rtstate); +static InsertIndexResult +dosplit(Relation r, Buffer buffer, RTSTACK * stack, + IndexTuple itup, RTSTATE * rtstate); +static void +rtintinsert(Relation r, RTSTACK * stk, IndexTuple ltup, + IndexTuple rtup, RTSTATE * rtstate); +static void rtnewroot(Relation r, IndexTuple lt, IndexTuple rt); +static void +picksplit(Relation r, Page page, SPLITVEC * v, IndexTuple itup, + RTSTATE * rtstate); +static void RTInitBuffer(Buffer b, uint32 f); +static OffsetNumber +choose(Relation r, Page p, IndexTuple it, + RTSTATE * rtstate); +static int nospace(Page p, IndexTuple it); +static void initRtstate(RTSTATE * rtstate, Relation index); void rtbuild(Relation heap, - Relation index, - int natts, - AttrNumber *attnum, - IndexStrategy istrat, - uint16 pcount, - Datum *params, - FuncIndexInfo *finfo, - PredInfo *predInfo) + Relation index, + int natts, + AttrNumber * attnum, + IndexStrategy istrat, + uint16 pcount, + Datum * params, + FuncIndexInfo * finfo, + PredInfo * predInfo) { - HeapScanDesc scan; - Buffer buffer; - AttrNumber i; - HeapTuple htup; - IndexTuple itup; - TupleDesc hd, id; - InsertIndexResult res; - Datum *d; - bool *nulls; - int nb, nh, ni; + HeapScanDesc scan; + Buffer buffer; + AttrNumber i; + HeapTuple htup; + IndexTuple itup; + TupleDesc hd, + id; + InsertIndexResult res; + Datum *d; + bool *nulls; + int nb, + nh, + ni; + #ifndef OMIT_PARTIAL_INDEX - ExprContext *econtext; - TupleTable tupleTable; - TupleTableSlot *slot; + ExprContext *econtext; + TupleTable tupleTable; + TupleTableSlot *slot; + #endif - Oid hrelid, irelid; - Node *pred, *oldPred; - RTSTATE rtState; - - initRtstate(&rtState, index); - - /* rtrees only know how to do stupid locking now */ - RelationSetLockForWrite(index); - - pred = 
predInfo->pred; - oldPred = predInfo->oldPred; - - /* - * We expect to be called exactly once for any index relation. - * If that's not the case, big trouble's what we have. - */ - - if (oldPred == NULL && (nb = RelationGetNumberOfBlocks(index)) != 0) - elog(WARN, "%s already contains data", index->rd_rel->relname.data); - - /* initialize the root page (if this is a new index) */ - if (oldPred == NULL) { - buffer = ReadBuffer(index, P_NEW); - RTInitBuffer(buffer, F_LEAF); - WriteBuffer(buffer); - } - - /* init the tuple descriptors and get set for a heap scan */ - hd = RelationGetTupleDescriptor(heap); - id = RelationGetTupleDescriptor(index); - d = (Datum *)palloc(natts * sizeof (*d)); - nulls = (bool *)palloc(natts * sizeof (*nulls)); - - /* - * If this is a predicate (partial) index, we will need to evaluate the - * predicate using ExecQual, which requires the current tuple to be in a - * slot of a TupleTable. In addition, ExecQual must have an ExprContext - * referring to that slot. Here, we initialize dummy TupleTable and - * ExprContext objects for this purpose. --Nels, Feb '92 - */ + Oid hrelid, + irelid; + Node *pred, + *oldPred; + RTSTATE rtState; + + initRtstate(&rtState, index); + + /* rtrees only know how to do stupid locking now */ + RelationSetLockForWrite(index); + + pred = predInfo->pred; + oldPred = predInfo->oldPred; + + /* + * We expect to be called exactly once for any index relation. If + * that's not the case, big trouble's what we have. 
+ */ + + if (oldPred == NULL && (nb = RelationGetNumberOfBlocks(index)) != 0) + elog(WARN, "%s already contains data", index->rd_rel->relname.data); + + /* initialize the root page (if this is a new index) */ + if (oldPred == NULL) + { + buffer = ReadBuffer(index, P_NEW); + RTInitBuffer(buffer, F_LEAF); + WriteBuffer(buffer); + } + + /* init the tuple descriptors and get set for a heap scan */ + hd = RelationGetTupleDescriptor(heap); + id = RelationGetTupleDescriptor(index); + d = (Datum *) palloc(natts * sizeof(*d)); + nulls = (bool *) palloc(natts * sizeof(*nulls)); + + /* + * If this is a predicate (partial) index, we will need to evaluate + * the predicate using ExecQual, which requires the current tuple to + * be in a slot of a TupleTable. In addition, ExecQual must have an + * ExprContext referring to that slot. Here, we initialize dummy + * TupleTable and ExprContext objects for this purpose. --Nels, Feb + * '92 + */ #ifndef OMIT_PARTIAL_INDEX - if (pred != NULL || oldPred != NULL) { - tupleTable = ExecCreateTupleTable(1); - slot = ExecAllocTableSlot(tupleTable); - econtext = makeNode(ExprContext); - FillDummyExprContext(econtext, slot, hd, buffer); - } + if (pred != NULL || oldPred != NULL) + { + tupleTable = ExecCreateTupleTable(1); + slot = ExecAllocTableSlot(tupleTable); + econtext = makeNode(ExprContext); + FillDummyExprContext(econtext, slot, hd, buffer); + } else { econtext = NULL; tupleTable = NULL; slot = NULL; } -#endif /* OMIT_PARTIAL_INDEX */ - scan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); - htup = heap_getnext(scan, 0, &buffer); - - /* count the tuples as we insert them */ - nh = ni = 0; - - for (; HeapTupleIsValid(htup); htup = heap_getnext(scan, 0, &buffer)) { - - nh++; - - /* - * If oldPred != NULL, this is an EXTEND INDEX command, so skip - * this tuple if it was already in the existing partial index - */ - if (oldPred != NULL) { +#endif /* OMIT_PARTIAL_INDEX */ + scan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) 
NULL); + htup = heap_getnext(scan, 0, &buffer); + + /* count the tuples as we insert them */ + nh = ni = 0; + + for (; HeapTupleIsValid(htup); htup = heap_getnext(scan, 0, &buffer)) + { + + nh++; + + /* + * If oldPred != NULL, this is an EXTEND INDEX command, so skip + * this tuple if it was already in the existing partial index + */ + if (oldPred != NULL) + { #ifndef OMIT_PARTIAL_INDEX - /*SetSlotContents(slot, htup); */ - slot->val = htup; - if (ExecQual((List*)oldPred, econtext) == true) { + /* SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List *) oldPred, econtext) == true) + { + ni++; + continue; + } +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* + * Skip this tuple if it doesn't satisfy the partial-index + * predicate + */ + if (pred != NULL) + { +#ifndef OMIT_PARTIAL_INDEX + /* SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List *) pred, econtext) == false) + continue; +#endif /* OMIT_PARTIAL_INDEX */ + } + ni++; - continue; - } -#endif /* OMIT_PARTIAL_INDEX */ + + /* + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. + */ + + for (i = 1; i <= natts; i++) + { + int attoff; + bool attnull; + + /* + * Offsets are from the start of the tuple, and are + * zero-based; indices are one-based. The next call returns i + * - 1. That's data hiding for you. + */ + + attoff = AttrNumberGetAttrOffset(i); + + /* + * d[attoff] = HeapTupleGetAttributeValue(htup, buffer, + */ + d[attoff] = GetIndexValue(htup, + hd, + attoff, + attnum, + finfo, + &attnull, + buffer); + nulls[attoff] = (attnull ? 'n' : ' '); + } + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(id, &d[0], nulls); + itup->t_tid = htup->t_ctid; + + /* + * Since we already have the index relation locked, we call + * rtdoinsert directly. Normal access method calls dispatch + * through rtinsert, which locks the relation for write. 
This is + * the right thing to do if you're inserting single tups, but not + * when you're initializing the whole index at once. + */ + + res = rtdoinsert(index, itup, &rtState); + pfree(itup); + pfree(res); } - - /* Skip this tuple if it doesn't satisfy the partial-index predicate */ - if (pred != NULL) { + + /* okay, all heap tuples are indexed */ + heap_endscan(scan); + RelationUnsetLockForWrite(index); + + if (pred != NULL || oldPred != NULL) + { #ifndef OMIT_PARTIAL_INDEX - /*SetSlotContents(slot, htup); */ - slot->val = htup; - if (ExecQual((List*)pred, econtext) == false) - continue; -#endif /* OMIT_PARTIAL_INDEX */ + ExecDestroyTupleTable(tupleTable, true); + pfree(econtext); +#endif /* OMIT_PARTIAL_INDEX */ } - - ni++; - + /* - * For the current heap tuple, extract all the attributes - * we use in this index, and note which are null. + * Since we just counted the tuples in the heap, we update its stats + * in pg_relation to guarantee that the planner takes advantage of the + * index we just created. UpdateStats() does a + * CommandCounterIncrement(), which flushes changed entries from the + * system relcache. The act of constructing an index changes these + * heap and index tuples in the system catalogs, so they need to be + * flushed. We close them to guarantee that they will be. */ - - for (i = 1; i <= natts; i++) { - int attoff; - bool attnull; - - /* - * Offsets are from the start of the tuple, and are - * zero-based; indices are one-based. The next call - * returns i - 1. That's data hiding for you. - */ - - attoff = AttrNumberGetAttrOffset(i); - /* - d[attoff] = HeapTupleGetAttributeValue(htup, buffer, - */ - d[attoff] = GetIndexValue(htup, - hd, - attoff, - attnum, - finfo, - &attnull, - buffer); - nulls[attoff] = (attnull ? 
'n' : ' '); + + hrelid = heap->rd_id; + irelid = index->rd_id; + heap_close(heap); + index_close(index); + + UpdateStats(hrelid, nh, true); + UpdateStats(irelid, ni, false); + + if (oldPred != NULL) + { + if (ni == nh) + pred = NULL; + UpdateIndexPredicate(irelid, oldPred, pred); } - - /* form an index tuple and point it at the heap tuple */ - itup = index_formtuple(id, &d[0], nulls); - itup->t_tid = htup->t_ctid; - - /* - * Since we already have the index relation locked, we - * call rtdoinsert directly. Normal access method calls - * dispatch through rtinsert, which locks the relation - * for write. This is the right thing to do if you're - * inserting single tups, but not when you're initializing - * the whole index at once. - */ - - res = rtdoinsert(index, itup, &rtState); - pfree(itup); - pfree(res); - } - - /* okay, all heap tuples are indexed */ - heap_endscan(scan); - RelationUnsetLockForWrite(index); - - if (pred != NULL || oldPred != NULL) { -#ifndef OMIT_PARTIAL_INDEX - ExecDestroyTupleTable(tupleTable, true); - pfree(econtext); -#endif /* OMIT_PARTIAL_INDEX */ - } - - /* - * Since we just counted the tuples in the heap, we update its - * stats in pg_relation to guarantee that the planner takes - * advantage of the index we just created. UpdateStats() does a - * CommandCounterIncrement(), which flushes changed entries from - * the system relcache. The act of constructing an index changes - * these heap and index tuples in the system catalogs, so they - * need to be flushed. We close them to guarantee that they - * will be. - */ - - hrelid = heap->rd_id; - irelid = index->rd_id; - heap_close(heap); - index_close(index); - - UpdateStats(hrelid, nh, true); - UpdateStats(irelid, ni, false); - - if (oldPred != NULL) { - if (ni == nh) pred = NULL; - UpdateIndexPredicate(irelid, oldPred, pred); - } - - /* be tidy */ - pfree(nulls); - pfree(d); + + /* be tidy */ + pfree(nulls); + pfree(d); } /* - * rtinsert -- wrapper for rtree tuple insertion. 
+ * rtinsert -- wrapper for rtree tuple insertion. * - * This is the public interface routine for tuple insertion in rtrees. - * It doesn't do any work; just locks the relation and passes the buck. + * This is the public interface routine for tuple insertion in rtrees. + * It doesn't do any work; just locks the relation and passes the buck. */ InsertIndexResult -rtinsert(Relation r, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) +rtinsert(Relation r, Datum * datum, char *nulls, ItemPointer ht_ctid, Relation heapRel) { - InsertIndexResult res; - IndexTuple itup; - RTSTATE rtState; - - /* generate an index tuple */ - itup = index_formtuple(RelationGetTupleDescriptor(r), datum, nulls); - itup->t_tid = *ht_ctid; - initRtstate(&rtState, r); - - RelationSetLockForWrite(r); - res = rtdoinsert(r, itup, &rtState); - - /* XXX two-phase locking -- don't unlock the relation until EOT */ - return (res); + InsertIndexResult res; + IndexTuple itup; + RTSTATE rtState; + + /* generate an index tuple */ + itup = index_formtuple(RelationGetTupleDescriptor(r), datum, nulls); + itup->t_tid = *ht_ctid; + initRtstate(&rtState, r); + + RelationSetLockForWrite(r); + res = rtdoinsert(r, itup, &rtState); + + /* XXX two-phase locking -- don't unlock the relation until EOT */ + return (res); } -static InsertIndexResult -rtdoinsert(Relation r, IndexTuple itup, RTSTATE *rtstate) +static InsertIndexResult +rtdoinsert(Relation r, IndexTuple itup, RTSTATE * rtstate) { - Page page; - Buffer buffer; - BlockNumber blk; - IndexTuple which; - OffsetNumber l; - RTSTACK *stack; - InsertIndexResult res; - RTreePageOpaque opaque; - char *datum; - - blk = P_ROOT; - buffer = InvalidBuffer; - stack = (RTSTACK *) NULL; - - do { - /* let go of current buffer before getting next */ - if (buffer != InvalidBuffer) - ReleaseBuffer(buffer); - - /* get next buffer */ - buffer = ReadBuffer(r, blk); - page = (Page) BufferGetPage(buffer); - - opaque = (RTreePageOpaque) PageGetSpecialPointer(page); - if 
(!(opaque->flags & F_LEAF)) { - RTSTACK *n; - ItemId iid; - - n = (RTSTACK *) palloc(sizeof(RTSTACK)); - n->rts_parent = stack; - n->rts_blk = blk; - n->rts_child = choose(r, page, itup, rtstate); - stack = n; - - iid = PageGetItemId(page, n->rts_child); - which = (IndexTuple) PageGetItem(page, iid); - blk = ItemPointerGetBlockNumber(&(which->t_tid)); + Page page; + Buffer buffer; + BlockNumber blk; + IndexTuple which; + OffsetNumber l; + RTSTACK *stack; + InsertIndexResult res; + RTreePageOpaque opaque; + char *datum; + + blk = P_ROOT; + buffer = InvalidBuffer; + stack = (RTSTACK *) NULL; + + do + { + /* let go of current buffer before getting next */ + if (buffer != InvalidBuffer) + ReleaseBuffer(buffer); + + /* get next buffer */ + buffer = ReadBuffer(r, blk); + page = (Page) BufferGetPage(buffer); + + opaque = (RTreePageOpaque) PageGetSpecialPointer(page); + if (!(opaque->flags & F_LEAF)) + { + RTSTACK *n; + ItemId iid; + + n = (RTSTACK *) palloc(sizeof(RTSTACK)); + n->rts_parent = stack; + n->rts_blk = blk; + n->rts_child = choose(r, page, itup, rtstate); + stack = n; + + iid = PageGetItemId(page, n->rts_child); + which = (IndexTuple) PageGetItem(page, iid); + blk = ItemPointerGetBlockNumber(&(which->t_tid)); + } + } while (!(opaque->flags & F_LEAF)); + + if (nospace(page, itup)) + { + /* need to do a split */ + res = dosplit(r, buffer, stack, itup, rtstate); + freestack(stack); + WriteBuffer(buffer); /* don't forget to release buffer! 
*/ + return (res); + } + + /* add the item and write the buffer */ + if (PageIsEmpty(page)) + { + l = PageAddItem(page, (Item) itup, IndexTupleSize(itup), + FirstOffsetNumber, + LP_USED); } - } while (!(opaque->flags & F_LEAF)); - - if (nospace(page, itup)) { - /* need to do a split */ - res = dosplit(r, buffer, stack, itup, rtstate); + else + { + l = PageAddItem(page, (Item) itup, IndexTupleSize(itup), + OffsetNumberNext(PageGetMaxOffsetNumber(page)), + LP_USED); + } + + WriteBuffer(buffer); + + datum = (((char *) itup) + sizeof(IndexTupleData)); + + /* now expand the page boundary in the parent to include the new child */ + rttighten(r, stack, datum, + (IndexTupleSize(itup) - sizeof(IndexTupleData)), rtstate); freestack(stack); - WriteBuffer(buffer); /* don't forget to release buffer! */ + + /* build and return an InsertIndexResult for this insertion */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + ItemPointerSet(&(res->pointerData), blk, l); + return (res); - } - - /* add the item and write the buffer */ - if (PageIsEmpty(page)) { - l = PageAddItem(page, (Item) itup, IndexTupleSize(itup), - FirstOffsetNumber, - LP_USED); - } else { - l = PageAddItem(page, (Item) itup, IndexTupleSize(itup), - OffsetNumberNext(PageGetMaxOffsetNumber(page)), - LP_USED); - } - - WriteBuffer(buffer); - - datum = (((char *) itup) + sizeof(IndexTupleData)); - - /* now expand the page boundary in the parent to include the new child */ - rttighten(r, stack, datum, - (IndexTupleSize(itup) - sizeof(IndexTupleData)), rtstate); - freestack(stack); - - /* build and return an InsertIndexResult for this insertion */ - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - ItemPointerSet(&(res->pointerData), blk, l); - - return (res); } static void rttighten(Relation r, - RTSTACK *stk, - char *datum, - int att_size, - RTSTATE *rtstate) + RTSTACK * stk, + char *datum, + int att_size, + RTSTATE * rtstate) { - char *oldud; - char *tdatum; - Page p; - float 
old_size, newd_size; - Buffer b; - - if (stk == (RTSTACK *) NULL) - return; - - b = ReadBuffer(r, stk->rts_blk); - p = BufferGetPage(b); - - oldud = (char *) PageGetItem(p, PageGetItemId(p, stk->rts_child)); - oldud += sizeof(IndexTupleData); - - (*rtstate->sizeFn)(oldud, &old_size); - datum = (char *) (*rtstate->unionFn)(oldud, datum); - - (*rtstate->sizeFn)(datum, &newd_size); - - if (newd_size != old_size) { - TupleDesc td = RelationGetTupleDescriptor(r); - - if (td->attrs[0]->attlen < 0) { - /* - * This is an internal page, so 'oldud' had better be a - * union (constant-length) key, too. (See comment below.) - */ - Assert(VARSIZE(datum) == VARSIZE(oldud)); - memmove(oldud, datum, VARSIZE(datum)); - } else { - memmove(oldud, datum, att_size); + char *oldud; + char *tdatum; + Page p; + float old_size, + newd_size; + Buffer b; + + if (stk == (RTSTACK *) NULL) + return; + + b = ReadBuffer(r, stk->rts_blk); + p = BufferGetPage(b); + + oldud = (char *) PageGetItem(p, PageGetItemId(p, stk->rts_child)); + oldud += sizeof(IndexTupleData); + + (*rtstate->sizeFn) (oldud, &old_size); + datum = (char *) (*rtstate->unionFn) (oldud, datum); + + (*rtstate->sizeFn) (datum, &newd_size); + + if (newd_size != old_size) + { + TupleDesc td = RelationGetTupleDescriptor(r); + + if (td->attrs[0]->attlen < 0) + { + + /* + * This is an internal page, so 'oldud' had better be a union + * (constant-length) key, too. (See comment below.) + */ + Assert(VARSIZE(datum) == VARSIZE(oldud)); + memmove(oldud, datum, VARSIZE(datum)); + } + else + { + memmove(oldud, datum, att_size); + } + WriteBuffer(b); + + /* + * The user may be defining an index on variable-sized data (like + * polygons). If so, we need to get a constant-sized datum for + * insertion on the internal page. We do this by calling the + * union proc, which is guaranteed to return a rectangle. 
+ */ + + tdatum = (char *) (*rtstate->unionFn) (datum, datum); + rttighten(r, stk->rts_parent, tdatum, att_size, rtstate); + pfree(tdatum); } - WriteBuffer(b); - - /* - * The user may be defining an index on variable-sized data (like - * polygons). If so, we need to get a constant-sized datum for - * insertion on the internal page. We do this by calling the union - * proc, which is guaranteed to return a rectangle. - */ - - tdatum = (char *) (*rtstate->unionFn)(datum, datum); - rttighten(r, stk->rts_parent, tdatum, att_size, rtstate); - pfree(tdatum); - } else { - ReleaseBuffer(b); - } - pfree(datum); + else + { + ReleaseBuffer(b); + } + pfree(datum); } /* - * dosplit -- split a page in the tree. + * dosplit -- split a page in the tree. * - * This is the quadratic-cost split algorithm Guttman describes in - * his paper. The reason we chose it is that you can implement this - * with less information about the data types on which you're operating. + * This is the quadratic-cost split algorithm Guttman describes in + * his paper. The reason we chose it is that you can implement this + * with less information about the data types on which you're operating. 
*/ -static InsertIndexResult +static InsertIndexResult dosplit(Relation r, - Buffer buffer, - RTSTACK *stack, - IndexTuple itup, - RTSTATE *rtstate) + Buffer buffer, + RTSTACK * stack, + IndexTuple itup, + RTSTATE * rtstate) { - Page p; - Buffer leftbuf, rightbuf; - Page left, right; - ItemId itemid; - IndexTuple item; - IndexTuple ltup, rtup; - OffsetNumber maxoff; - OffsetNumber i; - OffsetNumber leftoff, rightoff; - BlockNumber lbknum, rbknum; - BlockNumber bufblock; - RTreePageOpaque opaque; - int blank; - InsertIndexResult res; - char *isnull; - SPLITVEC v; - TupleDesc tupDesc; - - isnull = (char *) palloc(r->rd_rel->relnatts); - for (blank = 0; blank < r->rd_rel->relnatts; blank++) - isnull[blank] = ' '; - p = (Page) BufferGetPage(buffer); - opaque = (RTreePageOpaque) PageGetSpecialPointer(p); - - /* - * The root of the tree is the first block in the relation. If - * we're about to split the root, we need to do some hocus-pocus - * to enforce this guarantee. - */ - - if (BufferGetBlockNumber(buffer) == P_ROOT) { - leftbuf = ReadBuffer(r, P_NEW); - RTInitBuffer(leftbuf, opaque->flags); - lbknum = BufferGetBlockNumber(leftbuf); - left = (Page) BufferGetPage(leftbuf); - } else { - leftbuf = buffer; - IncrBufferRefCount(buffer); - lbknum = BufferGetBlockNumber(buffer); - left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData)); - } - - rightbuf = ReadBuffer(r, P_NEW); - RTInitBuffer(rightbuf, opaque->flags); - rbknum = BufferGetBlockNumber(rightbuf); - right = (Page) BufferGetPage(rightbuf); - - picksplit(r, p, &v, itup, rtstate); - - leftoff = rightoff = FirstOffsetNumber; - maxoff = PageGetMaxOffsetNumber(p); - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - itemid = PageGetItemId(p, i); - item = (IndexTuple) PageGetItem(p, itemid); - - if (i == *(v.spl_left)) { - PageAddItem(left, (Item) item, IndexTupleSize(item), - leftoff, LP_USED); - leftoff = OffsetNumberNext(leftoff); - v.spl_left++; /* advance in left split vector */ - } else 
{ - PageAddItem(right, (Item) item, IndexTupleSize(item), - rightoff, LP_USED); - rightoff = OffsetNumberNext(rightoff); - v.spl_right++; /* advance in right split vector */ + Page p; + Buffer leftbuf, + rightbuf; + Page left, + right; + ItemId itemid; + IndexTuple item; + IndexTuple ltup, + rtup; + OffsetNumber maxoff; + OffsetNumber i; + OffsetNumber leftoff, + rightoff; + BlockNumber lbknum, + rbknum; + BlockNumber bufblock; + RTreePageOpaque opaque; + int blank; + InsertIndexResult res; + char *isnull; + SPLITVEC v; + TupleDesc tupDesc; + + isnull = (char *) palloc(r->rd_rel->relnatts); + for (blank = 0; blank < r->rd_rel->relnatts; blank++) + isnull[blank] = ' '; + p = (Page) BufferGetPage(buffer); + opaque = (RTreePageOpaque) PageGetSpecialPointer(p); + + /* + * The root of the tree is the first block in the relation. If we're + * about to split the root, we need to do some hocus-pocus to enforce + * this guarantee. + */ + + if (BufferGetBlockNumber(buffer) == P_ROOT) + { + leftbuf = ReadBuffer(r, P_NEW); + RTInitBuffer(leftbuf, opaque->flags); + lbknum = BufferGetBlockNumber(leftbuf); + left = (Page) BufferGetPage(leftbuf); } - } - - /* build an InsertIndexResult for this insertion */ - res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); - - /* now insert the new index tuple */ - if (*(v.spl_left) != FirstOffsetNumber) { - PageAddItem(left, (Item) itup, IndexTupleSize(itup), - leftoff, LP_USED); - leftoff = OffsetNumberNext(leftoff); - ItemPointerSet(&(res->pointerData), lbknum, leftoff); - } else { - PageAddItem(right, (Item) itup, IndexTupleSize(itup), - rightoff, LP_USED); - rightoff = OffsetNumberNext(rightoff); - ItemPointerSet(&(res->pointerData), rbknum, rightoff); - } - - if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT) { - PageRestoreTempPage(left, p); - } - WriteBuffer(leftbuf); - WriteBuffer(rightbuf); - - /* - * Okay, the page is split. 
We have three things left to do: - * - * 1) Adjust any active scans on this index to cope with changes - * we introduced in its structure by splitting this page. - * - * 2) "Tighten" the bounding box of the pointer to the left - * page in the parent node in the tree, if any. Since we - * moved a bunch of stuff off the left page, we expect it - * to get smaller. This happens in the internal insertion - * routine. - * - * 3) Insert a pointer to the right page in the parent. This - * may cause the parent to split. If it does, we need to - * repeat steps one and two for each split node in the tree. - */ - - /* adjust active scans */ - rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber); - - tupDesc = r->rd_att; - ltup = (IndexTuple) index_formtuple(tupDesc, - (Datum *) &(v.spl_ldatum), isnull); - rtup = (IndexTuple) index_formtuple(tupDesc, - (Datum *) &(v.spl_rdatum), isnull); - pfree(isnull); - - /* set pointers to new child pages in the internal index tuples */ - ItemPointerSet(&(ltup->t_tid), lbknum, 1); - ItemPointerSet(&(rtup->t_tid), rbknum, 1); - - rtintinsert(r, stack, ltup, rtup, rtstate); - - pfree(ltup); - pfree(rtup); - - return (res); + else + { + leftbuf = buffer; + IncrBufferRefCount(buffer); + lbknum = BufferGetBlockNumber(buffer); + left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData)); + } + + rightbuf = ReadBuffer(r, P_NEW); + RTInitBuffer(rightbuf, opaque->flags); + rbknum = BufferGetBlockNumber(rightbuf); + right = (Page) BufferGetPage(rightbuf); + + picksplit(r, p, &v, itup, rtstate); + + leftoff = rightoff = FirstOffsetNumber; + maxoff = PageGetMaxOffsetNumber(p); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + itemid = PageGetItemId(p, i); + item = (IndexTuple) PageGetItem(p, itemid); + + if (i == *(v.spl_left)) + { + PageAddItem(left, (Item) item, IndexTupleSize(item), + leftoff, LP_USED); + leftoff = OffsetNumberNext(leftoff); + v.spl_left++; /* advance in left split vector */ + } + else + { + 
PageAddItem(right, (Item) item, IndexTupleSize(item), + rightoff, LP_USED); + rightoff = OffsetNumberNext(rightoff); + v.spl_right++; /* advance in right split vector */ + } + } + + /* build an InsertIndexResult for this insertion */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + + /* now insert the new index tuple */ + if (*(v.spl_left) != FirstOffsetNumber) + { + PageAddItem(left, (Item) itup, IndexTupleSize(itup), + leftoff, LP_USED); + leftoff = OffsetNumberNext(leftoff); + ItemPointerSet(&(res->pointerData), lbknum, leftoff); + } + else + { + PageAddItem(right, (Item) itup, IndexTupleSize(itup), + rightoff, LP_USED); + rightoff = OffsetNumberNext(rightoff); + ItemPointerSet(&(res->pointerData), rbknum, rightoff); + } + + if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT) + { + PageRestoreTempPage(left, p); + } + WriteBuffer(leftbuf); + WriteBuffer(rightbuf); + + /* + * Okay, the page is split. We have three things left to do: + * + * 1) Adjust any active scans on this index to cope with changes we + * introduced in its structure by splitting this page. + * + * 2) "Tighten" the bounding box of the pointer to the left page in the + * parent node in the tree, if any. Since we moved a bunch of stuff + * off the left page, we expect it to get smaller. This happens in + * the internal insertion routine. + * + * 3) Insert a pointer to the right page in the parent. This may cause + * the parent to split. If it does, we need to repeat steps one and + * two for each split node in the tree. 
+ */ + + /* adjust active scans */ + rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber); + + tupDesc = r->rd_att; + ltup = (IndexTuple) index_formtuple(tupDesc, + (Datum *) & (v.spl_ldatum), isnull); + rtup = (IndexTuple) index_formtuple(tupDesc, + (Datum *) & (v.spl_rdatum), isnull); + pfree(isnull); + + /* set pointers to new child pages in the internal index tuples */ + ItemPointerSet(&(ltup->t_tid), lbknum, 1); + ItemPointerSet(&(rtup->t_tid), rbknum, 1); + + rtintinsert(r, stack, ltup, rtup, rtstate); + + pfree(ltup); + pfree(rtup); + + return (res); } static void rtintinsert(Relation r, - RTSTACK *stk, - IndexTuple ltup, - IndexTuple rtup, - RTSTATE *rtstate) + RTSTACK * stk, + IndexTuple ltup, + IndexTuple rtup, + RTSTATE * rtstate) { - IndexTuple old; - Buffer b; - Page p; - char *ldatum, *rdatum, *newdatum; - InsertIndexResult res; - - if (stk == (RTSTACK *) NULL) { - rtnewroot(r, ltup, rtup); - return; - } - - b = ReadBuffer(r, stk->rts_blk); - p = BufferGetPage(b); - old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child)); - - /* - * This is a hack. Right now, we force rtree keys to be constant size. - * To fix this, need delete the old key and add both left and right - * for the two new pages. The insertion of left may force a split if - * the new left key is bigger than the old key. - */ - - if (IndexTupleSize(old) != IndexTupleSize(ltup)) - elog(WARN, "Variable-length rtree keys are not supported."); - - /* install pointer to left child */ - memmove(old, ltup,IndexTupleSize(ltup)); - - if (nospace(p, rtup)) { - newdatum = (((char *) ltup) + sizeof(IndexTupleData)); - rttighten(r, stk->rts_parent, newdatum, - (IndexTupleSize(ltup) - sizeof(IndexTupleData)), rtstate); - res = dosplit(r, b, stk->rts_parent, rtup, rtstate); - WriteBuffer(b); /* don't forget to release buffer! 
- 01/31/94 */ - pfree(res); - } else { - PageAddItem(p, (Item) rtup, IndexTupleSize(rtup), - PageGetMaxOffsetNumber(p), LP_USED); - WriteBuffer(b); - ldatum = (((char *) ltup) + sizeof(IndexTupleData)); - rdatum = (((char *) rtup) + sizeof(IndexTupleData)); - newdatum = (char *) (*rtstate->unionFn)(ldatum, rdatum); - - rttighten(r, stk->rts_parent, newdatum, - (IndexTupleSize(rtup) - sizeof(IndexTupleData)), rtstate); - - pfree(newdatum); - } + IndexTuple old; + Buffer b; + Page p; + char *ldatum, + *rdatum, + *newdatum; + InsertIndexResult res; + + if (stk == (RTSTACK *) NULL) + { + rtnewroot(r, ltup, rtup); + return; + } + + b = ReadBuffer(r, stk->rts_blk); + p = BufferGetPage(b); + old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child)); + + /* + * This is a hack. Right now, we force rtree keys to be constant + * size. To fix this, need delete the old key and add both left and + * right for the two new pages. The insertion of left may force a + * split if the new left key is bigger than the old key. + */ + + if (IndexTupleSize(old) != IndexTupleSize(ltup)) + elog(WARN, "Variable-length rtree keys are not supported."); + + /* install pointer to left child */ + memmove(old, ltup, IndexTupleSize(ltup)); + + if (nospace(p, rtup)) + { + newdatum = (((char *) ltup) + sizeof(IndexTupleData)); + rttighten(r, stk->rts_parent, newdatum, + (IndexTupleSize(ltup) - sizeof(IndexTupleData)), rtstate); + res = dosplit(r, b, stk->rts_parent, rtup, rtstate); + WriteBuffer(b); /* don't forget to release buffer! 
- + * 01/31/94 */ + pfree(res); + } + else + { + PageAddItem(p, (Item) rtup, IndexTupleSize(rtup), + PageGetMaxOffsetNumber(p), LP_USED); + WriteBuffer(b); + ldatum = (((char *) ltup) + sizeof(IndexTupleData)); + rdatum = (((char *) rtup) + sizeof(IndexTupleData)); + newdatum = (char *) (*rtstate->unionFn) (ldatum, rdatum); + + rttighten(r, stk->rts_parent, newdatum, + (IndexTupleSize(rtup) - sizeof(IndexTupleData)), rtstate); + + pfree(newdatum); + } } static void rtnewroot(Relation r, IndexTuple lt, IndexTuple rt) { - Buffer b; - Page p; - - b = ReadBuffer(r, P_ROOT); - RTInitBuffer(b, 0); - p = BufferGetPage(b); - PageAddItem(p, (Item) lt, IndexTupleSize(lt), - FirstOffsetNumber, LP_USED); - PageAddItem(p, (Item) rt, IndexTupleSize(rt), - OffsetNumberNext(FirstOffsetNumber), LP_USED); - WriteBuffer(b); + Buffer b; + Page p; + + b = ReadBuffer(r, P_ROOT); + RTInitBuffer(b, 0); + p = BufferGetPage(b); + PageAddItem(p, (Item) lt, IndexTupleSize(lt), + FirstOffsetNumber, LP_USED); + PageAddItem(p, (Item) rt, IndexTupleSize(rt), + OffsetNumberNext(FirstOffsetNumber), LP_USED); + WriteBuffer(b); } static void picksplit(Relation r, - Page page, - SPLITVEC *v, - IndexTuple itup, - RTSTATE *rtstate) + Page page, + SPLITVEC * v, + IndexTuple itup, + RTSTATE * rtstate) { - OffsetNumber maxoff; - OffsetNumber i, j; - IndexTuple item_1, item_2; - char *datum_alpha, *datum_beta; - char *datum_l, *datum_r; - char *union_d, *union_dl, *union_dr; - char *inter_d; - bool firsttime; - float size_alpha, size_beta, size_union, size_inter; - float size_waste, waste; - float size_l, size_r; - int nbytes; - OffsetNumber seed_1 = 0, seed_2 = 0; - OffsetNumber *left, *right; - - maxoff = PageGetMaxOffsetNumber(page); - - nbytes = (maxoff + 2) * sizeof(OffsetNumber); - v->spl_left = (OffsetNumber *) palloc(nbytes); - v->spl_right = (OffsetNumber *) palloc(nbytes); - - firsttime = true; - waste = 0.0; - - for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { - item_1 = 
(IndexTuple) PageGetItem(page, PageGetItemId(page, i)); - datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); - for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) { - item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, j)); - datum_beta = ((char *) item_2) + sizeof(IndexTupleData); - - /* compute the wasted space by unioning these guys */ - union_d = (char *)(rtstate->unionFn)(datum_alpha, datum_beta); - (rtstate->sizeFn)(union_d, &size_union); - inter_d = (char *)(rtstate->interFn)(datum_alpha, datum_beta); - (rtstate->sizeFn)(inter_d, &size_inter); - size_waste = size_union - size_inter; - - pfree(union_d); - - if (inter_d != (char *) NULL) - pfree(inter_d); - - /* - * are these a more promising split that what we've - * already seen? - */ - - if (size_waste > waste || firsttime) { - waste = size_waste; - seed_1 = i; - seed_2 = j; - firsttime = false; - } + OffsetNumber maxoff; + OffsetNumber i, + j; + IndexTuple item_1, + item_2; + char *datum_alpha, + *datum_beta; + char *datum_l, + *datum_r; + char *union_d, + *union_dl, + *union_dr; + char *inter_d; + bool firsttime; + float size_alpha, + size_beta, + size_union, + size_inter; + float size_waste, + waste; + float size_l, + size_r; + int nbytes; + OffsetNumber seed_1 = 0, + seed_2 = 0; + OffsetNumber *left, + *right; + + maxoff = PageGetMaxOffsetNumber(page); + + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + v->spl_left = (OffsetNumber *) palloc(nbytes); + v->spl_right = (OffsetNumber *) palloc(nbytes); + + firsttime = true; + waste = 0.0; + + for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) + { + item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); + for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) + { + item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, j)); + datum_beta = ((char *) item_2) + sizeof(IndexTupleData); + + /* compute the wasted space by unioning 
these guys */ + union_d = (char *) (rtstate->unionFn) (datum_alpha, datum_beta); + (rtstate->sizeFn) (union_d, &size_union); + inter_d = (char *) (rtstate->interFn) (datum_alpha, datum_beta); + (rtstate->sizeFn) (inter_d, &size_inter); + size_waste = size_union - size_inter; + + pfree(union_d); + + if (inter_d != (char *) NULL) + pfree(inter_d); + + /* + * are these a more promising split that what we've already + * seen? + */ + + if (size_waste > waste || firsttime) + { + waste = size_waste; + seed_1 = i; + seed_2 = j; + firsttime = false; + } + } } - } - - left = v->spl_left; - v->spl_nleft = 0; - right = v->spl_right; - v->spl_nright = 0; - - item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_1)); - datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); - datum_l = (char *)(*rtstate->unionFn)(datum_alpha, datum_alpha); - (*rtstate->sizeFn)(datum_l, &size_l); - item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_2)); - datum_beta = ((char *) item_2) + sizeof(IndexTupleData); - datum_r = (char *)(*rtstate->unionFn)(datum_beta, datum_beta); - (*rtstate->sizeFn)(datum_r, &size_r); - - /* - * Now split up the regions between the two seeds. An important - * property of this split algorithm is that the split vector v - * has the indices of items to be split in order in its left and - * right vectors. We exploit this property by doing a merge in - * the code that actually splits the page. - * - * For efficiency, we also place the new index tuple in this loop. - * This is handled at the very end, when we have placed all the - * existing tuples and i == maxoff + 1. 
- */ - - maxoff = OffsetNumberNext(maxoff); - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - + + left = v->spl_left; + v->spl_nleft = 0; + right = v->spl_right; + v->spl_nright = 0; + + item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_1)); + datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); + datum_l = (char *) (*rtstate->unionFn) (datum_alpha, datum_alpha); + (*rtstate->sizeFn) (datum_l, &size_l); + item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_2)); + datum_beta = ((char *) item_2) + sizeof(IndexTupleData); + datum_r = (char *) (*rtstate->unionFn) (datum_beta, datum_beta); + (*rtstate->sizeFn) (datum_r, &size_r); + /* - * If we've already decided where to place this item, just - * put it on the right list. Otherwise, we need to figure - * out which page needs the least enlargement in order to - * store the item. + * Now split up the regions between the two seeds. An important + * property of this split algorithm is that the split vector v has the + * indices of items to be split in order in its left and right + * vectors. We exploit this property by doing a merge in the code + * that actually splits the page. + * + * For efficiency, we also place the new index tuple in this loop. This + * is handled at the very end, when we have placed all the existing + * tuples and i == maxoff + 1. */ - - if (i == seed_1) { - *left++ = i; - v->spl_nleft++; - continue; - } else if (i == seed_2) { - *right++ = i; - v->spl_nright++; - continue; - } - - /* okay, which page needs least enlargement? 
*/ - if (i == maxoff) { - item_1 = itup; - } else { - item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); - } - - datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); - union_dl = (char *)(*rtstate->unionFn)(datum_l, datum_alpha); - union_dr = (char *)(*rtstate->unionFn)(datum_r, datum_alpha); - (*rtstate->sizeFn)(union_dl, &size_alpha); - (*rtstate->sizeFn)(union_dr, &size_beta); - - /* pick which page to add it to */ - if (size_alpha - size_l < size_beta - size_r) { - pfree(datum_l); - pfree(union_dr); - datum_l = union_dl; - size_l = size_alpha; - *left++ = i; - v->spl_nleft++; - } else { - pfree(datum_r); - pfree(union_dl); - datum_r = union_dr; - size_r = size_alpha; - *right++ = i; - v->spl_nright++; + + maxoff = OffsetNumberNext(maxoff); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + + /* + * If we've already decided where to place this item, just put it + * on the right list. Otherwise, we need to figure out which page + * needs the least enlargement in order to store the item. + */ + + if (i == seed_1) + { + *left++ = i; + v->spl_nleft++; + continue; + } + else if (i == seed_2) + { + *right++ = i; + v->spl_nright++; + continue; + } + + /* okay, which page needs least enlargement? 
*/ + if (i == maxoff) + { + item_1 = itup; + } + else + { + item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + } + + datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); + union_dl = (char *) (*rtstate->unionFn) (datum_l, datum_alpha); + union_dr = (char *) (*rtstate->unionFn) (datum_r, datum_alpha); + (*rtstate->sizeFn) (union_dl, &size_alpha); + (*rtstate->sizeFn) (union_dr, &size_beta); + + /* pick which page to add it to */ + if (size_alpha - size_l < size_beta - size_r) + { + pfree(datum_l); + pfree(union_dr); + datum_l = union_dl; + size_l = size_alpha; + *left++ = i; + v->spl_nleft++; + } + else + { + pfree(datum_r); + pfree(union_dl); + datum_r = union_dr; + size_r = size_alpha; + *right++ = i; + v->spl_nright++; + } } - } - *left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */ - - v->spl_ldatum = datum_l; - v->spl_rdatum = datum_r; + *left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */ + + v->spl_ldatum = datum_l; + v->spl_rdatum = datum_r; } static void RTInitBuffer(Buffer b, uint32 f) { - RTreePageOpaque opaque; - Page page; - Size pageSize; - - pageSize = BufferGetPageSize(b); - - page = BufferGetPage(b); - memset(page, 0, (int) pageSize); - PageInit(page, pageSize, sizeof(RTreePageOpaqueData)); - - opaque = (RTreePageOpaque) PageGetSpecialPointer(page); - opaque->flags = f; + RTreePageOpaque opaque; + Page page; + Size pageSize; + + pageSize = BufferGetPageSize(b); + + page = BufferGetPage(b); + memset(page, 0, (int) pageSize); + PageInit(page, pageSize, sizeof(RTreePageOpaqueData)); + + opaque = (RTreePageOpaque) PageGetSpecialPointer(page); + opaque->flags = f; } -static OffsetNumber -choose(Relation r, Page p, IndexTuple it, RTSTATE *rtstate) +static OffsetNumber +choose(Relation r, Page p, IndexTuple it, RTSTATE * rtstate) { - OffsetNumber maxoff; - OffsetNumber i; - char *ud, *id; - char *datum; - float usize, dsize; - OffsetNumber which; - float which_grow; - - id = ((char *) it) + 
sizeof(IndexTupleData); - maxoff = PageGetMaxOffsetNumber(p); - which_grow = -1.0; - which = -1; - - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - datum = (char *) PageGetItem(p, PageGetItemId(p, i)); - datum += sizeof(IndexTupleData); - (*rtstate->sizeFn)(datum, &dsize); - ud = (char *) (*rtstate->unionFn)(datum, id); - (*rtstate->sizeFn)(ud, &usize); - pfree(ud); - if (which_grow < 0 || usize - dsize < which_grow) { - which = i; - which_grow = usize - dsize; - if (which_grow == 0) - break; + OffsetNumber maxoff; + OffsetNumber i; + char *ud, + *id; + char *datum; + float usize, + dsize; + OffsetNumber which; + float which_grow; + + id = ((char *) it) + sizeof(IndexTupleData); + maxoff = PageGetMaxOffsetNumber(p); + which_grow = -1.0; + which = -1; + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + datum = (char *) PageGetItem(p, PageGetItemId(p, i)); + datum += sizeof(IndexTupleData); + (*rtstate->sizeFn) (datum, &dsize); + ud = (char *) (*rtstate->unionFn) (datum, id); + (*rtstate->sizeFn) (ud, &usize); + pfree(ud); + if (which_grow < 0 || usize - dsize < which_grow) + { + which = i; + which_grow = usize - dsize; + if (which_grow == 0) + break; + } } - } - - return (which); + + return (which); } static int nospace(Page p, IndexTuple it) { - return (PageGetFreeSpace(p) < IndexTupleSize(it)); + return (PageGetFreeSpace(p) < IndexTupleSize(it)); } void -freestack(RTSTACK *s) +freestack(RTSTACK * s) { - RTSTACK *p; - - while (s != (RTSTACK *) NULL) { - p = s->rts_parent; - pfree(s); - s = p; - } + RTSTACK *p; + + while (s != (RTSTACK *) NULL) + { + p = s->rts_parent; + pfree(s); + s = p; + } } -char * +char * rtdelete(Relation r, ItemPointer tid) { - BlockNumber blkno; - OffsetNumber offnum; - Buffer buf; - Page page; - - /* must write-lock on delete */ - RelationSetLockForWrite(r); - - blkno = ItemPointerGetBlockNumber(tid); - offnum = ItemPointerGetOffsetNumber(tid); - - /* adjust any scans that will be affected by 
this deletion */ - rtadjscans(r, RTOP_DEL, blkno, offnum); - - /* delete the index tuple */ - buf = ReadBuffer(r, blkno); - page = BufferGetPage(buf); - - PageIndexTupleDelete(page, offnum); - - WriteBuffer(buf); - - /* XXX -- two-phase locking, don't release the write lock */ - return ((char *) NULL); + BlockNumber blkno; + OffsetNumber offnum; + Buffer buf; + Page page; + + /* must write-lock on delete */ + RelationSetLockForWrite(r); + + blkno = ItemPointerGetBlockNumber(tid); + offnum = ItemPointerGetOffsetNumber(tid); + + /* adjust any scans that will be affected by this deletion */ + rtadjscans(r, RTOP_DEL, blkno, offnum); + + /* delete the index tuple */ + buf = ReadBuffer(r, blkno); + page = BufferGetPage(buf); + + PageIndexTupleDelete(page, offnum); + + WriteBuffer(buf); + + /* XXX -- two-phase locking, don't release the write lock */ + return ((char *) NULL); } -static void initRtstate(RTSTATE *rtstate, Relation index) +static void +initRtstate(RTSTATE * rtstate, Relation index) { - RegProcedure union_proc, size_proc, inter_proc; - func_ptr user_fn; - int pronargs; - - union_proc = index_getprocid(index, 1, RT_UNION_PROC); - size_proc = index_getprocid(index, 1, RT_SIZE_PROC); - inter_proc = index_getprocid(index, 1, RT_INTER_PROC); - fmgr_info(union_proc, &user_fn, &pronargs); - rtstate->unionFn = user_fn; - fmgr_info(size_proc, &user_fn, &pronargs); - rtstate->sizeFn = user_fn; - fmgr_info(inter_proc, &user_fn, &pronargs); - rtstate->interFn = user_fn; - return; + RegProcedure union_proc, + size_proc, + inter_proc; + func_ptr user_fn; + int pronargs; + + union_proc = index_getprocid(index, 1, RT_UNION_PROC); + size_proc = index_getprocid(index, 1, RT_SIZE_PROC); + inter_proc = index_getprocid(index, 1, RT_INTER_PROC); + fmgr_info(union_proc, &user_fn, &pronargs); + rtstate->unionFn = user_fn; + fmgr_info(size_proc, &user_fn, &pronargs); + rtstate->sizeFn = user_fn; + fmgr_info(inter_proc, &user_fn, &pronargs); + rtstate->interFn = user_fn; + return; } 
#ifdef RTDEBUG @@ -914,48 +1011,52 @@ static void initRtstate(RTSTATE *rtstate, Relation index) void _rtdump(Relation r) { - Buffer buf; - Page page; - OffsetNumber offnum, maxoff; - BlockNumber blkno; - BlockNumber nblocks; - RTreePageOpaque po; - IndexTuple itup; - BlockNumber itblkno; - OffsetNumber itoffno; - char *datum; - char *itkey; - - nblocks = RelationGetNumberOfBlocks(r); - for (blkno = 0; blkno < nblocks; blkno++) { - buf = ReadBuffer(r, blkno); - page = BufferGetPage(buf); - po = (RTreePageOpaque) PageGetSpecialPointer(page); - maxoff = PageGetMaxOffsetNumber(page); - printf("Page %d maxoff %d <%s>\n", blkno, maxoff, - (po->flags & F_LEAF ? "LEAF" : "INTERNAL")); - - if (PageIsEmpty(page)) { - ReleaseBuffer(buf); - continue; - } - - for (offnum = FirstOffsetNumber; - offnum <= maxoff; - offnum = OffsetNumberNext(offnum)) { - itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); - itblkno = ItemPointerGetBlockNumber(&(itup->t_tid)); - itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid)); - datum = ((char *) itup); - datum += sizeof(IndexTupleData); - itkey = (char *) box_out((BOX *) datum); - printf("\t[%d] size %d heap <%d,%d> key:%s\n", - offnum, IndexTupleSize(itup), itblkno, itoffno, itkey); - pfree(itkey); + Buffer buf; + Page page; + OffsetNumber offnum, + maxoff; + BlockNumber blkno; + BlockNumber nblocks; + RTreePageOpaque po; + IndexTuple itup; + BlockNumber itblkno; + OffsetNumber itoffno; + char *datum; + char *itkey; + + nblocks = RelationGetNumberOfBlocks(r); + for (blkno = 0; blkno < nblocks; blkno++) + { + buf = ReadBuffer(r, blkno); + page = BufferGetPage(buf); + po = (RTreePageOpaque) PageGetSpecialPointer(page); + maxoff = PageGetMaxOffsetNumber(page); + printf("Page %d maxoff %d <%s>\n", blkno, maxoff, + (po->flags & F_LEAF ? 
"LEAF" : "INTERNAL")); + + if (PageIsEmpty(page)) + { + ReleaseBuffer(buf); + continue; + } + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + itblkno = ItemPointerGetBlockNumber(&(itup->t_tid)); + itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid)); + datum = ((char *) itup); + datum += sizeof(IndexTupleData); + itkey = (char *) box_out((BOX *) datum); + printf("\t[%d] size %d heap <%d,%d> key:%s\n", + offnum, IndexTupleSize(itup), itblkno, itoffno, itkey); + pfree(itkey); + } + + ReleaseBuffer(buf); } - - ReleaseBuffer(buf); - } } -#endif /* defined RTDEBUG */ +#endif /* defined RTDEBUG */ diff --git a/src/backend/access/rtree/rtscan.c b/src/backend/access/rtree/rtscan.c index bb8e1dcc71..26590059d6 100644 --- a/src/backend/access/rtree/rtscan.c +++ b/src/backend/access/rtree/rtscan.c @@ -1,19 +1,19 @@ /*------------------------------------------------------------------------- * * rtscan.c-- - * routines to manage scans on index relations + * routines to manage scans on index relations * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.10 1997/05/20 10:29:30 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.11 1997/09/07 04:39:24 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <storage/bufmgr.h> #include <access/genam.h> #include <storage/lmgr.h> @@ -21,377 +21,411 @@ #include <access/rtree.h> #include <access/rtstrat.h> #ifndef HAVE_MEMMOVE -# include <regex/utils.h> +#include <regex/utils.h> #else -# include <string.h> +#include <string.h> #endif - + /* routines defined and used here */ -static void rtregscan(IndexScanDesc s); -static void rtdropscan(IndexScanDesc s); -static void rtadjone(IndexScanDesc s, int op, BlockNumber 
blkno, - OffsetNumber offnum); -static void adjuststack(RTSTACK *stk, BlockNumber blkno, +static void rtregscan(IndexScanDesc s); +static void rtdropscan(IndexScanDesc s); +static void +rtadjone(IndexScanDesc s, int op, BlockNumber blkno, + OffsetNumber offnum); +static void +adjuststack(RTSTACK * stk, BlockNumber blkno, OffsetNumber offnum); -static void adjustiptr(IndexScanDesc s, ItemPointer iptr, - int op, BlockNumber blkno, OffsetNumber offnum); +static void +adjustiptr(IndexScanDesc s, ItemPointer iptr, + int op, BlockNumber blkno, OffsetNumber offnum); /* - * Whenever we start an rtree scan in a backend, we register it in private - * space. Then if the rtree index gets updated, we check all registered - * scans and adjust them if the tuple they point at got moved by the - * update. We only need to do this in private space, because when we update - * an rtree we have a write lock on the tree, so no other process can have - * any locks at all on it. A single transaction can have write and read - * locks on the same object, so that's why we need to handle this case. + * Whenever we start an rtree scan in a backend, we register it in private + * space. Then if the rtree index gets updated, we check all registered + * scans and adjust them if the tuple they point at got moved by the + * update. We only need to do this in private space, because when we update + * an rtree we have a write lock on the tree, so no other process can have + * any locks at all on it. A single transaction can have write and read + * locks on the same object, so that's why we need to handle this case. 
*/ -typedef struct RTScanListData { - IndexScanDesc rtsl_scan; - struct RTScanListData *rtsl_next; -} RTScanListData; +typedef struct RTScanListData +{ + IndexScanDesc rtsl_scan; + struct RTScanListData *rtsl_next; +} RTScanListData; -typedef RTScanListData *RTScanList; +typedef RTScanListData *RTScanList; /* pointer to list of local scans on rtrees */ static RTScanList RTScans = (RTScanList) NULL; - + IndexScanDesc rtbeginscan(Relation r, - bool fromEnd, - uint16 nkeys, - ScanKey key) + bool fromEnd, + uint16 nkeys, + ScanKey key) { - IndexScanDesc s; - - RelationSetLockForRead(r); - s = RelationGetIndexScan(r, fromEnd, nkeys, key); - rtregscan(s); - - return (s); + IndexScanDesc s; + + RelationSetLockForRead(r); + s = RelationGetIndexScan(r, fromEnd, nkeys, key); + rtregscan(s); + + return (s); } void rtrescan(IndexScanDesc s, bool fromEnd, ScanKey key) { - RTreeScanOpaque p; - RegProcedure internal_proc; - int i; - - if (!IndexScanIsValid(s)) { - elog(WARN, "rtrescan: invalid scan."); - return; - } - - /* - * Clear all the pointers. - */ - - ItemPointerSetInvalid(&s->previousItemData); - ItemPointerSetInvalid(&s->currentItemData); - ItemPointerSetInvalid(&s->nextItemData); - ItemPointerSetInvalid(&s->previousMarkData); - ItemPointerSetInvalid(&s->currentMarkData); - ItemPointerSetInvalid(&s->nextMarkData); - - /* - * Set flags. 
- */ - if (RelationGetNumberOfBlocks(s->relation) == 0) { - s->flags = ScanUnmarked; - } else if (fromEnd) { - s->flags = ScanUnmarked | ScanUncheckedPrevious; - } else { - s->flags = ScanUnmarked | ScanUncheckedNext; - } - - s->scanFromEnd = fromEnd; - - if (s->numberOfKeys > 0) { - memmove(s->keyData, - key, - s->numberOfKeys * sizeof(ScanKeyData)); - } - - p = (RTreeScanOpaque) s->opaque; - if (p != (RTreeScanOpaque) NULL) { - freestack(p->s_stack); - freestack(p->s_markstk); - p->s_stack = p->s_markstk = (RTSTACK *) NULL; - p->s_flags = 0x0; - for (i = 0; i < s->numberOfKeys; i++) + RTreeScanOpaque p; + RegProcedure internal_proc; + int i; + + if (!IndexScanIsValid(s)) + { + elog(WARN, "rtrescan: invalid scan."); + return; + } + + /* + * Clear all the pointers. + */ + + ItemPointerSetInvalid(&s->previousItemData); + ItemPointerSetInvalid(&s->currentItemData); + ItemPointerSetInvalid(&s->nextItemData); + ItemPointerSetInvalid(&s->previousMarkData); + ItemPointerSetInvalid(&s->currentMarkData); + ItemPointerSetInvalid(&s->nextMarkData); + + /* + * Set flags. + */ + if (RelationGetNumberOfBlocks(s->relation) == 0) + { + s->flags = ScanUnmarked; + } + else if (fromEnd) + { + s->flags = ScanUnmarked | ScanUncheckedPrevious; + } + else + { + s->flags = ScanUnmarked | ScanUncheckedNext; + } + + s->scanFromEnd = fromEnd; + + if (s->numberOfKeys > 0) { - p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument; + memmove(s->keyData, + key, + s->numberOfKeys * sizeof(ScanKeyData)); } - } else { - /* initialize opaque data */ - p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData)); - p->s_stack = p->s_markstk = (RTSTACK *) NULL; - p->s_internalNKey = s->numberOfKeys; - p->s_flags = 0x0; - s->opaque = p; - if (s->numberOfKeys > 0) { - p->s_internalKey = - (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys); - - /* - * Scans on internal pages use different operators than they - * do on leaf pages. 
For example, if the user wants all boxes - * that exactly match (x1,y1,x2,y2), then on internal pages - * we need to find all boxes that contain (x1,y1,x2,y2). - */ - - for (i = 0; i < s->numberOfKeys; i++) { - p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument; - internal_proc = RTMapOperator(s->relation, - s->keyData[i].sk_attno, - s->keyData[i].sk_procedure); - ScanKeyEntryInitialize(&(p->s_internalKey[i]), - s->keyData[i].sk_flags, - s->keyData[i].sk_attno, - internal_proc, - s->keyData[i].sk_argument); - } + + p = (RTreeScanOpaque) s->opaque; + if (p != (RTreeScanOpaque) NULL) + { + freestack(p->s_stack); + freestack(p->s_markstk); + p->s_stack = p->s_markstk = (RTSTACK *) NULL; + p->s_flags = 0x0; + for (i = 0; i < s->numberOfKeys; i++) + { + p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument; + } + } + else + { + /* initialize opaque data */ + p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData)); + p->s_stack = p->s_markstk = (RTSTACK *) NULL; + p->s_internalNKey = s->numberOfKeys; + p->s_flags = 0x0; + s->opaque = p; + if (s->numberOfKeys > 0) + { + p->s_internalKey = + (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys); + + /* + * Scans on internal pages use different operators than they + * do on leaf pages. For example, if the user wants all boxes + * that exactly match (x1,y1,x2,y2), then on internal pages we + * need to find all boxes that contain (x1,y1,x2,y2). 
+ */ + + for (i = 0; i < s->numberOfKeys; i++) + { + p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument; + internal_proc = RTMapOperator(s->relation, + s->keyData[i].sk_attno, + s->keyData[i].sk_procedure); + ScanKeyEntryInitialize(&(p->s_internalKey[i]), + s->keyData[i].sk_flags, + s->keyData[i].sk_attno, + internal_proc, + s->keyData[i].sk_argument); + } + } } - } } void rtmarkpos(IndexScanDesc s) { - RTreeScanOpaque p; - RTSTACK *o, *n, *tmp; - - s->currentMarkData = s->currentItemData; - p = (RTreeScanOpaque) s->opaque; - if (p->s_flags & RTS_CURBEFORE) - p->s_flags |= RTS_MRKBEFORE; - else - p->s_flags &= ~RTS_MRKBEFORE; - - o = (RTSTACK *) NULL; - n = p->s_stack; - - /* copy the parent stack from the current item data */ - while (n != (RTSTACK *) NULL) { - tmp = (RTSTACK *) palloc(sizeof(RTSTACK)); - tmp->rts_child = n->rts_child; - tmp->rts_blk = n->rts_blk; - tmp->rts_parent = o; - o = tmp; - n = n->rts_parent; - } - - freestack(p->s_markstk); - p->s_markstk = o; + RTreeScanOpaque p; + RTSTACK *o, + *n, + *tmp; + + s->currentMarkData = s->currentItemData; + p = (RTreeScanOpaque) s->opaque; + if (p->s_flags & RTS_CURBEFORE) + p->s_flags |= RTS_MRKBEFORE; + else + p->s_flags &= ~RTS_MRKBEFORE; + + o = (RTSTACK *) NULL; + n = p->s_stack; + + /* copy the parent stack from the current item data */ + while (n != (RTSTACK *) NULL) + { + tmp = (RTSTACK *) palloc(sizeof(RTSTACK)); + tmp->rts_child = n->rts_child; + tmp->rts_blk = n->rts_blk; + tmp->rts_parent = o; + o = tmp; + n = n->rts_parent; + } + + freestack(p->s_markstk); + p->s_markstk = o; } void rtrestrpos(IndexScanDesc s) { - RTreeScanOpaque p; - RTSTACK *o, *n, *tmp; - - s->currentItemData = s->currentMarkData; - p = (RTreeScanOpaque) s->opaque; - if (p->s_flags & RTS_MRKBEFORE) - p->s_flags |= RTS_CURBEFORE; - else - p->s_flags &= ~RTS_CURBEFORE; - - o = (RTSTACK *) NULL; - n = p->s_markstk; - - /* copy the parent stack from the current item data */ - while (n != (RTSTACK *) NULL) { - tmp = 
(RTSTACK *) palloc(sizeof(RTSTACK)); - tmp->rts_child = n->rts_child; - tmp->rts_blk = n->rts_blk; - tmp->rts_parent = o; - o = tmp; - n = n->rts_parent; - } - - freestack(p->s_stack); - p->s_stack = o; + RTreeScanOpaque p; + RTSTACK *o, + *n, + *tmp; + + s->currentItemData = s->currentMarkData; + p = (RTreeScanOpaque) s->opaque; + if (p->s_flags & RTS_MRKBEFORE) + p->s_flags |= RTS_CURBEFORE; + else + p->s_flags &= ~RTS_CURBEFORE; + + o = (RTSTACK *) NULL; + n = p->s_markstk; + + /* copy the parent stack from the current item data */ + while (n != (RTSTACK *) NULL) + { + tmp = (RTSTACK *) palloc(sizeof(RTSTACK)); + tmp->rts_child = n->rts_child; + tmp->rts_blk = n->rts_blk; + tmp->rts_parent = o; + o = tmp; + n = n->rts_parent; + } + + freestack(p->s_stack); + p->s_stack = o; } void rtendscan(IndexScanDesc s) { - RTreeScanOpaque p; - - p = (RTreeScanOpaque) s->opaque; - - if (p != (RTreeScanOpaque) NULL) { - freestack(p->s_stack); - freestack(p->s_markstk); - pfree (s->opaque); - } - - rtdropscan(s); - /* XXX don't unset read lock -- two-phase locking */ + RTreeScanOpaque p; + + p = (RTreeScanOpaque) s->opaque; + + if (p != (RTreeScanOpaque) NULL) + { + freestack(p->s_stack); + freestack(p->s_markstk); + pfree(s->opaque); + } + + rtdropscan(s); + /* XXX don't unset read lock -- two-phase locking */ } static void rtregscan(IndexScanDesc s) { - RTScanList l; - - l = (RTScanList) palloc(sizeof(RTScanListData)); - l->rtsl_scan = s; - l->rtsl_next = RTScans; - RTScans = l; + RTScanList l; + + l = (RTScanList) palloc(sizeof(RTScanListData)); + l->rtsl_scan = s; + l->rtsl_next = RTScans; + RTScans = l; } static void rtdropscan(IndexScanDesc s) { - RTScanList l; - RTScanList prev; - - prev = (RTScanList) NULL; - - for (l = RTScans; - l != (RTScanList) NULL && l->rtsl_scan != s; - l = l->rtsl_next) { - prev = l; - } - - if (l == (RTScanList) NULL) - elog(WARN, "rtree scan list corrupted -- cannot find 0x%lx", s); - - if (prev == (RTScanList) NULL) - RTScans = l->rtsl_next; 
- else - prev->rtsl_next = l->rtsl_next; - - pfree(l); + RTScanList l; + RTScanList prev; + + prev = (RTScanList) NULL; + + for (l = RTScans; + l != (RTScanList) NULL && l->rtsl_scan != s; + l = l->rtsl_next) + { + prev = l; + } + + if (l == (RTScanList) NULL) + elog(WARN, "rtree scan list corrupted -- cannot find 0x%lx", s); + + if (prev == (RTScanList) NULL) + RTScans = l->rtsl_next; + else + prev->rtsl_next = l->rtsl_next; + + pfree(l); } void rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum) { - RTScanList l; - Oid relid; - - relid = r->rd_id; - for (l = RTScans; l != (RTScanList) NULL; l = l->rtsl_next) { - if (l->rtsl_scan->relation->rd_id == relid) - rtadjone(l->rtsl_scan, op, blkno, offnum); - } + RTScanList l; + Oid relid; + + relid = r->rd_id; + for (l = RTScans; l != (RTScanList) NULL; l = l->rtsl_next) + { + if (l->rtsl_scan->relation->rd_id == relid) + rtadjone(l->rtsl_scan, op, blkno, offnum); + } } /* - * rtadjone() -- adjust one scan for update. + * rtadjone() -- adjust one scan for update. * - * By here, the scan passed in is on a modified relation. Op tells - * us what the modification is, and blkno and offind tell us what - * block and offset index were affected. This routine checks the - * current and marked positions, and the current and marked stacks, - * to see if any stored location needs to be changed because of the - * update. If so, we make the change here. + * By here, the scan passed in is on a modified relation. Op tells + * us what the modification is, and blkno and offind tell us what + * block and offset index were affected. This routine checks the + * current and marked positions, and the current and marked stacks, + * to see if any stored location needs to be changed because of the + * update. If so, we make the change here. 
*/ static void rtadjone(IndexScanDesc s, - int op, - BlockNumber blkno, - OffsetNumber offnum) + int op, + BlockNumber blkno, + OffsetNumber offnum) { - RTreeScanOpaque so; - - adjustiptr(s, &(s->currentItemData), op, blkno, offnum); - adjustiptr(s, &(s->currentMarkData), op, blkno, offnum); - - so = (RTreeScanOpaque) s->opaque; - - if (op == RTOP_SPLIT) { - adjuststack(so->s_stack, blkno, offnum); - adjuststack(so->s_markstk, blkno, offnum); - } + RTreeScanOpaque so; + + adjustiptr(s, &(s->currentItemData), op, blkno, offnum); + adjustiptr(s, &(s->currentMarkData), op, blkno, offnum); + + so = (RTreeScanOpaque) s->opaque; + + if (op == RTOP_SPLIT) + { + adjuststack(so->s_stack, blkno, offnum); + adjuststack(so->s_markstk, blkno, offnum); + } } /* - * adjustiptr() -- adjust current and marked item pointers in the scan + * adjustiptr() -- adjust current and marked item pointers in the scan * - * Depending on the type of update and the place it happened, we - * need to do nothing, to back up one record, or to start over on - * the same page. + * Depending on the type of update and the place it happened, we + * need to do nothing, to back up one record, or to start over on + * the same page. 
*/ static void adjustiptr(IndexScanDesc s, - ItemPointer iptr, - int op, - BlockNumber blkno, - OffsetNumber offnum) + ItemPointer iptr, + int op, + BlockNumber blkno, + OffsetNumber offnum) { - OffsetNumber curoff; - RTreeScanOpaque so; - - if (ItemPointerIsValid(iptr)) { - if (ItemPointerGetBlockNumber(iptr) == blkno) { - curoff = ItemPointerGetOffsetNumber(iptr); - so = (RTreeScanOpaque) s->opaque; - - switch (op) { - case RTOP_DEL: - /* back up one if we need to */ - if (curoff >= offnum) { - - if (curoff > FirstOffsetNumber) { - /* just adjust the item pointer */ - ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff)); - } else { - /* remember that we're before the current tuple */ - ItemPointerSet(iptr, blkno, FirstOffsetNumber); - if (iptr == &(s->currentItemData)) - so->s_flags |= RTS_CURBEFORE; - else - so->s_flags |= RTS_MRKBEFORE; - } + OffsetNumber curoff; + RTreeScanOpaque so; + + if (ItemPointerIsValid(iptr)) + { + if (ItemPointerGetBlockNumber(iptr) == blkno) + { + curoff = ItemPointerGetOffsetNumber(iptr); + so = (RTreeScanOpaque) s->opaque; + + switch (op) + { + case RTOP_DEL: + /* back up one if we need to */ + if (curoff >= offnum) + { + + if (curoff > FirstOffsetNumber) + { + /* just adjust the item pointer */ + ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff)); + } + else + { + /* remember that we're before the current tuple */ + ItemPointerSet(iptr, blkno, FirstOffsetNumber); + if (iptr == &(s->currentItemData)) + so->s_flags |= RTS_CURBEFORE; + else + so->s_flags |= RTS_MRKBEFORE; + } + } + break; + + case RTOP_SPLIT: + /* back to start of page on split */ + ItemPointerSet(iptr, blkno, FirstOffsetNumber); + if (iptr == &(s->currentItemData)) + so->s_flags &= ~RTS_CURBEFORE; + else + so->s_flags &= ~RTS_MRKBEFORE; + break; + + default: + elog(WARN, "Bad operation in rtree scan adjust: %d", op); + } } - break; - - case RTOP_SPLIT: - /* back to start of page on split */ - ItemPointerSet(iptr, blkno, FirstOffsetNumber); - if (iptr == 
&(s->currentItemData)) - so->s_flags &= ~RTS_CURBEFORE; - else - so->s_flags &= ~RTS_MRKBEFORE; - break; - - default: - elog(WARN, "Bad operation in rtree scan adjust: %d", op); - } } - } } /* - * adjuststack() -- adjust the supplied stack for a split on a page in - * the index we're scanning. + * adjuststack() -- adjust the supplied stack for a split on a page in + * the index we're scanning. * - * If a page on our parent stack has split, we need to back up to the - * beginning of the page and rescan it. The reason for this is that - * the split algorithm for rtrees doesn't order tuples in any useful - * way on a single page. This means on that a split, we may wind up - * looking at some heap tuples more than once. This is handled in the - * access method update code for heaps; if we've modified the tuple we - * are looking at already in this transaction, we ignore the update - * request. + * If a page on our parent stack has split, we need to back up to the + * beginning of the page and rescan it. The reason for this is that + * the split algorithm for rtrees doesn't order tuples in any useful + * way on a single page. This means on that a split, we may wind up + * looking at some heap tuples more than once. This is handled in the + * access method update code for heaps; if we've modified the tuple we + * are looking at already in this transaction, we ignore the update + * request. 
*/ /*ARGSUSED*/ static void -adjuststack(RTSTACK *stk, - BlockNumber blkno, - OffsetNumber offnum) +adjuststack(RTSTACK * stk, + BlockNumber blkno, + OffsetNumber offnum) { - while (stk != (RTSTACK *) NULL) { - if (stk->rts_blk == blkno) - stk->rts_child = FirstOffsetNumber; - - stk = stk->rts_parent; - } + while (stk != (RTSTACK *) NULL) + { + if (stk->rts_blk == blkno) + stk->rts_child = FirstOffsetNumber; + + stk = stk->rts_parent; + } } diff --git a/src/backend/access/rtree/rtstrat.c b/src/backend/access/rtree/rtstrat.c index 7025a30999..c71059d3f0 100644 --- a/src/backend/access/rtree/rtstrat.c +++ b/src/backend/access/rtree/rtstrat.c @@ -1,241 +1,243 @@ /*------------------------------------------------------------------------- * * rtstrat.c-- - * strategy map data for rtrees. + * strategy map data for rtrees. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtstrat.c,v 1.6 1997/08/19 21:29:52 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtstrat.c,v 1.7 1997/09/07 04:39:26 momjian Exp $ * *------------------------------------------------------------------------- */ #include <postgres.h> - + #include <utils/rel.h> #include <access/rtree.h> #include <access/istrat.h> -static StrategyNumber RelationGetRTStrategy(Relation r, - AttrNumber attnum, RegProcedure proc); +static StrategyNumber +RelationGetRTStrategy(Relation r, + AttrNumber attnum, RegProcedure proc); /* - * Note: negate, commute, and negatecommute all assume that operators are - * ordered as follows in the strategy map: + * Note: negate, commute, and negatecommute all assume that operators are + * ordered as follows in the strategy map: * - * left, left-or-overlap, overlap, right-or-overlap, right, same, - * contains, contained-by + * left, left-or-overlap, overlap, right-or-overlap, right, same, + * contains, contained-by * - * The negate, commute, and negatecommute arrays are 
used by the planner - * to plan indexed scans over data that appears in the qualificiation in - * a boolean negation, or whose operands appear in the wrong order. For - * example, if the operator "<%" means "contains", and the user says + * The negate, commute, and negatecommute arrays are used by the planner + * to plan indexed scans over data that appears in the qualificiation in + * a boolean negation, or whose operands appear in the wrong order. For + * example, if the operator "<%" means "contains", and the user says * - * where not rel.box <% "(10,10,20,20)"::box + * where not rel.box <% "(10,10,20,20)"::box * - * the planner can plan an index scan by noting that rtree indices have - * an operator in their operator class for negating <%. + * the planner can plan an index scan by noting that rtree indices have + * an operator in their operator class for negating <%. * - * Similarly, if the user says something like + * Similarly, if the user says something like * - * where "(10,10,20,20)"::box <% rel.box + * where "(10,10,20,20)"::box <% rel.box * - * the planner can see that the rtree index on rel.box has an operator in - * its opclass for commuting <%, and plan the scan using that operator. - * This added complexity in the access methods makes the planner a lot easier - * to write. + * the planner can see that the rtree index on rel.box has an operator in + * its opclass for commuting <%, and plan the scan using that operator. + * This added complexity in the access methods makes the planner a lot easier + * to write. */ /* if a op b, what operator tells us if (not a op b)? 
*/ -static StrategyNumber RTNegate[RTNStrategies] = { - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy - }; +static StrategyNumber RTNegate[RTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy +}; /* if a op_1 b, what is the operator op_2 such that b op_2 a? */ -static StrategyNumber RTCommute[RTNStrategies] = { - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy - }; +static StrategyNumber RTCommute[RTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy +}; /* if a op_1 b, what is the operator op_2 such that (b !op_2 a)? */ -static StrategyNumber RTNegateCommute[RTNStrategies] = { - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy, - InvalidStrategy - }; +static StrategyNumber RTNegateCommute[RTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy +}; /* - * Now do the TermData arrays. These exist in case the user doesn't give - * us a full set of operators for a particular operator class. The idea - * is that by making multiple comparisons using any one of the supplied - * operators, we can decide whether two n-dimensional polygons are equal. - * For example, if a contains b and b contains a, we may conclude that - * a and b are equal. - * - * The presence of the TermData arrays in all this is a historical accident. 
- * Early in the development of the POSTGRES access methods, it was believed - * that writing functions was harder than writing arrays. This is wrong; - * TermData is hard to understand and hard to get right. In general, when - * someone populates a new operator class, the populate it completely. If - * Mike Hirohama had forced Cimarron Taylor to populate the strategy map - * for btree int2_ops completely in 1988, you wouldn't have to deal with - * all this now. Too bad for you. - * - * Since you can't necessarily do this in all cases (for example, you can't - * do it given only "intersects" or "disjoint"), TermData arrays for some - * operators don't appear below. - * - * Note that if you DO supply all the operators required in a given opclass - * by inserting them into the pg_opclass system catalog, you can get away - * without doing all this TermData stuff. Since the rtree code is intended - * to be a reference for access method implementors, I'm doing TermData - * correctly here. - * - * Note on style: these are all actually of type StrategyTermData, but - * since those have variable-length data at the end of the struct we can't - * properly initialize them if we declare them to be what they are. + * Now do the TermData arrays. These exist in case the user doesn't give + * us a full set of operators for a particular operator class. The idea + * is that by making multiple comparisons using any one of the supplied + * operators, we can decide whether two n-dimensional polygons are equal. + * For example, if a contains b and b contains a, we may conclude that + * a and b are equal. + * + * The presence of the TermData arrays in all this is a historical accident. + * Early in the development of the POSTGRES access methods, it was believed + * that writing functions was harder than writing arrays. This is wrong; + * TermData is hard to understand and hard to get right. In general, when + * someone populates a new operator class, the populate it completely. 
If + * Mike Hirohama had forced Cimarron Taylor to populate the strategy map + * for btree int2_ops completely in 1988, you wouldn't have to deal with + * all this now. Too bad for you. + * + * Since you can't necessarily do this in all cases (for example, you can't + * do it given only "intersects" or "disjoint"), TermData arrays for some + * operators don't appear below. + * + * Note that if you DO supply all the operators required in a given opclass + * by inserting them into the pg_opclass system catalog, you can get away + * without doing all this TermData stuff. Since the rtree code is intended + * to be a reference for access method implementors, I'm doing TermData + * correctly here. + * + * Note on style: these are all actually of type StrategyTermData, but + * since those have variable-length data at the end of the struct we can't + * properly initialize them if we declare them to be what they are. */ /* if you only have "contained-by", how do you determine equality? */ -static uint16 RTContainedByTermData[] = { - 2, /* make two comparisons */ - RTContainedByStrategyNumber, /* use "a contained-by b" */ - 0x0, /* without any magic */ - RTContainedByStrategyNumber, /* then use contained-by, */ - SK_COMMUTE /* swapping a and b */ - }; +static uint16 RTContainedByTermData[] = { + 2, /* make two comparisons */ + RTContainedByStrategyNumber,/* use "a contained-by b" */ + 0x0, /* without any magic */ + RTContainedByStrategyNumber,/* then use contained-by, */ + SK_COMMUTE /* swapping a and b */ +}; /* if you only have "contains", how do you determine equality? 
*/ -static uint16 RTContainsTermData[] = { - 2, /* make two comparisons */ - RTContainsStrategyNumber, /* use "a contains b" */ - 0x0, /* without any magic */ - RTContainsStrategyNumber, /* then use contains again, */ - SK_COMMUTE /* swapping a and b */ - }; +static uint16 RTContainsTermData[] = { + 2, /* make two comparisons */ + RTContainsStrategyNumber, /* use "a contains b" */ + 0x0, /* without any magic */ + RTContainsStrategyNumber, /* then use contains again, */ + SK_COMMUTE /* swapping a and b */ +}; /* now put all that together in one place for the planner */ static StrategyTerm RTEqualExpressionData[] = { - (StrategyTerm) RTContainedByTermData, - (StrategyTerm) RTContainsTermData, - NULL - }; + (StrategyTerm) RTContainedByTermData, + (StrategyTerm) RTContainsTermData, + NULL +}; /* - * If you were sufficiently attentive to detail, you would go through - * the ExpressionData pain above for every one of the seven strategies - * we defined. I am not. Now we declare the StrategyEvaluationData - * structure that gets shipped around to help the planner and the access - * method decide what sort of scan it should do, based on (a) what the - * user asked for, (b) what operators are defined for a particular opclass, - * and (c) the reams of information we supplied above. - * - * The idea of all of this initialized data is to make life easier on the - * user when he defines a new operator class to use this access method. - * By filling in all the data, we let him get away with leaving holes in his - * operator class, and still let him use the index. The added complexity - * in the access methods just isn't worth the trouble, though. + * If you were sufficiently attentive to detail, you would go through + * the ExpressionData pain above for every one of the seven strategies + * we defined. I am not. 
Now we declare the StrategyEvaluationData + * structure that gets shipped around to help the planner and the access + * method decide what sort of scan it should do, based on (a) what the + * user asked for, (b) what operators are defined for a particular opclass, + * and (c) the reams of information we supplied above. + * + * The idea of all of this initialized data is to make life easier on the + * user when he defines a new operator class to use this access method. + * By filling in all the data, we let him get away with leaving holes in his + * operator class, and still let him use the index. The added complexity + * in the access methods just isn't worth the trouble, though. */ static StrategyEvaluationData RTEvaluationData = { - RTNStrategies, /* # of strategies */ - (StrategyTransformMap) RTNegate, /* how to do (not qual) */ - (StrategyTransformMap) RTCommute, /* how to swap operands */ - (StrategyTransformMap) RTNegateCommute, /* how to do both */ - { - NULL, /* express left */ - NULL, /* express overleft */ - NULL, /* express over */ - NULL, /* express overright */ - NULL, /* express right */ - (StrategyExpression) RTEqualExpressionData, /* express same */ - NULL, /* express contains */ - NULL, /* express contained-by */ - NULL, - NULL, - NULL - } + RTNStrategies, /* # of strategies */ + (StrategyTransformMap) RTNegate, /* how to do (not qual) */ + (StrategyTransformMap) RTCommute, /* how to swap operands */ + (StrategyTransformMap) RTNegateCommute, /* how to do both */ + { + NULL, /* express left */ + NULL, /* express overleft */ + NULL, /* express over */ + NULL, /* express overright */ + NULL, /* express right */ + (StrategyExpression) RTEqualExpressionData, /* express same */ + NULL, /* express contains */ + NULL, /* express contained-by */ + NULL, + NULL, + NULL + } }; /* - * Okay, now something peculiar to rtrees that doesn't apply to most other - * indexing structures: When we're searching a tree for a given value, we - * can't do the same sorts of 
comparisons on internal node entries as we - * do at leaves. The reason is that if we're looking for (say) all boxes - * that are the same as (0,0,10,10), then we need to find all leaf pages - * that overlap that region. So internally we search for overlap, and at - * the leaf we search for equality. - * - * This array maps leaf search operators to the internal search operators. - * We assume the normal ordering on operators: - * - * left, left-or-overlap, overlap, right-or-overlap, right, same, - * contains, contained-by + * Okay, now something peculiar to rtrees that doesn't apply to most other + * indexing structures: When we're searching a tree for a given value, we + * can't do the same sorts of comparisons on internal node entries as we + * do at leaves. The reason is that if we're looking for (say) all boxes + * that are the same as (0,0,10,10), then we need to find all leaf pages + * that overlap that region. So internally we search for overlap, and at + * the leaf we search for equality. + * + * This array maps leaf search operators to the internal search operators. 
+ * We assume the normal ordering on operators: + * + * left, left-or-overlap, overlap, right-or-overlap, right, same, + * contains, contained-by */ static StrategyNumber RTOperMap[RTNStrategies] = { - RTOverLeftStrategyNumber, - RTOverLeftStrategyNumber, - RTOverlapStrategyNumber, - RTOverRightStrategyNumber, - RTOverRightStrategyNumber, - RTContainsStrategyNumber, - RTContainsStrategyNumber, - RTOverlapStrategyNumber - }; + RTOverLeftStrategyNumber, + RTOverLeftStrategyNumber, + RTOverlapStrategyNumber, + RTOverRightStrategyNumber, + RTOverRightStrategyNumber, + RTContainsStrategyNumber, + RTContainsStrategyNumber, + RTOverlapStrategyNumber +}; -static StrategyNumber +static StrategyNumber RelationGetRTStrategy(Relation r, - AttrNumber attnum, - RegProcedure proc) + AttrNumber attnum, + RegProcedure proc) { - return (RelationGetStrategy(r, attnum, &RTEvaluationData, proc)); + return (RelationGetStrategy(r, attnum, &RTEvaluationData, proc)); } #ifdef NOT_USED bool RelationInvokeRTStrategy(Relation r, - AttrNumber attnum, - StrategyNumber s, - Datum left, - Datum right) + AttrNumber attnum, + StrategyNumber s, + Datum left, + Datum right) { - return (RelationInvokeStrategy(r, &RTEvaluationData, attnum, s, - left, right)); + return (RelationInvokeStrategy(r, &RTEvaluationData, attnum, s, + left, right)); } + #endif RegProcedure RTMapOperator(Relation r, - AttrNumber attnum, - RegProcedure proc) + AttrNumber attnum, + RegProcedure proc) { - StrategyNumber procstrat; - StrategyMap strategyMap; - - procstrat = RelationGetRTStrategy(r, attnum, proc); - strategyMap = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(r), - RTNStrategies, - attnum); - - return (strategyMap->entry[RTOperMap[procstrat - 1] - 1].sk_procedure); + StrategyNumber procstrat; + StrategyMap strategyMap; + + procstrat = RelationGetRTStrategy(r, attnum, proc); + strategyMap = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(r), + RTNStrategies, + attnum); + + return 
(strategyMap->entry[RTOperMap[procstrat - 1] - 1].sk_procedure); } diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index 9087e50bc4..6d721fe96a 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -1,18 +1,18 @@ /*------------------------------------------------------------------------- * * transam.c-- - * postgres transaction log/time interface routines + * postgres transaction log/time interface routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.9 1997/08/19 21:29:59 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.10 1997/09/07 04:39:29 momjian Exp $ * * NOTES - * This file contains the high level access-method interface to the - * transaction system. - * + * This file contains the high level access-method interface to the + * transaction system. + * *------------------------------------------------------------------------- */ @@ -26,659 +26,671 @@ #include <storage/spin.h> #include <commands/vacuum.h> -static int RecoveryCheckingEnabled(void); -static void TransRecover(Relation logRelation); -static bool TransactionLogTest(TransactionId transactionId, XidStatus status); -static void TransactionLogUpdate(TransactionId transactionId, - XidStatus status); +static int RecoveryCheckingEnabled(void); +static void TransRecover(Relation logRelation); +static bool TransactionLogTest(TransactionId transactionId, XidStatus status); +static void +TransactionLogUpdate(TransactionId transactionId, + XidStatus status); /* ---------------- - * global variables holding pointers to relations used - * by the transaction system. These are initialized by - * InitializeTransactionLog(). + * global variables holding pointers to relations used + * by the transaction system. These are initialized by + * InitializeTransactionLog(). 
* ---------------- */ -Relation LogRelation = (Relation) NULL; -Relation TimeRelation = (Relation) NULL; -Relation VariableRelation = (Relation) NULL; +Relation LogRelation = (Relation) NULL; +Relation TimeRelation = (Relation) NULL; +Relation VariableRelation = (Relation) NULL; /* ---------------- - * global variables holding cached transaction id's and statuses. + * global variables holding cached transaction id's and statuses. * ---------------- */ TransactionId cachedGetCommitTimeXid; AbsoluteTime cachedGetCommitTime; TransactionId cachedTestXid; -XidStatus cachedTestXidStatus; +XidStatus cachedTestXidStatus; /* ---------------- - * transaction system constants + * transaction system constants * ---------------- */ /* ---------------------------------------------------------------- - * transaction system constants + * transaction system constants * - * read the comments for GetNewTransactionId in order to - * understand the initial values for AmiTransactionId and - * FirstTransactionId. -cim 3/23/90 + * read the comments for GetNewTransactionId in order to + * understand the initial values for AmiTransactionId and + * FirstTransactionId. -cim 3/23/90 * ---------------------------------------------------------------- */ -TransactionId NullTransactionId = (TransactionId) 0; +TransactionId NullTransactionId = (TransactionId) 0; -TransactionId AmiTransactionId = (TransactionId) 512; +TransactionId AmiTransactionId = (TransactionId) 512; -TransactionId FirstTransactionId = (TransactionId) 514; +TransactionId FirstTransactionId = (TransactionId) 514; /* ---------------- - * transaction recovery state variables - * - * When the transaction system is initialized, we may - * need to do recovery checking. This decision is decided - * by the postmaster or the user by supplying the backend - * with a special flag. 
In general, we want to do recovery - * checking whenever we are running without a postmaster - * or when the number of backends running under the postmaster - * goes from zero to one. -cim 3/21/90 + * transaction recovery state variables + * + * When the transaction system is initialized, we may + * need to do recovery checking. This decision is decided + * by the postmaster or the user by supplying the backend + * with a special flag. In general, we want to do recovery + * checking whenever we are running without a postmaster + * or when the number of backends running under the postmaster + * goes from zero to one. -cim 3/21/90 * ---------------- */ -int RecoveryCheckingEnableState = 0; +int RecoveryCheckingEnableState = 0; /* ------------------ - * spinlock for oid generation + * spinlock for oid generation * ----------------- */ -extern int OidGenLockId; +extern int OidGenLockId; /* ---------------- - * globals that must be reset at abort + * globals that must be reset at abort * ---------------- */ -extern bool BuildingBtree; +extern bool BuildingBtree; /* ---------------- - * recovery checking accessors + * recovery checking accessors * ---------------- */ static int RecoveryCheckingEnabled(void) -{ - return RecoveryCheckingEnableState; +{ + return RecoveryCheckingEnableState; } #ifdef NOT_USED static void SetRecoveryCheckingEnabled(bool state) -{ - RecoveryCheckingEnableState = (state == true); +{ + RecoveryCheckingEnableState = (state == true); } + #endif /* ---------------------------------------------------------------- - * postgres log/time access method interface - * - * TransactionLogTest - * TransactionLogUpdate - * ======== - * these functions do work for the interface - * functions - they search/retrieve and append/update - * information in the log and time relations. 
+ * postgres log/time access method interface + * + * TransactionLogTest + * TransactionLogUpdate + * ======== + * these functions do work for the interface + * functions - they search/retrieve and append/update + * information in the log and time relations. * ---------------------------------------------------------------- */ /* -------------------------------- - * TransactionLogTest + * TransactionLogTest * -------------------------------- */ -static bool /* true/false: does transaction id have specified status? */ -TransactionLogTest(TransactionId transactionId, /* transaction id to test */ - XidStatus status) /* transaction status */ +static bool /* true/false: does transaction id have + * specified status? */ +TransactionLogTest(TransactionId transactionId, /* transaction id to test */ + XidStatus status) /* transaction status */ { - BlockNumber blockNumber; - XidStatus xidstatus; /* recorded status of xid */ - bool fail = false; /* success/failure */ - - /* ---------------- - * during initialization consider all transactions - * as having been committed - * ---------------- - */ - if (! RelationIsValid(LogRelation)) - return (bool) (status == XID_COMMIT); - - /* ---------------- - * before going to the buffer manager, check our single - * item cache to see if we didn't just check the transaction - * status a moment ago. - * ---------------- - */ - if (TransactionIdEquals(transactionId, cachedTestXid)) - return (bool) - (status == cachedTestXidStatus); - - /* ---------------- - * compute the item pointer corresponding to the - * page containing our transaction id. We save the item in - * our cache to speed up things if we happen to ask for the - * same xid's status more than once. - * ---------------- - */ - TransComputeBlockNumber(LogRelation, transactionId, &blockNumber); - xidstatus = TransBlockNumberGetXidStatus(LogRelation, - blockNumber, - transactionId, - &fail); - - if (! 
fail) { - TransactionIdStore(transactionId, &cachedTestXid); - cachedTestXidStatus = xidstatus; - return (bool) - (status == xidstatus); - } - - /* ---------------- - * here the block didn't contain the information we wanted - * ---------------- - */ - elog(WARN, "TransactionLogTest: failed to get xidstatus"); - - /* - * so lint is happy... - */ - return(false); + BlockNumber blockNumber; + XidStatus xidstatus; /* recorded status of xid */ + bool fail = false; /* success/failure */ + + /* ---------------- + * during initialization consider all transactions + * as having been committed + * ---------------- + */ + if (!RelationIsValid(LogRelation)) + return (bool) (status == XID_COMMIT); + + /* ---------------- + * before going to the buffer manager, check our single + * item cache to see if we didn't just check the transaction + * status a moment ago. + * ---------------- + */ + if (TransactionIdEquals(transactionId, cachedTestXid)) + return (bool) + (status == cachedTestXidStatus); + + /* ---------------- + * compute the item pointer corresponding to the + * page containing our transaction id. We save the item in + * our cache to speed up things if we happen to ask for the + * same xid's status more than once. + * ---------------- + */ + TransComputeBlockNumber(LogRelation, transactionId, &blockNumber); + xidstatus = TransBlockNumberGetXidStatus(LogRelation, + blockNumber, + transactionId, + &fail); + + if (!fail) + { + TransactionIdStore(transactionId, &cachedTestXid); + cachedTestXidStatus = xidstatus; + return (bool) + (status == xidstatus); + } + + /* ---------------- + * here the block didn't contain the information we wanted + * ---------------- + */ + elog(WARN, "TransactionLogTest: failed to get xidstatus"); + + /* + * so lint is happy... 
+ */ + return (false); } /* -------------------------------- - * TransactionLogUpdate + * TransactionLogUpdate * -------------------------------- */ static void -TransactionLogUpdate(TransactionId transactionId, /* trans id to update */ - XidStatus status) /* new trans status */ +TransactionLogUpdate(TransactionId transactionId, /* trans id to update */ + XidStatus status) /* new trans status */ { - BlockNumber blockNumber; - bool fail = false; /* success/failure */ - AbsoluteTime currentTime; /* time of this transaction */ - - /* ---------------- - * during initialization we don't record any updates. - * ---------------- - */ - if (! RelationIsValid(LogRelation)) - return; - - /* ---------------- - * get the transaction commit time - * ---------------- - */ - currentTime = getSystemTime(); - - /* ---------------- - * update the log relation - * ---------------- - */ - TransComputeBlockNumber(LogRelation, transactionId, &blockNumber); - TransBlockNumberSetXidStatus(LogRelation, - blockNumber, - transactionId, - status, - &fail); - - /* ---------------- - * update (invalidate) our single item TransactionLogTest cache. - * ---------------- - */ - TransactionIdStore(transactionId, &cachedTestXid); - cachedTestXidStatus = status; - - /* ---------------- - * now we update the time relation, if necessary - * (we only record commit times) - * ---------------- - */ - if (RelationIsValid(TimeRelation) && status == XID_COMMIT) { - TransComputeBlockNumber(TimeRelation, transactionId, &blockNumber); - TransBlockNumberSetCommitTime(TimeRelation, - blockNumber, - transactionId, - currentTime, - &fail); + BlockNumber blockNumber; + bool fail = false; /* success/failure */ + AbsoluteTime currentTime;/* time of this transaction */ + + /* ---------------- + * during initialization we don't record any updates. + * ---------------- + */ + if (!RelationIsValid(LogRelation)) + return; + /* ---------------- - * update (invalidate) our single item GetCommitTime cache. 
+ * get the transaction commit time * ---------------- */ - TransactionIdStore(transactionId, &cachedGetCommitTimeXid); - cachedGetCommitTime = currentTime; - } - - /* ---------------- - * now we update the "last committed transaction" field - * in the variable relation if we are recording a commit. - * ---------------- - */ - if (RelationIsValid(VariableRelation) && status == XID_COMMIT) - UpdateLastCommittedXid(transactionId); + currentTime = getSystemTime(); + + /* ---------------- + * update the log relation + * ---------------- + */ + TransComputeBlockNumber(LogRelation, transactionId, &blockNumber); + TransBlockNumberSetXidStatus(LogRelation, + blockNumber, + transactionId, + status, + &fail); + + /* ---------------- + * update (invalidate) our single item TransactionLogTest cache. + * ---------------- + */ + TransactionIdStore(transactionId, &cachedTestXid); + cachedTestXidStatus = status; + + /* ---------------- + * now we update the time relation, if necessary + * (we only record commit times) + * ---------------- + */ + if (RelationIsValid(TimeRelation) && status == XID_COMMIT) + { + TransComputeBlockNumber(TimeRelation, transactionId, &blockNumber); + TransBlockNumberSetCommitTime(TimeRelation, + blockNumber, + transactionId, + currentTime, + &fail); + /* ---------------- + * update (invalidate) our single item GetCommitTime cache. + * ---------------- + */ + TransactionIdStore(transactionId, &cachedGetCommitTimeXid); + cachedGetCommitTime = currentTime; + } + + /* ---------------- + * now we update the "last committed transaction" field + * in the variable relation if we are recording a commit. 
+ * ---------------- + */ + if (RelationIsValid(VariableRelation) && status == XID_COMMIT) + UpdateLastCommittedXid(transactionId); } /* -------------------------------- - * TransactionIdGetCommitTime + * TransactionIdGetCommitTime * -------------------------------- */ -AbsoluteTime /* commit time of transaction id */ -TransactionIdGetCommitTime(TransactionId transactionId) /* transaction id to test */ +AbsoluteTime /* commit time of transaction id */ +TransactionIdGetCommitTime(TransactionId transactionId) /* transaction id to + * test */ { - BlockNumber blockNumber; - AbsoluteTime commitTime; /* commit time */ - bool fail = false; /* success/failure */ - - /* ---------------- - * return invalid if we aren't running yet... - * ---------------- - */ - if (! RelationIsValid(TimeRelation)) - return INVALID_ABSTIME; - - /* ---------------- - * before going to the buffer manager, check our single - * item cache to see if we didn't just get the commit time - * a moment ago. - * ---------------- - */ - if (TransactionIdEquals(transactionId, cachedGetCommitTimeXid)) - return cachedGetCommitTime; - - /* ---------------- - * compute the item pointer corresponding to the - * page containing our transaction commit time - * ---------------- - */ - TransComputeBlockNumber(TimeRelation, transactionId, &blockNumber); - commitTime = TransBlockNumberGetCommitTime(TimeRelation, - blockNumber, - transactionId, - &fail); - - /* ---------------- - * update our cache and return the transaction commit time - * ---------------- - */ - if (! fail) { - TransactionIdStore(transactionId, &cachedGetCommitTimeXid); - cachedGetCommitTime = commitTime; - return commitTime; - } else - return INVALID_ABSTIME; + BlockNumber blockNumber; + AbsoluteTime commitTime; /* commit time */ + bool fail = false; /* success/failure */ + + /* ---------------- + * return invalid if we aren't running yet... 
+ * ---------------- + */ + if (!RelationIsValid(TimeRelation)) + return INVALID_ABSTIME; + + /* ---------------- + * before going to the buffer manager, check our single + * item cache to see if we didn't just get the commit time + * a moment ago. + * ---------------- + */ + if (TransactionIdEquals(transactionId, cachedGetCommitTimeXid)) + return cachedGetCommitTime; + + /* ---------------- + * compute the item pointer corresponding to the + * page containing our transaction commit time + * ---------------- + */ + TransComputeBlockNumber(TimeRelation, transactionId, &blockNumber); + commitTime = TransBlockNumberGetCommitTime(TimeRelation, + blockNumber, + transactionId, + &fail); + + /* ---------------- + * update our cache and return the transaction commit time + * ---------------- + */ + if (!fail) + { + TransactionIdStore(transactionId, &cachedGetCommitTimeXid); + cachedGetCommitTime = commitTime; + return commitTime; + } + else + return INVALID_ABSTIME; } /* ---------------------------------------------------------------- - * transaction recovery code + * transaction recovery code * ---------------------------------------------------------------- */ /* -------------------------------- - * TransRecover + * TransRecover * - * preform transaction recovery checking. + * preform transaction recovery checking. * - * Note: this should only be preformed if no other backends - * are running. This is known by the postmaster and - * conveyed by the postmaster passing a "do recovery checking" - * flag to the backend. + * Note: this should only be preformed if no other backends + * are running. This is known by the postmaster and + * conveyed by the postmaster passing a "do recovery checking" + * flag to the backend. 
* - * here we get the last recorded transaction from the log, - * get the "last" and "next" transactions from the variable relation - * and then preform some integrity tests: + * here we get the last recorded transaction from the log, + * get the "last" and "next" transactions from the variable relation + * and then preform some integrity tests: * - * 1) No transaction may exist higher then the "next" available - * transaction recorded in the variable relation. If this is the - * case then it means either the log or the variable relation - * has become corrupted. + * 1) No transaction may exist higher then the "next" available + * transaction recorded in the variable relation. If this is the + * case then it means either the log or the variable relation + * has become corrupted. * - * 2) The last committed transaction may not be higher then the - * next available transaction for the same reason. + * 2) The last committed transaction may not be higher then the + * next available transaction for the same reason. * - * 3) The last recorded transaction may not be lower then the - * last committed transaction. (the reverse is ok - it means - * that some transactions have aborted since the last commit) + * 3) The last recorded transaction may not be lower then the + * last committed transaction. (the reverse is ok - it means + * that some transactions have aborted since the last commit) * - * Here is what the proper situation looks like. The line - * represents the data stored in the log. 'c' indicates the - * transaction was recorded as committed, 'a' indicates an - * abortted transaction and '.' represents information not - * recorded. These may correspond to in progress transactions. + * Here is what the proper situation looks like. The line + * represents the data stored in the log. 'c' indicates the + * transaction was recorded as committed, 'a' indicates an + * abortted transaction and '.' represents information not + * recorded. 
These may correspond to in progress transactions. * - * c c a c . . a . . . . . . . . . . - * | | - * last next + * c c a c . . a . . . . . . . . . . + * | | + * last next * - * Since "next" is only incremented by GetNewTransactionId() which - * is called when transactions are started. Hence if there - * are commits or aborts after "next", then it means we committed - * or aborted BEFORE we started the transaction. This is the - * rational behind constraint (1). + * Since "next" is only incremented by GetNewTransactionId() which + * is called when transactions are started. Hence if there + * are commits or aborts after "next", then it means we committed + * or aborted BEFORE we started the transaction. This is the + * rational behind constraint (1). * - * Likewise, "last" should never greater then "next" for essentially - * the same reason - it would imply we committed before we started. - * This is the reasoning for (2). + * Likewise, "last" should never greater then "next" for essentially + * the same reason - it would imply we committed before we started. + * This is the reasoning for (2). * - * (3) implies we may never have a situation such as: + * (3) implies we may never have a situation such as: * - * c c a c . . a c . . . . . . . . . - * | | - * last next + * c c a c . . a c . . . . . . . . . + * | | + * last next * - * where there is a 'c' greater then "last". + * where there is a 'c' greater then "last". * - * Recovery checking is more difficult in the case where - * several backends are executing concurrently because the - * transactions may be executing in the other backends. - * So, we only do recovery stuff when the backend is explicitly - * passed a flag on the command line. + * Recovery checking is more difficult in the case where + * several backends are executing concurrently because the + * transactions may be executing in the other backends. + * So, we only do recovery stuff when the backend is explicitly + * passed a flag on the command line. 
* -------------------------------- */ static void TransRecover(Relation logRelation) { -#if 0 - /* ---------------- - * first get the last recorded transaction in the log. - * ---------------- - */ - TransGetLastRecordedTransaction(logRelation, logLastXid, &fail); - if (fail == true) - elog(WARN, "TransRecover: failed TransGetLastRecordedTransaction"); - - /* ---------------- - * next get the "last" and "next" variables - * ---------------- - */ - VariableRelationGetLastXid(&varLastXid); - VariableRelationGetNextXid(&varNextXid); - - /* ---------------- - * integrity test (1) - * ---------------- - */ - if (TransactionIdIsLessThan(varNextXid, logLastXid)) - elog(WARN, "TransRecover: varNextXid < logLastXid"); - - /* ---------------- - * integrity test (2) - * ---------------- - */ - - /* ---------------- - * integrity test (3) - * ---------------- - */ - - /* ---------------- - * here we have a valid " - * - * **** RESUME HERE **** - * ---------------- - */ - varNextXid = TransactionIdDup(varLastXid); - TransactionIdIncrement(&varNextXid); - - VarPut(var, VAR_PUT_LASTXID, varLastXid); - VarPut(var, VAR_PUT_NEXTXID, varNextXid); +#if 0 + /* ---------------- + * first get the last recorded transaction in the log. 
+ * ---------------- + */ + TransGetLastRecordedTransaction(logRelation, logLastXid, &fail); + if (fail == true) + elog(WARN, "TransRecover: failed TransGetLastRecordedTransaction"); + + /* ---------------- + * next get the "last" and "next" variables + * ---------------- + */ + VariableRelationGetLastXid(&varLastXid); + VariableRelationGetNextXid(&varNextXid); + + /* ---------------- + * integrity test (1) + * ---------------- + */ + if (TransactionIdIsLessThan(varNextXid, logLastXid)) + elog(WARN, "TransRecover: varNextXid < logLastXid"); + + /* ---------------- + * integrity test (2) + * ---------------- + */ + + /* ---------------- + * integrity test (3) + * ---------------- + */ + + /* ---------------- + * here we have a valid " + * + * **** RESUME HERE **** + * ---------------- + */ + varNextXid = TransactionIdDup(varLastXid); + TransactionIdIncrement(&varNextXid); + + VarPut(var, VAR_PUT_LASTXID, varLastXid); + VarPut(var, VAR_PUT_NEXTXID, varNextXid); #endif } /* ---------------------------------------------------------------- - * Interface functions - * - * InitializeTransactionLog - * ======== - * this function (called near cinit) initializes - * the transaction log, time and variable relations. - * - * TransactionId DidCommit - * TransactionId DidAbort |