diff options
author | Marc G. Fournier | 1996-07-09 06:22:35 +0000 |
---|---|---|
committer | Marc G. Fournier | 1996-07-09 06:22:35 +0000 |
commit | d31084e9d1118b25fd16580d9d8c2924b5740dff (patch) | |
tree | 3179e66307d54df9c7b966543550e601eb55e668 |
Postgres95 1.01 Distribution - Virgin Sources
868 files changed, 242656 insertions, 0 deletions
diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000000..7e047c0cce --- /dev/null +++ b/src/Makefile @@ -0,0 +1,48 @@ +#------------------------------------------------------------------------- +# +# Makefile.inc-- +# Build and install postgres. +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +# $Header: /cvsroot/pgsql/src/Makefile,v 1.1.1.1 1996/07/09 06:21:07 scrappy Exp $ +# +# NOTES +# objdir - location of the objects and generated files (eg. obj) +# +#------------------------------------------------------------------------- + +SUBDIR= backend libpq bin + +FIND = find +# assuming gnu tar and split here +TAR = tar +SPLIT = split + +ETAGS = etags +XARGS = xargs + +ifeq ($(USE_TCL), true) +SUBDIR += libpgtcl +endif + +include mk/postgres.subdir.mk + +TAGS: + rm -f TAGS; \ + for i in backend libpq bin; do \ + $(FIND) $$i -name '*.[chyl]' -print | $(XARGS) $(ETAGS) -a ; \ + done + +# target to generate a backup tar file and split files that can be +# saved to 1.44M floppy +BACKUP: + rm -f BACKUP.filelist BACKUP.tgz; \ + $(FIND) . -not -path '*obj/*' -not -path '*data/*' -type f -print > BACKUP.filelist; \ + $(TAR) --files-from BACKUP.filelist -c -z -v -f BACKUP.tgz + $(SPLIT) --bytes=1400k BACKUP.tgz pgBACKUP. + +.PHONY: TAGS +.PHONY: BACKUP diff --git a/src/Makefile.global b/src/Makefile.global new file mode 100644 index 0000000000..1ecd62acce --- /dev/null +++ b/src/Makefile.global @@ -0,0 +1,306 @@ +#------------------------------------------------------------------------- +# +# Makefile.global-- +# global configuration for the Makefiles +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +# $Header: /cvsroot/pgsql/src/Attic/Makefile.global,v 1.1.1.1 1996/07/09 06:21:07 scrappy Exp $ +# +# NOTES +# This is seen by any Makefiles that include mk/postgres.mk. 
To +# override the default setting, create a Makefile.custom in this +# directory and put your defines there. (Makefile.custom is included +# at the end of this file.) +# +# If you change any of these defines you probably have to +# gmake clean; gmake +# since no dependecies are created for these. (of course you can +# be crafty and check what files really depend on them and just remake +# those). +# +#------------------------------------------------------------------------- + + +############################################################################## +# +# CONFIGURATION SECTION +# +# Following are settings pertaining to the postgres build and +# installation. The most important one is obviously the name +# of the port. + +# The name of the port. Valid choices are: +# alpha - DEC Alpha AXP on OSF/1 2.0 +# hpux - HP PA-RISC on HP-UX 9.0 +# sparc_solaris - SUN SPARC on Solaris 2.4 +# sparc - SUN SPARC on SunOS 4.1.3 +# ultrix4 - DEC MIPS on Ultrix 4.4 +# linux - Intel x86 on Linux 1.2 and Linux ELF +# (For non-ELF Linux, you need to comment out +# "LINUX_ELF=1" in src/mk/port/postgres.mk.linux) +# BSD44_derived - OSs derived from 4.4-lite BSD (NetBSD, FreeBSD) +# bsdi - BSD/OS 2.0 and 2.01 +# aix - IBM on AIX 3.2.5 +# irix5 - SGI MIPS on IRIX 5.3 +# Some hooks are provided for +# svr4 - Intel x86 on Intel SVR4 +# next - Motorola MC68K or Intel x86 on NeXTSTEP 3.2 +# but these are guaranteed not to work as of yet. +# +# XXX Note that you MUST set PORTNAME here (or on the command line) so +# that port-dependent variables are correctly set within this file. +# Makefile.custom does not take effect (for ifeq purposes) +# until after this file is processed! +# make sure that you have no whitespaces after the PORTNAME setting +# or the makefiles can get confused +PORTNAME= alpha + +# POSTGRESLOGIN is the login name of the user who gets special +# privileges within the database. 
By default it is "postgres", but +# you can change it to any existing login name (such as your own +# login if you are compiling a private version or don't have root +# access). +POSTGRESLOGIN= postgres + +# For convenience, POSTGRESDIR is where DATADIR, BINDIR, and LIBDIR +# and other target destinations are rooted. Of course, each of these is +# changable separately. +POSTGRESDIR= /private/postgres95 + +# SRCDIR specifies where the source files are. +SRCDIR= $(POSTGRESDIR)/src + +# DATADIR specifies where the postmaster expects to find its database. +# This may be overridden by command line options or the PGDATA environment +# variable. +DATADIR= $(POSTGRESDIR)/data + +# Where the postgres executables live (changeable by just putting them +# somewhere else and putting that directory in your shell PATH) +BINDIR= $(POSTGRESDIR)/bin + +# Where libpq.a gets installed. You must put it where your loader will +# look for it if you wish to use the -lpq convention. Otherwise you +# can just put the absolute pathname to the library at the end of your +# command line. +LIBDIR= $(POSTGRESDIR)/lib + +# This is the directory where IPC utilities ipcs and ipcrm are located +# +IPCSDIR= /usr/bin + +# Where the man pages (suitable for use with "man") get installed. +POSTMANDIR= $(POSTGRESDIR)/man + +# Where the formatted documents (e.g., the reference manual) get installed. +POSTDOCDIR= $(POSTGRESDIR)/doc + +# Where the header files necessary to build frontend programs get installed. +HEADERDIR= $(POSTGRESDIR)/include + +# NAMEDATALEN is the max length for system identifiers (e.g. table names, +# attribute names, function names, etc.) +# +# These MUST be set here. DO NOT COMMENT THESE OUT +# Setting these too high will result in excess space usage for system catalogs +# Setting them too low will make the system unusable. +# values between 16 and 64 that are multiples of four are recommended. +# +# NOTE also that databases with different NAMEDATALEN's cannot interoperate! 
+# +NAMEDATALEN = 32 +# OIDNAMELEN should be set to NAMEDATALEN + sizeof(Oid) +OIDNAMELEN = 36 + +CFLAGS+= -DNAMEDATALEN=$(NAMEDATALEN) -DOIDNAMELEN=$(OIDNAMELEN) + +############################################################################## +# +# FEATURES +# +# To disable a feature, comment out the entire definition +# (that is, prepend '#', don't set it to "0" or "no"). + +# Comment out ENFORCE_ALIGNMENT if you do NOT want unaligned access to +# multi-byte types to generate a bus error. +ENFORCE_ALIGNMENT= true + +# Comment out CDEBUG to turn off debugging and sanity-checking. +# +# XXX on MIPS, use -g3 if you want to compile with -O +CDEBUG= -g + +# turn this on if you prefer European style dates instead of American +# style dates +# EUROPEAN_DATES = 1 + +# Comment out PROFILE to disable profiling. +# +# XXX define on MIPS if you want to be able to use pixie. +# note that this disables dynamic loading! +#PROFILE= -p -non_shared + +# About the use of readline in psql: +# psql does not require the GNU readline and history libraries. Hence, we +# do not compile with them by default. However, there are hooks in the +# program which supports the use of GNU readline and history. Should you +# decide to use them, change USE_READLINE to true and change READLINE_INCDIR +# and READLINE_LIBDIR to reflect the location of the readline and histroy +# headers and libraries. +# +#USE_READLINE= true + +# directories for the readline and history libraries. +READLINE_INCDIR= /usr/local/include +HISTORY_INCDIR= /usr/local/include +READLINE_LIBDIR= /usr/local/lib +HISTORY_LIBDIR= /usr/local/lib + +# If you do not plan to use Host based authentication, +# comment out the following line +HBA = 1 + +ifdef HBA +HBAFLAGS= -DHBA +endif + + + +# If you plan to use Kerberos for authentication... +# +# Comment out KRBVERS if you do not use Kerberos. +# Set KRBVERS to "4" for Kerberos v4, "5" for Kerberos v5. +# XXX Edit the default Kerberos variables below! 
+# +#KRBVERS= 5 + + +# Globally pass Kerberos file locations. +# these are used in the postmaster and all libpq applications. +# +# Adjust KRBINCS and KRBLIBS to reflect where you have Kerberos +# include files and libraries installed. +# PG_KRB_SRVNAM is the name under which POSTGRES is registered in +# the Kerberos database (KDC). +# PG_KRB_SRVTAB is the location of the server's keytab file. +# +ifdef KRBVERS +KRBINCS= -I/usr/athena/include +KRBLIBS= -L/usr/athena/lib +KRBFLAGS+= $(KRBINCS) -DPG_KRB_SRVNAM='"postgres_dbms"' + ifeq ($(KRBVERS), 4) +KRBFLAGS+= -DKRB4 +KRBFLAGS+= -DPG_KRB_SRVTAB='"/etc/srvtab"' +KRBLIBS+= -lkrb -ldes + else + ifeq ($(KRBVERS), 5) +KRBFLAGS+= -DKRB5 +KRBFLAGS+= -DPG_KRB_SRVTAB='"FILE:/krb5/srvtab.postgres"' +KRBLIBS+= -lkrb5 -lcrypto -lcom_err -lisode + endif + endif +endif + +# +# location of Tcl/Tk headers and libraries +# +# Uncomment this to build the tcl utilities. +USE_TCL= true +# customize these to your site's needs +# +TCL_INCDIR= /usr/local/devel/tcl7.4/include +TCL_LIBDIR= /usr/local/devel/tcl7.4/lib +TCL_LIB = -ltcl7.4 +TK_INCDIR= /usr/local/devel/tk4.0/include +TK_LIBDIR= /usr/local/devel/tk4.0/lib +TK_LIB = -ltk4.0 + +# +# include port specific rules and variables. For instance: +# +# signal(2) handling - this is here because it affects some of +# the frontend commands as well as the backend server. +# +# Ultrix and SunOS provide BSD signal(2) semantics by default. +# +# SVID2 and POSIX signal(2) semantics differ from BSD signal(2) +# semantics. We can use the POSIX sigaction(2) on systems that +# allow us to request restartable signals (SA_RESTART). +# +# Some systems don't allow restartable signals at all unless we +# link to a special BSD library. +# +# We devoutly hope that there aren't any systems that provide +# neither POSIX signals nor BSD signals. The alternative +# is to do signal-handler reinstallation, which doesn't work well +# at all. 
+# +-include $(MKDIR)/port/postgres.mk.$(PORTNAME) + +############################################################################## +# +# Flags for CC and LD. (depend on CDEBUG and PROFILE) +# + +# Globally pass debugging/optimization/profiling flags based +# on the options selected above. +ifdef CDEBUG + CFLAGS+= $(CDEBUG) + LDFLAGS+= $(CDEBUG) +else + ifndef CFLAGS_OPT + CFLAGS_OPT= -O + endif + CFLAGS+= $(CFLAGS_OPT) +# +# Uncommenting this will make things go a LOT faster, but you will +# also lose a lot of useful error-checking. +# + CFLAGS+= -DNO_ASSERT_CHECKING +endif + +ifdef PROFILE +CFLAGS+= $(PROFILE) +LDFLAGS+= $(PROFILE) +endif + +# Globally pass PORTNAME +CFLAGS+= -DPORTNAME_$(PORTNAME) + +# Globally pass the default TCP port for postmaster(1). +CFLAGS+= -DPOSTPORT='"5432"' + +# include flags from mk/port/postgres.mk.$(PORTNAME) +CFLAGS+= $(CFLAGS_BE) +LDADD+= $(LDADD_BE) +LDFLAGS+= $(LDFLAGS_BE) + + +############################################################################## +# +# Miscellaneous configuration +# + +# This is the time, in seconds, at which a given backend server +# will wait on a lock before deciding to abort the transaction +# (this is what we do in lieu of deadlock detection). +# +# Low numbers are not recommended as they will tend to cause +# false aborts if many transactions are long-lived. +CFLAGS+= -DDEADLOCK_TIMEOUT=60 + +srcdir= $(SRCDIR) +includedir= $(HEADERDIR) +objdir= obj + + +############################################################################## +# +# Customization. 
+# +-include $(MKDIR)/../Makefile.custom + + diff --git a/src/backend/Makefile b/src/backend/Makefile new file mode 100644 index 0000000000..4cdc7adaf4 --- /dev/null +++ b/src/backend/Makefile @@ -0,0 +1,289 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for the postgres backend (and the postmaster) +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +# $Header: /cvsroot/pgsql/src/backend/Makefile,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $ +# +#------------------------------------------------------------------------- + +# +# The following turns on intermediate linking of partial objects to speed +# the link cycle during development. (To turn this off, put "BIGOBJS=false" +# in your custom makefile, ../Makefile.custom.) +BIGOBJS= true + + +PROG= postgres + +MKDIR= ../mk +include $(MKDIR)/postgres.mk + + +include $(CURDIR)/access/Makefile.inc +include $(CURDIR)/bootstrap/Makefile.inc +include $(CURDIR)/catalog/Makefile.inc +include $(CURDIR)/commands/Makefile.inc +include $(CURDIR)/executor/Makefile.inc +include $(CURDIR)/include/Makefile.inc +include $(CURDIR)/lib/Makefile.inc +include $(CURDIR)/libpq/Makefile.inc +include $(CURDIR)/main/Makefile.inc +include $(CURDIR)/nodes/Makefile.inc +include $(CURDIR)/optimizer/Makefile.inc +include $(CURDIR)/parser/Makefile.inc +include $(CURDIR)/port/Makefile.inc +include $(CURDIR)/postmaster/Makefile.inc +include $(CURDIR)/regex/Makefile.inc +include $(CURDIR)/rewrite/Makefile.inc +include $(CURDIR)/storage/Makefile.inc +include $(CURDIR)/tcop/Makefile.inc +include $(CURDIR)/tioga/Makefile.inc +include $(CURDIR)/utils/Makefile.inc + +SRCS:= ${SRCS_ACCESS} ${SRCS_BOOTSTRAP} $(SRCS_CATALOG) ${SRCS_COMMANDS} \ + ${SRCS_EXECUTOR} $(SRCS_LIB) $(SRCS_LIBPQ) ${SRCS_MAIN} \ + ${SRCS_NODES} ${SRCS_OPTIMIZER} ${SRCS_PARSER} ${SRCS_PORT} \ + $(SRCS_POSTMASTER) ${SRCS_REGEX} ${SRCS_REWRITE} ${SRCS_STORAGE} \ + ${SRCS_TCOP} ${SRCS_UTILS} + 
+ifeq ($(BIGOBJS), true) +OBJS= ACCESS.o BOOTSTRAP.o COMMANDS.o EXECUTOR.o MAIN.o MISC.o NODES.o \ + PARSER.o OPTIMIZER.o REGEX.o REWRITE.o STORAGE.o TCOP.o UTILS.o +CLEANFILES+= $(subst .s,.o,$(SRCS:.c=.o)) $(OBJS) +else +OBJS:= $(subst .s,.o,$(SRCS:%.c=$(objdir)/%.o)) +CLEANFILES+= $(notdir $(OBJS)) +endif + +############################################################################# +# +# TIOGA stuff +# +ifdef TIOGA +SRCS+= $(SRCS_TIOGA) + ifeq ($(BIGOBJS), true) +TIOGA.o: $(SRCS_TIOGA:%.c=$(objdir)/%.o) + $(make_partial) +OBJS+= TIOGA.o +CLEANFILES+= $(SRCS_TIOGA:%.c=%.o) TIOGA.o + else +OBJS+= $(SRCS_TIOGA:%.c=$(objdir)/%.o) + endif +endif + + +############################################################################# +# +# Compiling the postgres backend. +# +CFLAGS+= -DPOSTGRESDIR='"$(POSTGRESDIR)"' \ + -DPGDATADIR='"$(DATADIR)"' \ + -I$(CURDIR)/. -I$(CURDIR)/$(objdir) \ + -I$(CURDIR)/include \ + -I$(CURDIR)/port/$(PORTNAME) + +# turn this on if you prefer European style dates instead of American +# style dates +ifdef EUROPEAN_DATES +CFLAGS += -DEUROPEAN_STYLE +endif + +# kerberos flags +ifdef KRBVERS +CFLAGS+= $(KRBFLAGS) +LDADD+= $(KRBLIBS) +endif + +# host based access flags +ifdef HBA +CFLAGS+= $(HBAFLAGS) +endif + + + +# +# All systems except NEXTSTEP require the math library. +# Loader flags for system-dependent libraries are appended in +# src/backend/port/$(PORTNAME)/Makefile.inc +# +ifneq ($(PORTNAME), next) +LDADD+= -lm +endif + +# statically link in libc for linux +ifeq ($(PORTNAME), linux) +LDADD+= -lc +endif + +postgres: $(POSTGRES_DEPEND) $(OBJS) $(EXPORTS) + $(CC) $(LDFLAGS) -o $(objdir)/$(@F) $(addprefix $(objdir)/,$(notdir $(OBJS))) $(LDADD) + +# Make this target first if you are doing a parallel make. +# The targets in 'first' need to be made sequentially because of dependencies. +# Then, you can make 'all' with parallelism turned on. 
+first: $(POSTGRES_DEPEND) + + +############################################################################# +# +# Partial objects for platforms with slow linkers. +# +ifeq ($(BIGOBJS), true) + +OBJS_ACCESS:= $(SRCS_ACCESS:%.c=$(objdir)/%.o) +OBJS_BOOTSTRAP:= $(SRCS_BOOTSTRAP:%.c=$(objdir)/%.o) +OBJS_CATALOG:= $(SRCS_CATALOG:%.c=$(objdir)/%.o) +OBJS_COMMANDS:= $(SRCS_COMMANDS:%.c=$(objdir)/%.o) +OBJS_EXECUTOR:= $(SRCS_EXECUTOR:%.c=$(objdir)/%.o) +OBJS_MAIN:= $(SRCS_MAIN:%.c=$(objdir)/%.o) +OBJS_POSTMASTER:= $(SRCS_POSTMASTER:%.c=$(objdir)/%.o) +OBJS_LIB:= $(SRCS_LIB:%.c=$(objdir)/%.o) +OBJS_LIBPQ:= $(SRCS_LIBPQ:%.c=$(objdir)/%.o) +OBJS_PORT:= $(addprefix $(objdir)/,$(subst .s,.o,$(SRCS_PORT:.c=.o))) +OBJS_NODES:= $(SRCS_NODES:%.c=$(objdir)/%.o) +OBJS_PARSER:= $(SRCS_PARSER:%.c=$(objdir)/%.o) +OBJS_OPTIMIZER:= $(SRCS_OPTIMIZER:%.c=$(objdir)/%.o) +OBJS_REGEX:= $(SRCS_REGEX:%.c=$(objdir)/%.o) +OBJS_REWRITE:= $(SRCS_REWRITE:%.c=$(objdir)/%.o) +OBJS_STORAGE:= $(SRCS_STORAGE:%.c=$(objdir)/%.o) +OBJS_TCOP:= $(SRCS_TCOP:%.c=$(objdir)/%.o) +OBJS_UTILS:= $(SRCS_UTILS:%.c=$(objdir)/%.o) + +ACCESS.o: $(OBJS_ACCESS) + $(make_partial) +BOOTSTRAP.o: $(OBJS_BOOTSTRAP) + $(make_partial) +COMMANDS.o: $(OBJS_COMMANDS) + $(make_partial) +EXECUTOR.o: $(OBJS_EXECUTOR) + $(make_partial) +MAIN.o: $(OBJS_MAIN) $(OBJS_POSTMASTER) + $(make_partial) +MISC.o: $(OBJS_CATALOG) $(OBJS_LIB) $(OBJS_LIBPQ) $(OBJS_PORT) + $(make_partial) +NODES.o: $(OBJS_NODES) + $(make_partial) +PARSER.o: $(OBJS_PARSER) + $(make_partial) +OPTIMIZER.o: $(OBJS_OPTIMIZER) + $(make_partial) +REGEX.o: $(OBJS_REGEX) + $(make_partial) +REWRITE.o: $(OBJS_REWRITE) + $(make_partial) +STORAGE.o: $(OBJS_STORAGE) + $(make_partial) +TCOP.o: $(OBJS_TCOP) + $(make_partial) +UTILS.o: $(OBJS_UTILS) + $(make_partial) +endif + +############################################################################# +# +# Installation. +# +# Install the bki files to the data directory. 
We also copy a version +# of them that has "PGUID" intact, so one can change the value of the +# postgres userid before running initdb in the case of customizing the +# binary release (i.e., fixing up PGUID w/o recompiling the system). +# Those files are copied out as foo.source. The program newbki(1) can +# be run later to reset the postgres login id (but it must be run before +# initdb is run, or after clearing the data directory with +# cleardbdir(1)). [newbki distributed with v4r2 but not with Postgres95.] +# + +# NAMEDATALEN=`egrep "^#define NAMEDATALEN" $(CURDIR)/include/postgres.h | awk '{print $$3}'`; \ +# OIDNAMELEN=`egrep "^#define OIDNAMELEN" $(CURDIR)/include/postgres.h | awk '{print $$3}'`; \ + +install: beforeinstall pg_id $(BKIFILES) postgres + $(INSTALL) $(INSTL_EXE_OPTS) $(objdir)/postgres $(DESTDIR)$(BINDIR)/postgres + @rm -f $(DESTDIR)$(BINDIR)/postmaster + cd $(DESTDIR)$(BINDIR); ln -s postgres postmaster + @cd $(objdir); \ + PG_UID=`./pg_id $(POSTGRESLOGIN)`; \ + POSTGRESLOGIN=$(POSTGRESLOGIN);\ + echo "NAMEDATALEN = $(NAMEDATALEN)"; \ + echo "OIDNAMELEN = $(OIDNAMELEN)"; \ + case $$PG_UID in "NOUSER") \ + echo "Warning: no account named $(POSTGRESLOGIN), using yours";\ + POSTGRESLOGIN=`whoami`; \ + PG_UID=`./pg_id`;; \ + esac ;\ + for bki in $(BKIFILES); do \ + sed \ + -e "s/postgres PGUID/$$POSTGRESLOGIN $$PG_UID/" \ + -e "s/NAMEDATALEN/$(NAMEDATALEN)/g" \ + -e "s/OIDNAMELEN/$(OIDNAMELEN)/g" \ + -e "s/PGUID/$$PG_UID/" \ + < $$bki > $$bki.sed ; \ + echo "Installing $(DESTDIR)$(DATADIR)/files/$$bki."; \ + $(INSTALL) $(INSTLOPTS) \ + $$bki.sed $(DESTDIR)$(DATADIR)/files/$$bki; \ + rm -f $$bki.sed; \ + echo "Installing $(DESTDIR)$(DATADIR)/files/$$bki.source."; \ + $(INSTALL) $(INSTLOPTS) \ + $$bki $(DESTDIR)$(DATADIR)/files/$$bki.source; \ + done; + @echo "Installing $(DATADIR)/pg_hba"; + @cp $(srcdir)/libpq/pg_hba $(DATADIR) + @chmod 644 $(DATADIR)/pg_hba + + +# so we can get the UID of the postgres owner (w/o moving pg_id to +# src/tools). 
We just want the vanilla LDFLAGS for pg_id +IDLDFLAGS:= $(LDFLAGS) +ifeq ($(PORTNAME), hpux) +ifeq ($(CC), cc) +IDLDFLAGS+= -Aa -D_HPUX_SOURCE +endif +endif +pg_id: $(srcdir)/bin/pg_id/pg_id.c + $(CC) $(IDLDFLAGS) -o $(objdir)/$(@F) $< + +CLEANFILES+= pg_id postgres + + +############################################################################# +# +# Support for code development. +# + +# +# Build the file, "./ID", used by the "gid" (grep-for-identifier) tool +# +IDFILE= ID +.PHONY: $(IDFILE) +$(IDFILE): + $(CURDIR)/makeID $(PORTNAME) + +# +# Special rule to generate cpp'd version of a .c file. This is +# especially useful given all the hellish macro processing going on. +# The cpp'd version has a .C suffix. To create foo.C from foo.c, just +# type +# bmake foo.C +# +%.cpp: %.c + $(CC) -E $(CFLAGS) $(<:.C=.c) | cat -s | cb | tr -s '\012*' '\012' > $(objdir)/$(@F) + +cppall: $(SRCS:.c=.cpp) + +# +# To use Purify (SunOS only), define PURIFY to be the path (and +# options) with which to invoke the Purify loader. Only the executable +# needs to be loaded with Purify. 
+# +# PURIFY = /usr/sww/bin/purify -cache-dir=/usr/local/postgres/src/backend/purify-cache +#.if defined(PURIFY) +#${PROG}: $(POSTGRES_DEPEND) $(OBJS) $(EXPORTS) +# ${PURIFY} ${CC} ${LDFLAGS} -o $(objdir)/$(@F) $(addprefix $(objdir)/,$(notdir $(OBJS))) $(LDADD) +# +#CLEANFILES+= .purify* .pure .lock.*.o *_pure_*.o *.pure_*link* +#.endif + diff --git a/src/backend/access/Makefile.inc b/src/backend/access/Makefile.inc new file mode 100644 index 0000000000..6adc2c692b --- /dev/null +++ b/src/backend/access/Makefile.inc @@ -0,0 +1,35 @@ +#------------------------------------------------------------------------- +# +# Makefile.inc-- +# Makefile for the access methods module +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +# $Header: /cvsroot/pgsql/src/backend/access/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $ +# +#------------------------------------------------------------------------- + +accdir=$(CURDIR)/access +VPATH:=$(VPATH):$(accdir):\ + $(accdir)/common:$(accdir)/hash:$(accdir)/heap:$(accdir)/index:\ + $(accdir)/rtree:$(accdir)/nbtree:$(accdir)/transam + + +SUBSRCS= +include $(accdir)/common/Makefile.inc +include $(accdir)/hash/Makefile.inc +include $(accdir)/heap/Makefile.inc +include $(accdir)/index/Makefile.inc +include $(accdir)/rtree/Makefile.inc +include $(accdir)/nbtree/Makefile.inc +include $(accdir)/transam/Makefile.inc +SRCS_ACCESS:= $(SUBSRCS) + +HEADERS+= attnum.h funcindex.h genam.h hash.h \ + heapam.h hio.h htup.h ibit.h iqual.h istrat.h \ + itup.h nbtree.h printtup.h relscan.h rtree.h \ + sdir.h skey.h strat.h transam.h tupdesc.h tupmacs.h \ + valid.h xact.h + diff --git a/src/backend/access/attnum.h b/src/backend/access/attnum.h new file mode 100644 index 0000000000..7c999e58e9 --- /dev/null +++ b/src/backend/access/attnum.h @@ -0,0 +1,61 @@ +/*------------------------------------------------------------------------- + * + * attnum.h-- + * POSTGRES attribute number definitions. 
+ * + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: attnum.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef ATTNUM_H +#define ATTNUM_H + +#include "c.h" + +/* + * user defined attribute numbers start at 1. -ay 2/95 + */ +typedef int16 AttrNumber; + +#define InvalidAttrNumber 0 + +/* ---------------- + * support macros + * ---------------- + */ +/* + * AttributeNumberIsValid -- + * True iff the attribute number is valid. + */ +#define AttributeNumberIsValid(attributeNumber) \ + ((bool) ((attributeNumber) != InvalidAttrNumber)) + +/* + * AttrNumberIsForUserDefinedAttr -- + * True iff the attribute number corresponds to an user defined attribute. + */ +#define AttrNumberIsForUserDefinedAttr(attributeNumber) \ + ((bool) ((attributeNumber) > 0)) + +/* + * AttrNumberGetAttrOffset -- + * Returns the attribute offset for an attribute number. + * + * Note: + * Assumes the attribute number is for an user defined attribute. + */ +#define AttrNumberGetAttrOffset(attNum) \ + (AssertMacro(AttrNumberIsForUserDefinedAttr(attNum)) ? \ + ((attNum - 1)) : 0) + +/* + * AttributeOffsetGetAttributeNumber -- + * Returns the attribute number for an attribute offset. 
+ */ +#define AttrOffsetGetAttrNumber(attributeOffset) \ + ((AttrNumber) (1 + attributeOffset)) + +#endif /* ATTNUM_H */ diff --git a/src/backend/access/common/Makefile.inc b/src/backend/access/common/Makefile.inc new file mode 100644 index 0000000000..5d5dd47627 --- /dev/null +++ b/src/backend/access/common/Makefile.inc @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------- +# +# Makefile.inc-- +# Makefile for access/common +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +# $Header: /cvsroot/pgsql/src/backend/access/common/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $ +# +#------------------------------------------------------------------------- + +SUBSRCS+= heaptuple.c heapvalid.c indextuple.c indexvalid.c printtup.c \ + scankey.c tupdesc.c + diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c new file mode 100644 index 0000000000..c3e72fb97e --- /dev/null +++ b/src/backend/access/common/heaptuple.c @@ -0,0 +1,1011 @@ +/*------------------------------------------------------------------------- + * + * heaptuple.c-- + * This file contains heap tuple accessor and mutator routines, as well + * as a few various tuple utilities. 
+ * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/common/heaptuple.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $ + * + * NOTES + * The old interface functions have been converted to macros + * and moved to heapam.h + * + *------------------------------------------------------------------------- + */ +#include <string.h> + +#include "postgres.h" + +#include "access/htup.h" +#include "access/itup.h" +#include "access/tupmacs.h" +#include "access/skey.h" +#include "storage/ipc.h" +#include "storage/buf.h" +#include "storage/bufmgr.h" +#include "access/transam.h" +#include "storage/bufpage.h" /* for MAXTUPLEN */ +#include "storage/itemptr.h" +#include "utils/memutils.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" +#include "utils/nabstime.h" + +/* this is so the sparcstation debugger works */ + +#ifndef NO_ASSERT_CHECKING +#ifdef sparc +#define register +#endif /* sparc */ +#endif /* NO_ASSERT_CHECKING */ + +/* ---------------------------------------------------------------- + * misc support routines + * ---------------------------------------------------------------- + */ + +/* ---------------- + * ComputeDataSize + * ---------------- + */ +Size +ComputeDataSize(TupleDesc tupleDesc, + Datum value[], + char nulls[]) +{ + uint32 length; + int i; + int numberOfAttributes = tupleDesc->natts; + AttributeTupleForm *att = tupleDesc->attrs; + + for (length = 0, i = 0; i < numberOfAttributes; i++) { + if (nulls[i] != ' ') continue; + + switch (att[i]->attlen) { + case -1: + /* + * This is the size of the disk representation and so + * must include the additional sizeof long. 
+ */ + if (att[i]->attalign == 'd') { + length = DOUBLEALIGN(length) + + VARSIZE(DatumGetPointer(value[i])); + } else { + length = INTALIGN(length) + + VARSIZE(DatumGetPointer(value[i])); + } + break; + case sizeof(char): + length++; + break; + case sizeof(short): + length = SHORTALIGN(length + sizeof(short)); + break; + case sizeof(int32): + length = INTALIGN(length + sizeof(int32)); + break; + default: + if (att[i]->attlen < sizeof(int32)) + elog(WARN, "ComputeDataSize: attribute %d has len %d", + i, att[i]->attlen); + if (att[i]->attalign == 'd') + length = DOUBLEALIGN(length) + att[i]->attlen; + else + length = LONGALIGN(length) + att[i]->attlen; + break; + } + } + + return length; +} + +/* ---------------- + * DataFill + * ---------------- + */ +void +DataFill(char *data, + TupleDesc tupleDesc, + Datum value[], + char nulls[], + char *infomask, + bits8 bit[]) +{ + bits8 *bitP; + int bitmask; + uint32 length; + int i; + int numberOfAttributes = tupleDesc->natts; + AttributeTupleForm* att = tupleDesc->attrs; + + if (bit != NULL) { + bitP = &bit[-1]; + bitmask = CSIGNBIT; + } + + *infomask = 0; + + for (i = 0; i < numberOfAttributes; i++) { + if (bit != NULL) { + if (bitmask != CSIGNBIT) { + bitmask <<= 1; + } else { + bitP += 1; + *bitP = 0x0; + bitmask = 1; + } + + if (nulls[i] == 'n') { + *infomask |= HEAP_HASNULL; + continue; + } + + *bitP |= bitmask; + } + + switch (att[i]->attlen) { + case -1: + *infomask |= HEAP_HASVARLENA; + if (att[i]->attalign=='d') { + data = (char *) DOUBLEALIGN(data); + } else { + data = (char *) INTALIGN(data); + } + length = VARSIZE(DatumGetPointer(value[i])); + memmove(data, DatumGetPointer(value[i]),length); + data += length; + break; + case sizeof(char): + *data = att[i]->attbyval ? + DatumGetChar(value[i]) : *((char *) value[i]); + data += sizeof(char); + break; + case sizeof(int16): + data = (char *) SHORTALIGN(data); + * (short *) data = (att[i]->attbyval ? 
+ DatumGetInt16(value[i]) : + *((short *) value[i])); + data += sizeof(short); + break; + case sizeof(int32): + data = (char *) INTALIGN(data); + * (int32 *) data = (att[i]->attbyval ? + DatumGetInt32(value[i]) : + *((int32 *) value[i])); + data += sizeof(int32); + break; + default: + if (att[i]->attlen < sizeof(int32)) + elog(WARN, "DataFill: attribute %d has len %d", + i, att[i]->attlen); + if (att[i]->attalign == 'd') { + data = (char *) DOUBLEALIGN(data); + memmove(data, DatumGetPointer(value[i]), + att[i]->attlen); + data += att[i]->attlen; + } else { + data = (char *) LONGALIGN(data); + memmove(data, DatumGetPointer(value[i]), + att[i]->attlen); + data += att[i]->attlen; + } + + } + } +} + +/* ---------------------------------------------------------------- + * heap tuple interface + * ---------------------------------------------------------------- + */ + +/* ---------------- + * heap_attisnull - returns 1 iff tuple attribute is not present + * ---------------- + */ +int +heap_attisnull(HeapTuple tup, int attnum) +{ + if (attnum > (int)tup->t_natts) + return (1); + + if (HeapTupleNoNulls(tup)) return(0); + + if (attnum > 0) { + return(att_isnull(attnum - 1, tup->t_bits)); + } else + switch (attnum) { + case SelfItemPointerAttributeNumber: + case ObjectIdAttributeNumber: + case MinTransactionIdAttributeNumber: + case MinCommandIdAttributeNumber: + case MaxTransactionIdAttributeNumber: + case MaxCommandIdAttributeNumber: + case ChainItemPointerAttributeNumber: + case AnchorItemPointerAttributeNumber: + case MinAbsoluteTimeAttributeNumber: + case MaxAbsoluteTimeAttributeNumber: + case VersionTypeAttributeNumber: + break; + + case 0: + elog(WARN, "heap_attisnull: zero attnum disallowed"); + + default: + elog(WARN, "heap_attisnull: undefined negative attnum"); + } + + return (0); +} + +/* ---------------------------------------------------------------- + * system attribute heap tuple support + * ---------------------------------------------------------------- + 
*/ + +/* ---------------- + * heap_sysattrlen + * + * This routine returns the length of a system attribute. + * ---------------- + */ +int +heap_sysattrlen(AttrNumber attno) +{ + HeapTupleData *f = NULL; + int len; + + switch (attno) { + case SelfItemPointerAttributeNumber: + len = sizeof f->t_ctid; + break; + case ObjectIdAttributeNumber: + len = sizeof f->t_oid; + break; + case MinTransactionIdAttributeNumber: + len = sizeof f->t_xmin; + break; + case MinCommandIdAttributeNumber: + len = sizeof f->t_cmin; + break; + case MaxTransactionIdAttributeNumber: + len = sizeof f->t_xmax; + break; + case MaxCommandIdAttributeNumber: + len = sizeof f->t_cmax; + break; + case ChainItemPointerAttributeNumber: + len = sizeof f->t_chain; + break; + case AnchorItemPointerAttributeNumber: + elog(WARN, "heap_sysattrlen: field t_anchor does not exist!"); + break; + case MinAbsoluteTimeAttributeNumber: + len = sizeof f->t_tmin; + break; + case MaxAbsoluteTimeAttributeNumber: + len = sizeof f->t_tmax; + break; + case VersionTypeAttributeNumber: + len = sizeof f->t_vtype; + break; + default: + elog(WARN, "sysattrlen: System attribute number %d unknown.", + attno); + len = 0; + break; + } + return (len); +} + +/* ---------------- + * heap_sysattrbyval + * + * This routine returns the "by-value" property of a system attribute. 
+ * ---------------- + */ +bool +heap_sysattrbyval(AttrNumber attno) +{ + bool byval; + + switch (attno) { + case SelfItemPointerAttributeNumber: + byval = false; + break; + case ObjectIdAttributeNumber: + byval = true; + break; + case MinTransactionIdAttributeNumber: + byval = true; + break; + case MinCommandIdAttributeNumber: + byval = true; + break; + case MaxTransactionIdAttributeNumber: + byval = true; + break; + case MaxCommandIdAttributeNumber: + byval = true; + break; + case ChainItemPointerAttributeNumber: + byval = false; + break; + case AnchorItemPointerAttributeNumber: + byval = false; + break; + case MinAbsoluteTimeAttributeNumber: + byval = true; + break; + case MaxAbsoluteTimeAttributeNumber: + byval = true; + break; + case VersionTypeAttributeNumber: + byval = true; + break; + default: + byval = true; + elog(WARN, "sysattrbyval: System attribute number %d unknown.", + attno); + break; + } + + return byval; +} + +/* ---------------- + * heap_getsysattr + * ---------------- + */ +char * +heap_getsysattr(HeapTuple tup, Buffer b, int attnum) +{ + switch (attnum) { + case SelfItemPointerAttributeNumber: + return ((char *)&tup->t_ctid); + case ObjectIdAttributeNumber: + return ((char *) (long) tup->t_oid); + case MinTransactionIdAttributeNumber: + return ((char *) (long) tup->t_xmin); + case MinCommandIdAttributeNumber: + return ((char *) (long) tup->t_cmin); + case MaxTransactionIdAttributeNumber: + return ((char *) (long) tup->t_xmax); + case MaxCommandIdAttributeNumber: + return ((char *) (long) tup->t_cmax); + case ChainItemPointerAttributeNumber: + return ((char *) &tup->t_chain); + case AnchorItemPointerAttributeNumber: + elog(WARN, "heap_getsysattr: t_anchor does not exist!"); + break; + + /* + * For tmin and tmax, we need to do some extra work. These don't + * get filled in until the vacuum cleaner runs (or we manage to flush + * a page after setting the value correctly below). 
If the vacuum + * cleaner hasn't run yet, then the times stored in the tuple are + * wrong, and we need to look up the commit time of the transaction. + * We cache this value in the tuple to avoid doing the work more than + * once. + */ + + case MinAbsoluteTimeAttributeNumber: + if (!AbsoluteTimeIsBackwardCompatiblyValid(tup->t_tmin) && + TransactionIdDidCommit(tup->t_xmin)) + tup->t_tmin = TransactionIdGetCommitTime(tup->t_xmin); + return ((char *) (long) tup->t_tmin); + case MaxAbsoluteTimeAttributeNumber: + if (!AbsoluteTimeIsBackwardCompatiblyReal(tup->t_tmax)) { + if (TransactionIdDidCommit(tup->t_xmax)) + tup->t_tmax = TransactionIdGetCommitTime(tup->t_xmax); + else + tup->t_tmax = CURRENT_ABSTIME; + } + return ((char *) (long) tup->t_tmax); + case VersionTypeAttributeNumber: + return ((char *) (long) tup->t_vtype); + default: + elog(WARN, "heap_getsysattr: undefined attnum %d", attnum); + } + return(NULL); +} + +/* ---------------- + * fastgetattr + * + * This is a newer version of fastgetattr which attempts to be + * faster by caching attribute offsets in the attribute descriptor. + * + * an alternate way to speed things up would be to cache offsets + * with the tuple, but that seems more difficult unless you take + * the storage hit of actually putting those offsets into the + * tuple you send to disk. Yuck. + * + * This scheme will be slightly slower than that, but should + * preform well for queries which hit large #'s of tuples. After + * you cache the offsets once, examining all the other tuples using + * the same attribute descriptor will go much quicker. -cim 5/4/91 + * ---------------- + */ +char * +fastgetattr(HeapTuple tup, + int attnum, + TupleDesc tupleDesc, + bool *isnull) +{ + char *tp; /* ptr to att in tuple */ + bits8 *bp; /* ptr to att in tuple */ + int slow; /* do we have to walk nulls? 
*/ + AttributeTupleForm *att = tupleDesc->attrs; + + /* ---------------- + * sanity checks + * ---------------- + */ + + Assert(PointerIsValid(isnull)); + Assert(attnum > 0); + + /* ---------------- + * Three cases: + * + * 1: No nulls and no variable length attributes. + * 2: Has a null or a varlena AFTER att. + * 3: Has nulls or varlenas BEFORE att. + * ---------------- + */ + + *isnull = false; + + if (HeapTupleNoNulls(tup)) { + attnum--; + if (att[attnum]->attcacheoff > 0) { + return (char *) + fetchatt( &(att[attnum]), + (char *)tup + tup->t_hoff + att[attnum]->attcacheoff); + } else if (attnum == 0) { + /* + * first attribute is always at position zero + */ + return((char *) fetchatt(&(att[0]), (char *) tup + tup->t_hoff)); + } + + tp = (char *) tup + tup->t_hoff; + + slow = 0; + } else { + /* + * there's a null somewhere in the tuple + */ + + bp = tup->t_bits; + tp = (char *) tup + tup->t_hoff; + slow = 0; + attnum--; + + /* ---------------- + * check to see if desired att is null + * ---------------- + */ + + if (att_isnull(attnum, bp)) { + *isnull = true; + return NULL; + } + + /* ---------------- + * Now check to see if any preceeding bits are null... + * ---------------- + */ + + { + register int i = 0; /* current offset in bp */ + + for (i = 0; i < attnum && !slow; i++) { + if (att_isnull(i, bp)) slow = 1; + } + } + } + + /* + * now check for any non-fixed length attrs before our attribute + */ + if (!slow) { + if (att[attnum]->attcacheoff > 0) { + return (char *) + fetchatt(&(att[attnum]), + tp + att[attnum]->attcacheoff); + } else if (attnum == 0) { + return (char *) + fetchatt(&(att[0]), (char *) tup + tup->t_hoff); + } else if (!HeapTupleAllFixed(tup)) { + register int j = 0; + + for (j = 0; j < attnum && !slow; j++) + if (att[j]->attlen < 1) slow = 1; + } + } + + /* + * if slow is zero, and we got here, we know that we have a tuple with + * no nulls. We also have to initialize the remainder of + * the attribute cached offset values. 
+ */ + if (!slow) { + register int j = 1; + register long off; + + /* + * need to set cache for some atts + */ + + att[0]->attcacheoff = 0; + + while (att[j]->attcacheoff > 0) j++; + + off = att[j-1]->attcacheoff + att[j-1]->attlen; + + for (; j < attnum + 1; j++) { + switch(att[j]->attlen) { + case -1: + off = (att[j]->attalign=='d') ? + DOUBLEALIGN(off) : INTALIGN(off); + break; + case sizeof(char): + break; + case sizeof(short): + off = SHORTALIGN(off); + break; + case sizeof(int32): + off = INTALIGN(off); + break; + default: + if (att[j]->attlen < sizeof(int32)) { + elog(WARN, + "fastgetattr: attribute %d has len %d", + j, att[j]->attlen); + } + if (att[j]->attalign == 'd') + off = DOUBLEALIGN(off); + else + off = LONGALIGN(off); + break; + } + + att[j]->attcacheoff = off; + off += att[j]->attlen; + } + + return + (char *)fetchatt(&(att[attnum]), tp + att[attnum]->attcacheoff); + } else { + register bool usecache = true; + register int off = 0; + register int i; + + /* + * Now we know that we have to walk the tuple CAREFULLY. + * + * Note - This loop is a little tricky. On iteration i we + * first set the offset for attribute i and figure out how much + * the offset should be incremented. Finally, we need to align the + * offset based on the size of attribute i+1 (for which the offset + * has been computed). -mer 12 Dec 1991 + */ + + for (i = 0; i < attnum; i++) { + if (!HeapTupleNoNulls(tup)) { + if (att_isnull(i, bp)) { + usecache = false; + continue; + } + } + switch (att[i]->attlen) { + case -1: + off = (att[i]->attalign=='d') ? 
+ DOUBLEALIGN(off) : INTALIGN(off); + break; + case sizeof(char): + break; + case sizeof(short): + off = SHORTALIGN(off); + break; + case sizeof(int32): + off = INTALIGN(off); + break; + default: + if (att[i]->attlen < sizeof(int32)) + elog(WARN, + "fastgetattr2: attribute %d has len %d", + i, att[i]->attlen); + if (att[i]->attalign == 'd') + off = DOUBLEALIGN(off); + else + off = LONGALIGN(off); + break; + } + if (usecache && att[i]->attcacheoff > 0) { + off = att[i]->attcacheoff; + if (att[i]->attlen == -1) { + usecache = false; + } + } else { + if (usecache) att[i]->attcacheoff = off; + } + + switch(att[i]->attlen) { + case sizeof(char): + off++; + break; + case sizeof(int16): + off += sizeof(int16); + break; + case sizeof(int32): + off += sizeof(int32); + break; + case -1: + usecache = false; + off += VARSIZE(tp + off); + break; + default: + off += att[i]->attlen; + break; + } + } + switch (att[attnum]->attlen) { + case -1: + off = (att[attnum]->attalign=='d')? + DOUBLEALIGN(off) : INTALIGN(off); + break; + case sizeof(char): + break; + case sizeof(short): + off = SHORTALIGN(off); + break; + case sizeof(int32): + off = INTALIGN(off); + break; + default: + if (att[attnum]->attlen < sizeof(int32)) + elog(WARN, "fastgetattr3: attribute %d has len %d", + attnum, att[attnum]->attlen); + if (att[attnum]->attalign == 'd') + off = DOUBLEALIGN(off); + else + off = LONGALIGN(off); + break; + } + return((char *) fetchatt(&(att[attnum]), tp + off)); + } +} + +/* ---------------- + * heap_getattr + * + * returns an attribute from a heap tuple. uses + * ---------------- + */ +char * +heap_getattr(HeapTuple tup, + Buffer b, + int attnum, + TupleDesc tupleDesc, + bool *isnull) +{ + bool localIsNull; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(tup != NULL); + + if (! 
PointerIsValid(isnull)) + isnull = &localIsNull; + + if (attnum > (int) tup->t_natts) { + *isnull = true; + return ((char *) NULL); + } + + /* ---------------- + * take care of user defined attributes + * ---------------- + */ + if (attnum > 0) { + char *datum; + datum = fastgetattr(tup, attnum, tupleDesc, isnull); + + return (datum); + } + + /* ---------------- + * take care of system attributes + * ---------------- + */ + *isnull = false; + return + heap_getsysattr(tup, b, attnum); +} + +/* ---------------- + * heap_copytuple + * + * returns a copy of an entire tuple + * ---------------- + */ +HeapTuple +heap_copytuple(HeapTuple tuple) +{ + HeapTuple newTuple; + + if (! HeapTupleIsValid(tuple)) + return (NULL); + + /* XXX For now, just prevent an undetectable executor related error */ + if (tuple->t_len > MAXTUPLEN) { + elog(WARN, "palloctup: cannot handle length %d tuples", + tuple->t_len); + } + + newTuple = (HeapTuple) palloc(tuple->t_len); + memmove((char *) newTuple, (char *) tuple, (int) tuple->t_len); + return(newTuple); +} + +/* ---------------- + * heap_deformtuple + * + * the inverse of heap_formtuple (see below) + * ---------------- + */ +void +heap_deformtuple(HeapTuple tuple, + TupleDesc tdesc, + Datum values[], + char nulls[]) +{ + int i; + int natts; + + Assert(HeapTupleIsValid(tuple)); + + natts = tuple->t_natts; + for (i = 0; i<natts; i++) { + bool isnull; + + values[i] = (Datum)heap_getattr(tuple, + InvalidBuffer, + i+1, + tdesc, + &isnull); + if (isnull) + nulls[i] = 'n'; + else + nulls[i] = ' '; + } +} + +/* ---------------- + * heap_formtuple + * + * constructs a tuple from the given value[] and null[] arrays + * + * old comments + * Handles alignment by aligning 2 byte attributes on short boundries + * and 3 or 4 byte attributes on long word boundries on a vax; and + * aligning non-byte attributes on short boundries on a sun. Does + * not properly align fixed length arrays of 1 or 2 byte types (yet). 
+ * + * Null attributes are indicated by a 'n' in the appropriate byte + * of the null[]. Non-null attributes are indicated by a ' ' (space). + * + * Fix me. (Figure that must keep context if debug--allow give oid.) + * Assumes in order. + * ---------------- + */ +HeapTuple +heap_formtuple(TupleDesc tupleDescriptor, + Datum value[], + char nulls[]) +{ + char *tp; /* tuple pointer */ + HeapTuple tuple; /* return tuple */ + int bitmaplen; + long len; + int hoff; + bool hasnull = false; + int i; + int numberOfAttributes = tupleDescriptor->natts; + + len = sizeof *tuple - sizeof tuple->t_bits; + + for (i = 0; i < numberOfAttributes && !hasnull; i++) { + if (nulls[i] != ' ') hasnull = true; + } + + if (numberOfAttributes > MaxHeapAttributeNumber) + elog(WARN, "heap_formtuple: numberOfAttributes of %d > %d", + numberOfAttributes, MaxHeapAttributeNumber); + + if (hasnull) { + bitmaplen = BITMAPLEN(numberOfAttributes); + len += bitmaplen; + } + + hoff = len = DOUBLEALIGN(len); /* be conservative here */ + + len += ComputeDataSize(tupleDescriptor, value, nulls); + + tp = (char *) palloc(len); + tuple = (HeapTuple) tp; + + memset(tp, 0, (int)len); + + tuple->t_len = len; + tuple->t_natts = numberOfAttributes; + tuple->t_hoff = hoff; + tuple->t_tmin = INVALID_ABSTIME; + tuple->t_tmax = CURRENT_ABSTIME; + + DataFill((char *)tuple + tuple->t_hoff, + tupleDescriptor, + value, + nulls, + &tuple->t_infomask, + (hasnull ? tuple->t_bits : NULL)); + + return (tuple); +} + +/* ---------------- + * heap_modifytuple + * + * forms a new tuple from an old tuple and a set of replacement values. 
+ * ---------------- + */ +HeapTuple +heap_modifytuple(HeapTuple tuple, + Buffer buffer, + Relation relation, + Datum replValue[], + char replNull[], + char repl[]) +{ + int attoff; + int numberOfAttributes; + Datum *value; + char *nulls; + bool isNull; + HeapTuple newTuple; + int madecopy; + uint8 infomask; + + /* ---------------- + * sanity checks + * ---------------- + */ + Assert(HeapTupleIsValid(tuple)); + Assert(BufferIsValid(buffer) || RelationIsValid(relation)); + Assert(HeapTupleIsValid(tuple)); + Assert(PointerIsValid(replValue)); + Assert(PointerIsValid(replNull)); + Assert(PointerIsValid(repl)); + + /* ---------------- + * if we're pointing to a disk page, then first + * make a copy of our tuple so that all the attributes + * are available. XXX this is inefficient -cim + * ---------------- + */ + madecopy = 0; + if (BufferIsValid(buffer) == true) { + relation = (Relation) BufferGetRelation(buffer); + tuple = heap_copytuple(tuple); + madecopy = 1; + } + + numberOfAttributes = RelationGetRelationTupleForm(relation)->relnatts; + + /* ---------------- + * allocate and fill value[] and nulls[] arrays from either + * the tuple or the repl information, as appropriate. + * ---------------- + */ + value = (Datum *) palloc(numberOfAttributes * sizeof *value); + nulls = (char *) palloc(numberOfAttributes * sizeof *nulls); + + for (attoff = 0; + attoff < numberOfAttributes; + attoff += 1) { + + if (repl[attoff] == ' ') { + char *attr; + + attr = + heap_getattr(tuple, + InvalidBuffer, + AttrOffsetGetAttrNumber(attoff), + RelationGetTupleDescriptor(relation), + &isNull) ; + value[attoff] = PointerGetDatum(attr); + nulls[attoff] = (isNull) ? 
'n' : ' '; + + } else if (repl[attoff] != 'r') { + elog(WARN, "heap_modifytuple: repl is \\%3d", repl[attoff]); + + } else { /* == 'r' */ + value[attoff] = replValue[attoff]; + nulls[attoff] = replNull[attoff]; + } + } + + /* ---------------- + * create a new tuple from the values[] and nulls[] arrays + * ---------------- + */ + newTuple = heap_formtuple(RelationGetTupleDescriptor(relation), + value, + nulls); + + /* ---------------- + * copy the header except for t_len, t_natts, t_hoff, t_bits, t_infomask + * ---------------- + */ + infomask = newTuple->t_infomask; + memmove((char *) &newTuple->t_ctid, /*XXX*/ + (char *) &tuple->t_ctid, + ((char *) &tuple->t_hoff - (char *) &tuple->t_ctid)); /*XXX*/ + newTuple->t_infomask = infomask; + newTuple->t_natts = numberOfAttributes; /* fix t_natts just in case */ + + /* ---------------- + * if we made a copy of the tuple, then free it. + * ---------------- + */ + if (madecopy) + pfree(tuple); + + return + newTuple; +} + +/* ---------------------------------------------------------------- + * other misc functions + * ---------------------------------------------------------------- + */ + +HeapTuple +heap_addheader(uint32 natts, /* max domain index */ + int structlen, /* its length */ + char *structure) /* pointer to the struct */ +{ + register char *tp; /* tuple data pointer */ + HeapTuple tup; + long len; + int hoff; + + AssertArg(natts > 0); + + len = sizeof (HeapTupleData) - sizeof (tup->t_bits); + + hoff = len = DOUBLEALIGN(len); /* be conservative */ + len += structlen; + tp = (char *) palloc(len); + tup = (HeapTuple) tp; + memset((char*)tup, 0, len); + + tup->t_len = (short) len; /* XXX */ + tp += tup->t_hoff = hoff; + tup->t_natts = natts; + tup->t_infomask = 0; + + memmove(tp, structure, structlen); + + return (tup); +} diff --git a/src/backend/access/common/heapvalid.c b/src/backend/access/common/heapvalid.c new file mode 100644 index 0000000000..b80c5dd9eb --- /dev/null +++ b/src/backend/access/common/heapvalid.c 
@@ -0,0 +1,134 @@ +/*------------------------------------------------------------------------- + * + * heapvalid.c-- + * heap tuple qualification validity checking code + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/heapvalid.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "access/htup.h" +#include "access/skey.h" +#include "access/heapam.h" +#include "utils/tqual.h" +#include "access/valid.h" /* where the declarations go */ +#include "access/xact.h" + +#include "storage/buf.h" +#include "storage/bufmgr.h" +#include "storage/bufpage.h" +#include "storage/itemid.h" +#include "fmgr.h" +#include "utils/elog.h" +#include "utils/rel.h" + +/* ---------------- + * heap_keytest + * + * Test a heap tuple with respect to a scan key. + * ---------------- + */ +bool +heap_keytest(HeapTuple t, + TupleDesc tupdesc, + int nkeys, + ScanKey keys) +{ + bool isnull; + Datum atp; + int test; + + for (; nkeys--; keys++) { + atp = (Datum)heap_getattr(t, InvalidBuffer, + keys->sk_attno, + tupdesc, + &isnull); + + if (isnull) + /* XXX eventually should check if SK_ISNULL */ + return false; + + if (keys->sk_flags & SK_COMMUTE) + test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure, + keys->sk_argument, atp); + else + test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure, + atp, keys->sk_argument); + + if (!test == !(keys->sk_flags & SK_NEGATE)) + return false; + } + + return true; +} + +/* ---------------- + * heap_tuple_satisfies + * + * Returns a valid HeapTuple if it satisfies the timequal and keytest. + * Returns NULL otherwise. Used to be heap_satisifies (sic) which + * returned a boolean. It now returns a tuple so that we can avoid doing two + * PageGetItem's per tuple. + * + * Complete check of validity including LP_CTUP and keytest. 
+ * This should perhaps be combined with valid somehow in the + * future. (Also, additional rule tests/time range tests.) + * + * on 8/21/92 mao says: i rearranged the tests here to do keytest before + * SatisfiesTimeQual. profiling indicated that even for vacuumed relations, + * time qual checking was more expensive than key testing. time qual is + * least likely to fail, too. we should really add the time qual test to + * the restriction and optimize it in the normal way. this has interactions + * with joey's expensive function work. + * ---------------- + */ +HeapTuple +heap_tuple_satisfies(ItemId itemId, + Relation relation, + PageHeader disk_page, + TimeQual qual, + int nKeys, + ScanKey key) +{ + HeapTuple tuple; + bool res; + + if (! ItemIdIsUsed(itemId)) + return NULL; + + tuple = (HeapTuple) PageGetItem((Page) disk_page, itemId); + + if (key != NULL) + res = heap_keytest(tuple, RelationGetTupleDescriptor(relation), + nKeys, key); + else + res = TRUE; + + if (res && (relation->rd_rel->relkind == RELKIND_UNCATALOGED + || HeapTupleSatisfiesTimeQual(tuple,qual))) + return tuple; + + return (HeapTuple) NULL; +} + +/* + * TupleUpdatedByCurXactAndCmd() -- Returns true if this tuple has + * already been updated once by the current transaction/command + * pair. + */ +bool +TupleUpdatedByCurXactAndCmd(HeapTuple t) +{ + if (TransactionIdEquals(t->t_xmax, + GetCurrentTransactionId()) && + t->t_cmax == GetCurrentCommandId()) + return true; + + return false; +} diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c new file mode 100644 index 0000000000..be5d2ccbd9 --- /dev/null +++ b/src/backend/access/common/indextuple.c @@ -0,0 +1,427 @@ +/*------------------------------------------------------------------------- + * + * indextuple.c-- + * This file contains index tuple accessor and mutator routines, + * as well as a few various tuple utilities. 
+ * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/common/indextuple.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include <string.h> + +#include "c.h" +#include "access/ibit.h" +#include "access/itup.h" /* where the declarations go */ +#include "access/heapam.h" +#include "access/genam.h" +#include "access/tupdesc.h" +#include "access/tupmacs.h" + +#include "storage/itemptr.h" +#include "utils/elog.h" +#include "utils/palloc.h" + +static Size IndexInfoFindDataOffset(unsigned short t_info); + +/* ---------------------------------------------------------------- + * index_ tuple interface routines + * ---------------------------------------------------------------- + */ + +/* ---------------- + * index_formtuple + * ---------------- + */ +IndexTuple +index_formtuple(TupleDesc tupleDescriptor, + Datum value[], + char null[]) +{ + register char *tp; /* tuple pointer */ + IndexTuple tuple; /* return tuple */ + Size size, hoff; + int i; + unsigned short infomask = 0; + bool hasnull = false; + char tupmask = 0; + int numberOfAttributes = tupleDescriptor->natts; + + if (numberOfAttributes > MaxIndexAttributeNumber) + elog(WARN, "index_formtuple: numberOfAttributes of %d > %d", + numberOfAttributes, MaxIndexAttributeNumber); + + + for (i = 0; i < numberOfAttributes && !hasnull; i++) { + if (null[i] != ' ') hasnull = true; + } + + if (hasnull) infomask |= INDEX_NULL_MASK; + + hoff = IndexInfoFindDataOffset(infomask); + size = hoff + + ComputeDataSize(tupleDescriptor, + value, null); + size = DOUBLEALIGN(size); /* be conservative */ + + tp = (char *) palloc(size); + tuple = (IndexTuple) tp; + memset(tp,0,(int)size); + + DataFill((char *)tp + hoff, + tupleDescriptor, + value, + null, + &tupmask, + (hasnull ? 
(bits8*)tp + sizeof(*tuple) : NULL)); + + /* + * We do this because DataFill wants to initialize a "tupmask" which + * is used for HeapTuples, but we want an indextuple infomask. The only + * "relevent" info is the "has variable attributes" field, which is in + * mask position 0x02. We have already set the null mask above. + */ + + if (tupmask & 0x02) infomask |= INDEX_VAR_MASK; + + /* + * Here we make sure that we can actually hold the size. We also want + * to make sure that size is not aligned oddly. This actually is a + * rather odd way to make sure the size is not too large overall. + */ + + if (size & 0xE000) + elog(WARN, "index_formtuple: data takes %d bytes: too big", size); + + + infomask |= size; + + /* ---------------- + * initialize metadata + * ---------------- + */ + tuple->t_info = infomask; + return (tuple); +} + +/* ---------------- + * fastgetiattr + * + * This is a newer version of fastgetiattr which attempts to be + * faster by caching attribute offsets in the attribute descriptor. + * + * an alternate way to speed things up would be to cache offsets + * with the tuple, but that seems more difficult unless you take + * the storage hit of actually putting those offsets into the + * tuple you send to disk. Yuck. + * + * This scheme will be slightly slower than that, but should + * preform well for queries which hit large #'s of tuples. After + * you cache the offsets once, examining all the other tuples using + * the same attribute descriptor will go much quicker. -cim 5/4/91 + * ---------------- + */ +char * +fastgetiattr(IndexTuple tup, + int attnum, + TupleDesc tupleDesc, + bool *isnull) +{ + register char *tp; /* ptr to att in tuple */ + register char *bp; /* ptr to att in tuple */ + int slow; /* do we have to walk nulls? 
*/ + register int data_off; /* tuple data offset */ + + /* ---------------- + * sanity checks + * ---------------- + */ + + Assert(PointerIsValid(isnull)); + Assert(attnum > 0); + + /* ---------------- + * Three cases: + * + * 1: No nulls and no variable length attributes. + * 2: Has a null or a varlena AFTER att. + * 3: Has nulls or varlenas BEFORE att. + * ---------------- + */ + + *isnull = false; + data_off = IndexTupleHasMinHeader(tup) ? sizeof *tup : + IndexInfoFindDataOffset(tup->t_info); + + if (IndexTupleNoNulls(tup)) { + + /* first attribute is always at position zero */ + + if (attnum == 1) { + return(fetchatt(&(tupleDesc->attrs[0]), (char *) tup + data_off)); + } + attnum--; + + if (tupleDesc->attrs[attnum]->attcacheoff > 0) { + return(fetchatt(&(tupleDesc->attrs[attnum]), + (char *) tup + data_off + + tupleDesc->attrs[attnum]->attcacheoff)); + } + + tp = (char *) tup + data_off; + + slow = 0; + }else { /* there's a null somewhere in the tuple */ + + bp = (char *) tup + sizeof(*tup); /* "knows" t_bits are here! */ + slow = 0; + /* ---------------- + * check to see if desired att is null + * ---------------- + */ + + attnum--; + { + if (att_isnull(attnum, bp)) { + *isnull = true; + return NULL; + } + } + /* ---------------- + * Now check to see if any preceeding bits are null... 
+	 * ----------------
+	 */
+	{
+	    register int  i = 0; /* current offset in bp */
+	    register int  mask;	 /* bit in byte we're looking at */
+	    register char n;	 /* current byte in bp */
+	    register int byte, finalbit;
+
+	    byte = attnum >> 3;
+	    finalbit = attnum & 0x07;
+
+	    for (; i <= byte; i++) {
+		n = bp[i];
+		if (i < byte) {
+		    /* check for nulls in any "earlier" bytes */
+		    if ((~n) != 0) {
+			slow++;
+			break;
+		    }
+		} else {
+		    /*
+		     * check for nulls "before" final bit of last byte.
+		     *
+		     * The mask must select exactly the finalbit low-order
+		     * bits, i.e. the attributes that precede ours within
+		     * this byte.  (finalbit << 1) - 1 is not such a mask:
+		     * it yields 5 for finalbit 3, skipping bit 1, and -1
+		     * for finalbit 0.  A preceding null could therefore go
+		     * unnoticed and a cached offset be used wrongly.
+		     */
+		    mask = (1 << finalbit) - 1;
+		    if ((~n) & mask)
+			slow++;
+		}
+	    }
+	}
+	tp = (char *) tup + data_off;
+    }
+
+    /* now check for any non-fixed length attrs before our attribute */
+
+    if (!slow) {
+	if (tupleDesc->attrs[attnum]->attcacheoff > 0) {
+	    return(fetchatt(&(tupleDesc->attrs[attnum]),
+			    tp + tupleDesc->attrs[attnum]->attcacheoff));
+	}else if (!IndexTupleAllFixed(tup)) {
+	    register int j = 0;
+
+	    for (j = 0; j < attnum && !slow; j++)
+		if (tupleDesc->attrs[j]->attlen < 1) slow = 1;
+	}
+    }
+
+    /*
+     * if slow is zero, and we got here, we know that we have a tuple with
+     * no nulls.  We also know that we have to initialize the remainder of
+     * the attribute cached offset values.
+     */
+
+    if (!slow) {
+	register int j = 1;
+	register long off;
+
+	/*
+	 * need to set cache for some atts
+	 */
+
+	tupleDesc->attrs[0]->attcacheoff = 0;
+
+	while (tupleDesc->attrs[j]->attcacheoff > 0) j++;
+
+	off = tupleDesc->attrs[j-1]->attcacheoff +
+	      tupleDesc->attrs[j-1]->attlen;
+
+	for (; j < attnum + 1; j++) {
+	    /*
+	     * Fix me when going to a machine with more than a four-byte
+	     * word!
+	     */
+
+	    switch(tupleDesc->attrs[j]->attlen)
+		{
+		case -1:
+		    off = (tupleDesc->attrs[j]->attalign=='d')?
+			DOUBLEALIGN(off):INTALIGN(off);
+		    break;
+		case sizeof(char):
+		    break;
+		case sizeof(short):
+		    off = SHORTALIGN(off);
+		    break;
+		case sizeof(int32):
+		    off = INTALIGN(off);
+		    break;
+		default:
+		    if (tupleDesc->attrs[j]->attlen > sizeof(int32))
+			off = (tupleDesc->attrs[j]->attalign=='d')?
+ DOUBLEALIGN(off) : LONGALIGN(off); + else + elog(WARN, "fastgetiattr: attribute %d has len %d", + j, tupleDesc->attrs[j]->attlen); + break; + + } + + tupleDesc->attrs[j]->attcacheoff = off; + off += tupleDesc->attrs[j]->attlen; + } + + return(fetchatt( &(tupleDesc->attrs[attnum]), + tp + tupleDesc->attrs[attnum]->attcacheoff)); + }else { + register bool usecache = true; + register int off = 0; + register int i; + + /* + * Now we know that we have to walk the tuple CAREFULLY. + */ + + for (i = 0; i < attnum; i++) { + if (!IndexTupleNoNulls(tup)) { + if (att_isnull(i, bp)) { + usecache = false; + continue; + } + } + + if (usecache && tupleDesc->attrs[i]->attcacheoff > 0) { + off = tupleDesc->attrs[i]->attcacheoff; + if (tupleDesc->attrs[i]->attlen == -1) + usecache = false; + else + continue; + } + + if (usecache) tupleDesc->attrs[i]->attcacheoff = off; + switch(tupleDesc->attrs[i]->attlen) + { + case sizeof(char): + off++; + break; + case sizeof(short): + off = SHORTALIGN(off) + sizeof(short); + break; + case -1: + usecache = false; + off = (tupleDesc->attrs[i]->attalign=='d')? + DOUBLEALIGN(off):INTALIGN(off); + off += VARSIZE(tp + off); + break; + default: + if (tupleDesc->attrs[i]->attlen > sizeof(int32)) + off = (tupleDesc->attrs[i]->attalign=='d') ? 
+ DOUBLEALIGN(off) + tupleDesc->attrs[i]->attlen : + LONGALIGN(off) + tupleDesc->attrs[i]->attlen; + else + elog(WARN, "fastgetiattr2: attribute %d has len %d", + i, tupleDesc->attrs[i]->attlen); + + break; + } + } + + return(fetchatt(&tupleDesc->attrs[attnum], tp + off)); + } +} + +/* ---------------- + * index_getattr + * ---------------- + */ +Datum +index_getattr(IndexTuple tuple, + AttrNumber attNum, + TupleDesc tupDesc, + bool *isNullOutP) +{ + Assert (attNum > 0); + + return (Datum) + fastgetiattr(tuple, attNum, tupDesc, isNullOutP); +} + +RetrieveIndexResult +FormRetrieveIndexResult(ItemPointer indexItemPointer, + ItemPointer heapItemPointer) +{ + RetrieveIndexResult result; + + Assert(ItemPointerIsValid(indexItemPointer)); + Assert(ItemPointerIsValid(heapItemPointer)); + + result = (RetrieveIndexResult) palloc(sizeof *result); + + result->index_iptr = *indexItemPointer; + result->heap_iptr = *heapItemPointer; + + return (result); +} + +/* + * Takes an infomask as argument (primarily because this needs to be usable + * at index_formtuple time so enough space is allocated). + * + * Change me if adding an attribute to IndexTuples!!!!!!!!!!! + */ +static Size +IndexInfoFindDataOffset(unsigned short t_info) +{ + if (!(t_info & INDEX_NULL_MASK)) + return((Size) sizeof(IndexTupleData)); + else { + Size size = sizeof(IndexTupleData); + + if (t_info & INDEX_NULL_MASK) { + size += sizeof(IndexAttributeBitMapData); + } + return DOUBLEALIGN(size); /* be conservative */ + } +} + +/* + * Copies source into target. If *target == NULL, we palloc space; otherwise + * we assume we have space that is already palloc'ed. 
+ */ +void +CopyIndexTuple(IndexTuple source, IndexTuple *target) +{ + Size size; + IndexTuple ret; + + size = IndexTupleSize(source); + if (*target == NULL) { + *target = (IndexTuple) palloc(size); + } + + ret = *target; + memmove((char*)ret, (char*)source, size); +} + diff --git a/src/backend/access/common/indexvalid.c b/src/backend/access/common/indexvalid.c new file mode 100644 index 0000000000..b437718cec --- /dev/null +++ b/src/backend/access/common/indexvalid.c @@ -0,0 +1,84 @@ +/*------------------------------------------------------------------------- + * + * indexvalid.c-- + * index tuple qualification validity checking code + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/common/Attic/indexvalid.c,v 1.1.1.1 1996/07/09 06:21:09 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "executor/execdebug.h" +#include "access/genam.h" +#include "access/iqual.h" /* where the declarations go */ +#include "access/itup.h" +#include "access/skey.h" + +#include "storage/buf.h" +#include "storage/bufpage.h" +#include "storage/itemid.h" +#include "utils/rel.h" + +/* ---------------------------------------------------------------- + * index scan key qualification code + * ---------------------------------------------------------------- + */ +int NIndexTupleProcessed; + +/* ---------------- + * index_keytest + * + * old comments + * May eventually combine with other tests (like timeranges)? + * Should have Buffer buffer; as an argument and pass it to amgetattr. 
+ * ---------------- + */ +bool +index_keytest(IndexTuple tuple, + TupleDesc tupdesc, + int scanKeySize, + ScanKey key) +{ + bool isNull; + Datum datum; + int test; + + IncrIndexProcessed(); + + while (scanKeySize > 0) { + datum = index_getattr(tuple, + 1, + tupdesc, + &isNull); + + if (isNull) { + /* XXX eventually should check if SK_ISNULL */ + return (false); + } + + if (key[0].sk_flags & SK_COMMUTE) { + test = (int) (*(key[0].sk_func)) + (DatumGetPointer(key[0].sk_argument), + datum); + } else { + test = (int) (*(key[0].sk_func)) + (datum, + DatumGetPointer(key[0].sk_argument)); + } + + if (!test == !(key[0].sk_flags & SK_NEGATE)) { + return (false); + } + + scanKeySize -= 1; + key++; + } + + return (true); +} + diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c new file mode 100644 index 0000000000..556b73b9df --- /dev/null +++ b/src/backend/access/common/printtup.c @@ -0,0 +1,306 @@ +/*------------------------------------------------------------------------- + * + * printtup.c-- + * Routines to print out tuples to the destination (binary or non-binary + * portals, frontend/interactive backend, etc.). 
+ * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/common/printtup.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include <sys/file.h> +#include <stdio.h> +#include <string.h> + +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup.h" +#include "access/skey.h" +#include "access/printtup.h" +#include "access/tupdesc.h" +#include "storage/buf.h" +#include "utils/memutils.h" +#include "utils/palloc.h" +#include "fmgr.h" +#include "utils/elog.h" + +#include "utils/syscache.h" +#include "catalog/pg_type.h" + +#include "libpq/libpq.h" + +/* ---------------------------------------------------------------- + * printtup / debugtup support + * ---------------------------------------------------------------- + */ + +/* ---------------- + * typtoout - used by printtup and debugtup + * ---------------- + */ +Oid +typtoout(Oid type) +{ + HeapTuple typeTuple; + + typeTuple = SearchSysCacheTuple(TYPOID, + ObjectIdGetDatum(type), + 0, 0, 0); + + if (HeapTupleIsValid(typeTuple)) + return((Oid) + ((TypeTupleForm) GETSTRUCT(typeTuple))->typoutput); + + elog(WARN, "typtoout: Cache lookup of type %d failed", type); + return(InvalidOid); +} + +Oid +gettypelem(Oid type) +{ + HeapTuple typeTuple; + + typeTuple = SearchSysCacheTuple(TYPOID, + ObjectIdGetDatum(type), + 0,0,0); + + if (HeapTupleIsValid(typeTuple)) + return((Oid) + ((TypeTupleForm) GETSTRUCT(typeTuple))->typelem); + + elog(WARN, "typtoout: Cache lookup of type %d failed", type); + return(InvalidOid); +} + +/* ---------------- + * printtup + * ---------------- + */ +void +printtup(HeapTuple tuple, TupleDesc typeinfo) +{ + int i, j, k; + char *outputstr, *attr; + bool isnull; + Oid typoutput; + + /* ---------------- + * tell the frontend to expect new tuple data + * ---------------- + */ + pq_putnchar("D", 1); + + /* 
---------------- + * send a bitmap of which attributes are null + * ---------------- + */ + j = 0; + k = 1 << 7; + for (i = 0; i < tuple->t_natts; ) { + attr = heap_getattr(tuple, InvalidBuffer, ++i, typeinfo, &isnull); + if (!isnull) + j |= k; + k >>= 1; + if (!(i & 7)) { + pq_putint(j, 1); + j = 0; + k = 1 << 7; + } + } + if (i & 7) + pq_putint(j, 1); + + /* ---------------- + * send the attributes of this tuple + * ---------------- + */ + for (i = 0; i < tuple->t_natts; ++i) { + attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull); + typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid); + + if (!isnull && OidIsValid(typoutput)) { + outputstr = fmgr(typoutput, attr, + gettypelem(typeinfo->attrs[i]->atttypid)); + pq_putint(strlen(outputstr)+4, 4); + pq_putnchar(outputstr, strlen(outputstr)); + pfree(outputstr); + } + } +} + +/* ---------------- + * printatt + * ---------------- + */ +static void +printatt(unsigned attributeId, + AttributeTupleForm attributeP, + char *value) +{ + printf("\t%2d: %.*s%s%s%s\t(typeid = %u, len = %d, byval = %c)\n", + attributeId, + NAMEDATALEN, /* attname is a char16 */ + attributeP->attname.data, + value != NULL ? " = \"" : "", + value != NULL ? value : "", + value != NULL ? "\"" : "", + (unsigned int) (attributeP->atttypid), + attributeP->attlen, + attributeP->attbyval ? 
't' : 'f'); +} + +/* ---------------- + * showatts + * ---------------- + */ +void +showatts(char *name, TupleDesc tupleDesc) +{ + int i; + int natts = tupleDesc->natts; + AttributeTupleForm *attinfo = tupleDesc->attrs; + + puts(name); + for (i = 0; i < natts; ++i) + printatt((unsigned) i+1, attinfo[i], (char *) NULL); + printf("\t----\n"); +} + +/* ---------------- + * debugtup + * ---------------- + */ +void +debugtup(HeapTuple tuple, TupleDesc typeinfo) +{ + register int i; + char *attr, *value; + bool isnull; + Oid typoutput; + + for (i = 0; i < tuple->t_natts; ++i) { + attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull); + typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid); + + if (!isnull && OidIsValid(typoutput)) { + value = fmgr(typoutput, attr, + gettypelem(typeinfo->attrs[i]->atttypid)); + printatt((unsigned) i+1, typeinfo->attrs[i], value); + pfree(value); + } + } + printf("\t----\n"); +} + +/*#define IPORTAL_DEBUG*/ + +/* ---------------- + * printtup_internal + * Protocol expects either T, D, C, E, or N. + * We use a different data prefix, e.g. 'B' instead of 'D' to + * indicate a tuple in internal (binary) form. + * + * This is same as printtup, except we don't use the typout func. 
+ * ---------------- + */ +void +printtup_internal(HeapTuple tuple, TupleDesc typeinfo) +{ + int i, j, k; + char *attr; + bool isnull; + + /* ---------------- + * tell the frontend to expect new tuple data + * ---------------- + */ + pq_putnchar("B", 1); + + /* ---------------- + * send a bitmap of which attributes are null + * ---------------- + */ + j = 0; + k = 1 << 7; + for (i = 0; i < tuple->t_natts; ) { + attr = heap_getattr(tuple, InvalidBuffer, ++i, typeinfo, &isnull); + if (!isnull) + j |= k; + k >>= 1; + if (!(i & 7)) { + pq_putint(j, 1); + j = 0; + k = 1 << 7; + } + } + if (i & 7) + pq_putint(j, 1); + + /* ---------------- + * send the attributes of this tuple + * ---------------- + */ +#ifdef IPORTAL_DEBUG + fprintf(stderr, "sending tuple with %d atts\n", tuple->t_natts); +#endif + for (i = 0; i < tuple->t_natts; ++i) { + int32 len = typeinfo->attrs[i]->attlen; + + attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull); + if (!isnull) { + /* # of bytes, and opaque data */ + if (len == -1) { + /* variable length, assume a varlena structure */ + len = VARSIZE(attr) - VARHDRSZ; + + pq_putint(len, sizeof(int32)); + pq_putnchar(VARDATA(attr), len); +#ifdef IPORTAL_DEBUG + { + char *d = VARDATA(attr); + + fprintf(stderr, "length %d data %x%x%x%x\n", + len, *d, *(d+1), *(d+2), *(d+3)); + } +#endif + } else { + /* fixed size */ + if (typeinfo->attrs[i]->attbyval) { + int8 i8; + int16 i16; + int32 i32; + + pq_putint(len, sizeof(int32)); + switch (len) { + case sizeof(int8): + i8 = DatumGetChar(attr); + pq_putnchar((char *) &i8, len); + break; + case sizeof(int16): + i16 = DatumGetInt16(attr); + pq_putnchar((char *) &i16, len); + break; + case sizeof(int32): + i32 = DatumGetInt32(attr); + pq_putnchar((char *) &i32, len); + break; + } +#ifdef IPORTAL_DEBUG + fprintf(stderr, "byval length %d data %d\n", len, attr); +#endif + } else { + pq_putint(len, sizeof(int32)); + pq_putnchar(attr, len); +#ifdef IPORTAL_DEBUG + fprintf(stderr, "byref length %d data 
%x\n", len, attr); +#endif + } + } + } + } +} diff --git a/src/backend/access/common/scankey.c b/src/backend/access/common/scankey.c new file mode 100644 index 0000000000..7a47219a73 --- /dev/null +++ b/src/backend/access/common/scankey.c @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * scan.c-- + * scan direction and key code + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/common/scankey.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "c.h" +#include "access/sdir.h" +#include "access/attnum.h" +#include "access/skey.h" + +#include "fmgr.h" + +/* + * ScanKeyEntryIsLegal -- + * True iff the scan key entry is legal. + */ +#define ScanKeyEntryIsLegal(entry) \ + ((bool) (AssertMacro(PointerIsValid(entry)) && \ + AttributeNumberIsValid(entry->sk_attno))) + +/* + * ScanKeyEntrySetIllegal -- + * Marks a scan key entry as illegal. + */ +void +ScanKeyEntrySetIllegal(ScanKey entry) +{ + + Assert(PointerIsValid(entry)); + + entry->sk_flags = 0; /* just in case... */ + entry->sk_attno = InvalidAttrNumber; + entry->sk_procedure = 0; /* should be InvalidRegProcedure */ +} + +/* + * ScanKeyEntryInitialize -- + * Initializes an scan key entry. + * + * Note: + * Assumes the scan key entry is valid. + * Assumes the intialized scan key entry will be legal. 
+ */ +void +ScanKeyEntryInitialize(ScanKey entry, + bits16 flags, + AttrNumber attributeNumber, + RegProcedure procedure, + Datum argument) +{ + Assert(PointerIsValid(entry)); + + entry->sk_flags = flags; + entry->sk_attno = attributeNumber; + entry->sk_procedure = procedure; + entry->sk_argument = argument; + fmgr_info(procedure, &entry->sk_func, &entry->sk_nargs); + + Assert(ScanKeyEntryIsLegal(entry)); +} diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c new file mode 100644 index 0000000000..527eb5113d --- /dev/null +++ b/src/backend/access/common/tupdesc.c @@ -0,0 +1,398 @@ +/*------------------------------------------------------------------------- + * + * tupdesc.c-- + * POSTGRES tuple descriptor support code + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + * NOTES + * some of the executor utility code such as "ExecTypeFromTL" should be + * moved here. + * + *------------------------------------------------------------------------- + */ +#include <stdio.h> /* for sprintf() */ +#include <ctype.h> +#include <string.h> + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/parsenodes.h" + +#include "access/attnum.h" +#include "access/htup.h" +#include "access/tupdesc.h" + +#include "utils/builtins.h" +#include "utils/elog.h" /* XXX generate exceptions instead */ +#include "utils/palloc.h" + +#include "utils/syscache.h" +#include "catalog/pg_type.h" + +#include "nodes/primnodes.h" + +#include "parser/catalog_utils.h" + +/* ---------------------------------------------------------------- + * CreateTemplateTupleDesc + * + * This function allocates and zeros a tuple descriptor structure. 
+ * ---------------------------------------------------------------- + */ +TupleDesc +CreateTemplateTupleDesc(int natts) +{ + uint32 size; + TupleDesc desc; + + /* ---------------- + * sanity checks + * ---------------- + */ + AssertArg(natts >= 1); + + /* ---------------- + * allocate enough memory for the tuple descriptor and + * zero it as TupleDescInitEntry assumes that the descriptor + * is filled with NULL pointers. + * ---------------- + */ + size = natts * sizeof (AttributeTupleForm); + desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); + desc->attrs = (AttributeTupleForm*) palloc(size); + memset(desc->attrs, 0, size); + + desc->natts = natts; + + return (desc); +} + +/* ---------------------------------------------------------------- + * CreateTupleDesc + * + * This function allocates a new TupleDesc from AttributeTupleForm array + * ---------------------------------------------------------------- + */ +TupleDesc +CreateTupleDesc(int natts, AttributeTupleForm* attrs) +{ + TupleDesc desc; + + /* ---------------- + * sanity checks + * ---------------- + */ + AssertArg(natts >= 1); + + desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); + desc->attrs = attrs; + desc->natts = natts; + + + return (desc); +} + +/* ---------------------------------------------------------------- + * CreateTupleDescCopy + * + * This function creates a new TupleDesc by copying from an existing + * TupleDesc + * + * ---------------------------------------------------------------- + */ +TupleDesc +CreateTupleDescCopy(TupleDesc tupdesc) +{ + TupleDesc desc; + int i, size; + + desc = (TupleDesc) palloc(sizeof(struct tupleDesc)); + desc->natts = tupdesc->natts; + size = desc->natts * sizeof (AttributeTupleForm); + desc->attrs = (AttributeTupleForm*) palloc(size); + for (i=0;i<desc->natts;i++) { + desc->attrs[i] = + (AttributeTupleForm)palloc(ATTRIBUTE_TUPLE_SIZE); + memmove(desc->attrs[i], + tupdesc->attrs[i], + ATTRIBUTE_TUPLE_SIZE); + } + return desc; +} + +/* 
---------------------------------------------------------------- + * TupleDescInitEntry + * + * This function initializes a single attribute structure in + * a preallocated tuple descriptor. + * ---------------------------------------------------------------- + */ +bool +TupleDescInitEntry(TupleDesc desc, + AttrNumber attributeNumber, + char *attributeName, + char *typeName, + int attdim, + bool attisset) +{ + HeapTuple tuple; + TypeTupleForm typeForm; + AttributeTupleForm att; + + /* ---------------- + * sanity checks + * ---------------- + */ + AssertArg(PointerIsValid(desc)); + AssertArg(attributeNumber >= 1); + /* attributeName's are sometimes NULL, + from resdom's. I don't know why that is, though -- Jolly */ +/* AssertArg(NameIsValid(attributeName));*/ +/* AssertArg(NameIsValid(typeName));*/ + + AssertArg(!PointerIsValid(desc->attrs[attributeNumber - 1])); + + + /* ---------------- + * allocate storage for this attribute + * ---------------- + */ + + att = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE); + desc->attrs[attributeNumber - 1] = att; + + /* ---------------- + * initialize some of the attribute fields + * ---------------- + */ + att->attrelid = 0; /* dummy value */ + + if (attributeName != NULL) + namestrcpy(&(att->attname), attributeName); + else + memset(att->attname.data,0,NAMEDATALEN); + + + att->attdefrel = 0; /* dummy value */ + att->attnvals = 0; /* dummy value */ + att->atttyparg = 0; /* dummy value */ + att->attbound = 0; /* dummy value */ + att->attcanindex = 0; /* dummy value */ + att->attproc = 0; /* dummy value */ + att->attcacheoff = -1; + + att->attnum = attributeNumber; + att->attnelems = attdim; + att->attisset = attisset; + + /* ---------------- + * search the system cache for the type tuple of the attribute + * we are creating so that we can get the typeid and some other + * stuff. 
+ * + * Note: in the special case of + * + * create EMP (name = char16, manager = EMP) + * + * RelationNameCreateHeapRelation() calls BuildDesc() which + * calls this routine and since EMP does not exist yet, the + * system cache lookup below fails. That's fine, but rather + * then doing a elog(WARN) we just leave that information + * uninitialized, return false, then fix things up later. + * -cim 6/14/90 + * ---------------- + */ + tuple = SearchSysCacheTuple(TYPNAME, PointerGetDatum(typeName), + 0,0,0); + if (! HeapTupleIsValid(tuple)) { + /* ---------------- + * here type info does not exist yet so we just fill + * the attribute with dummy information and return false. + * ---------------- + */ + att->atttypid = InvalidOid; + att->attlen = (int16) 0; + att->attbyval = (bool) 0; + att->attalign = 'i'; + return false; + } + + /* ---------------- + * type info exists so we initialize our attribute + * information from the type tuple we found.. + * ---------------- + */ + typeForm = (TypeTupleForm) GETSTRUCT(tuple); + + att->atttypid = tuple->t_oid; + att->attalign = typeForm->typalign; + + /* ------------------------ + If this attribute is a set, what is really stored in the + attribute is the OID of a tuple in the pg_proc catalog. + The pg_proc tuple contains the query string which defines + this set - i.e., the query to run to get the set. + So the atttypid (just assigned above) refers to the type returned + by this query, but the actual length of this attribute is the + length (size) of an OID. + + Why not just make the atttypid point to the OID type, instead + of the type the query returns? Because the executor uses the atttypid + to tell the front end what type will be returned (in BeginCommand), + and in the end the type returned will be the result of the query, not + an OID. 
+ + Why not wait until the return type of the set is known (i.e., the + recursive call to the executor to execute the set has returned) + before telling the front end what the return type will be? Because + the executor is a delicate thing, and making sure that the correct + order of front-end commands is maintained is messy, especially + considering that target lists may change as inherited attributes + are considered, etc. Ugh. + ----------------------------------------- + */ + if (attisset) { + Type t = type("oid"); + att->attlen = tlen(t); + att->attbyval = tbyval(t); + } else { + att->attlen = typeForm->typlen; + att->attbyval = typeForm->typbyval; + } + + + return true; +} + + +/* ---------------------------------------------------------------- + * TupleDescMakeSelfReference + * + * This function initializes a "self-referential" attribute like + * manager in "create EMP (name=text, manager = EMP)". + * It calls TypeShellMake() which inserts a "shell" type + * tuple into pg_type. A self-reference is one kind of set, so + * its size and byval are the same as for a set. See the comments + * above in TupleDescInitEntry. + * ---------------------------------------------------------------- + */ +static void +TupleDescMakeSelfReference(TupleDesc desc, + AttrNumber attnum, + char *relname) +{ + AttributeTupleForm att; + Type t = type("oid"); + + att = desc->attrs[attnum-1]; + att->atttypid = TypeShellMake(relname); + att->attlen = tlen(t); + att->attbyval = tbyval(t); + att->attnelems = 0; +} + +/* ---------------------------------------------------------------- + * BuildDescForRelation + * + * This is a general purpose function identical to BuildDesc + * but is used by the DefineRelation() code to catch the + * special case where you + * + * create FOO ( ..., x = FOO ) + * + * here, the initial type lookup for "x = FOO" will fail + * because FOO isn't in the catalogs yet. 
But since we + * are creating FOO, instead of doing an elog() we add + * a shell type tuple to pg_type and fix things later + * in amcreate(). + * ---------------------------------------------------------------- + */ +TupleDesc +BuildDescForRelation(List *schema, char *relname) +{ + int natts; + AttrNumber attnum; + List *p; + TupleDesc desc; + char *attname; + char *typename; + int attdim; + bool attisset; + + /* ---------------- + * allocate a new tuple descriptor + * ---------------- + */ + natts = length(schema); + desc = CreateTemplateTupleDesc(natts); + + attnum = 0; + + typename = palloc(NAMEDATALEN+1); + + foreach(p, schema) { + ColumnDef *entry; + List *arry; + + /* ---------------- + * for each entry in the list, get the name and type + * information from the list and have TupleDescInitEntry + * fill in the attribute information we need. + * ---------------- + */ + attnum++; + + entry = lfirst(p); + attname = entry->colname; + arry = entry->typename->arrayBounds; + attisset = entry->typename->setof; + + if (arry != NIL) { + char buf[20]; + + attdim = length(arry); + + /* array of XXX is _XXX (inherited from release 3) */ + sprintf(buf, "_%.*s", NAMEDATALEN, entry->typename->name); + strcpy(typename, buf); + } else { + strcpy(typename, entry->typename->name); + attdim = 0; + } + + if (! TupleDescInitEntry(desc, attnum, attname, + typename, attdim, attisset)) { + /* ---------------- + * if TupleDescInitEntry() fails, it means there is + * no type in the system catalogs. So now we check if + * the type name equals the relation name. If so we + * have a self reference, otherwise it's an error. + * ---------------- + */ + if (!strcmp(typename, relname)) { + TupleDescMakeSelfReference(desc, attnum, relname); + } else + elog(WARN, "DefineRelation: no such type %.*s", + NAMEDATALEN, typename); + } + + /* + * this is for char() and varchar(). 
When an entry is of type + * char() or varchar(), typlen is set to the appropriate length, + * which we'll use here instead. (The catalog lookup only returns + * the length of bpchar and varchar which is not what we want!) + * - ay 6/95 + */ + if (entry->typename->typlen > 0) { + desc->attrs[attnum - 1]->attlen = entry->typename->typlen; + } + } + return desc; +} + diff --git a/src/backend/access/funcindex.h b/src/backend/access/funcindex.h new file mode 100644 index 0000000000..4689df19c0 --- /dev/null +++ b/src/backend/access/funcindex.h @@ -0,0 +1,43 @@ +/*------------------------------------------------------------------------- + * + * funcindex.h-- + * + * + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: funcindex.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef _FUNC_INDEX_INCLUDED_ +#define _FUNC_INDEX_INCLUDED_ + +#include "postgres.h" + +typedef struct { + int nargs; + Oid arglist[8]; + Oid procOid; + NameData funcName; +} FuncIndexInfo; + +typedef FuncIndexInfo *FuncIndexInfoPtr; + +/* + * some marginally useful macro definitions + */ +/* #define FIgetname(FINFO) (&((FINFO)->funcName.data[0]))*/ +#define FIgetname(FINFO) (FINFO)->funcName.data +#define FIgetnArgs(FINFO) (FINFO)->nargs +#define FIgetProcOid(FINFO) (FINFO)->procOid +#define FIgetArg(FINFO, argnum) (FINFO)->arglist[argnum] +#define FIgetArglist(FINFO) (FINFO)->arglist + +#define FIsetnArgs(FINFO, numargs) ((FINFO)->nargs = numargs) +#define FIsetProcOid(FINFO, id) ((FINFO)->procOid = id) +#define FIsetArg(FINFO, argnum, argtype) ((FINFO)->arglist[argnum] = argtype) + +#define FIisFunctionalIndex(FINFO) (FINFO->procOid != InvalidOid) + +#endif /* FUNCINDEX_H */ diff --git a/src/backend/access/genam.h b/src/backend/access/genam.h new file mode 100644 index 0000000000..b2544650de --- /dev/null +++ b/src/backend/access/genam.h @@ -0,0 +1,60 @@ 
+/*------------------------------------------------------------------------- + * + * genam.h-- + * POSTGRES general access method definitions. + * + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: genam.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef GENAM_H +#define GENAM_H + +#include "postgres.h" + +#include "access/attnum.h" +#include "access/htup.h" +#include "access/istrat.h" +#include "access/itup.h" +#include "access/relscan.h" +#include "access/skey.h" +#include "access/sdir.h" +#include "access/funcindex.h" + +/* ---------------- + * generalized index_ interface routines + * ---------------- + */ +extern Relation index_open(Oid relationId); +extern Relation index_openr(char *relationName); +extern void index_close(Relation relation); +extern InsertIndexResult index_insert(Relation relation, + IndexTuple indexTuple); +extern void index_delete(Relation relation, ItemPointer indexItem); +extern IndexScanDesc index_beginscan(Relation relation, bool scanFromEnd, + uint16 numberOfKeys, ScanKey key); +extern void index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key); +extern void index_endscan(IndexScanDesc scan); +extern void index_markpos(IndexScanDesc scan); +extern void index_restrpos(IndexScanDesc scan); +extern RetrieveIndexResult index_getnext(IndexScanDesc scan, + ScanDirection direction); +extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum, + uint16 procnum); +extern Datum GetIndexValue(HeapTuple tuple, TupleDesc hTupDesc, + int attOff, AttrNumber attrNums[], FuncIndexInfo *fInfo, + bool *attNull, Buffer buffer); + +/* in genam.c */ +extern IndexScanDesc RelationGetIndexScan(Relation relation, bool scanFromEnd, + uint16 numberOfKeys, ScanKey key); +extern void IndexScanRestart(IndexScanDesc scan, bool scanFromEnd, + ScanKey key); +extern void IndexScanEnd(IndexScanDesc scan); +extern void 
IndexScanMarkPosition(IndexScanDesc scan); +extern void IndexScanRestorePosition(IndexScanDesc scan); + +#endif /* GENAM_H */ diff --git a/src/backend/access/hash.h b/src/backend/access/hash.h new file mode 100644 index 0000000000..21407696b4 --- /dev/null +++ b/src/backend/access/hash.h @@ -0,0 +1,336 @@ +/*------------------------------------------------------------------------- + * + * hash.h-- + * header file for postgres hash access method implementation + * + * + * Copyright (c) 1994, Regents of the University of California + * + * $Id: hash.h,v 1.1.1.1 1996/07/09 06:21:08 scrappy Exp $ + * + * NOTES + * modeled after Margo Seltzer's hash implementation for unix. + * + *------------------------------------------------------------------------- + */ +#ifndef HASH_H +#define HASH_H + +#include "access/itup.h" + +/* + * An overflow page is a spare page allocated for storing data whose + * bucket doesn't have room to store it. We use overflow pages rather + * than just splitting the bucket because there is a linear order in + * the way we split buckets. In other words, if there isn't enough space + * in the bucket itself, put it in an overflow page. + * + * Overflow page addresses are stored in form: (Splitnumber, Page offset). + * + * A splitnumber is the number of the generation where the table doubles + * in size. The ovflpage's offset within the splitnumber; offsets start + * at 1. + * + * We convert the stored bitmap address into a page address with the + * macro OADDR_OF(S, O) where S is the splitnumber and O is the page + * offset. 
+ */ +typedef uint32 Bucket; +typedef bits16 OverflowPageAddress; +typedef uint32 SplitNumber; +typedef uint32 PageOffset; + +/* A valid overflow address will always have a page offset >= 1 */ +#define InvalidOvflAddress 0 + +#define SPLITSHIFT 11 +#define SPLITMASK 0x7FF +#define SPLITNUM(N) ((SplitNumber)(((uint32)(N)) >> SPLITSHIFT)) +#define OPAGENUM(N) ((PageOffset)((N) & SPLITMASK)) +#define OADDR_OF(S,O) ((OverflowPageAddress)((uint32)((uint32)(S) << SPLITSHIFT) + (O))) + +#define BUCKET_TO_BLKNO(B) \ + ((Bucket) ((B) + ((B) ? metap->SPARES[_hash_log2((B)+1)-1] : 0)) + 1) +#define OADDR_TO_BLKNO(B) \ + ((BlockNumber) \ + (BUCKET_TO_BLKNO ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B)))); + +/* + * hasho_flag tells us which type of page we're looking at. For + * example, knowing overflow pages from bucket pages is necessary + * information when you're deleting tuples from a page. If all the + * tuples are deleted from an overflow page, the overflow is made + * available to other buckets by calling _hash_freeovflpage(). If all + * the tuples are deleted from a bucket page, no additional action is + * necessary. + */ + +#define LH_UNUSED_PAGE (0) +#define LH_OVERFLOW_PAGE (1 << 0) +#define LH_BUCKET_PAGE (1 << 1) +#define LH_BITMAP_PAGE (1 << 2) +#define LH_META_PAGE (1 << 3) + +typedef struct HashPageOpaqueData { + bits16 hasho_flag; /* is this page a bucket or ovfl */ + Bucket hasho_bucket; /* bucket number this pg belongs to */ + OverflowPageAddress hasho_oaddr; /* ovfl address of this ovfl pg */ + BlockNumber hasho_nextblkno; /* next ovfl blkno */ + BlockNumber hasho_prevblkno; /* previous ovfl (or bucket) blkno */ +} HashPageOpaqueData; + +typedef HashPageOpaqueData *HashPageOpaque; + +/* + * ScanOpaqueData is used to remember which buffers we're currently + * examining in the scan. We keep these buffers locked and pinned and + * recorded in the opaque entry of the scan in order to avoid doing a + * ReadBuffer() for every tuple in the index. 
This avoids semop() calls, + * which are expensive. + */ + +typedef struct HashScanOpaqueData { + Buffer hashso_curbuf; + Buffer hashso_mrkbuf; +} HashScanOpaqueData; + +typedef HashScanOpaqueData *HashScanOpaque; + +/* + * Definitions for metapage. + */ + +#define HASH_METAPAGE 0 /* metapage is always block 0 */ + +#define HASH_MAGIC 0x6440640 +#define HASH_VERSION 0 + +/* + * NCACHED is used to set the array sizeof spares[] & bitmaps[]. + * + * Spares[] is used to hold the number overflow pages currently + * allocated at a certain splitpoint. For example, if spares[3] = 7 + * then there are a maximum of 7 ovflpages available at splitpoint 3. + * The value in spares[] will change as ovflpages are added within + * a splitpoint. + * + * Within a splitpoint, one can find which ovflpages are available and + * which are used by looking at a bitmaps that are stored on the ovfl + * pages themselves. There is at least one bitmap for every splitpoint's + * ovflpages. Bitmaps[] contains the ovflpage addresses of the ovflpages + * that hold the ovflpage bitmaps. + * + * The reason that the size is restricted to NCACHED (32) is because + * the bitmaps are 16 bits: upper 5 represent the splitpoint, lower 11 + * indicate the page number within the splitpoint. Since there are + * only 5 bits to store the splitpoint, there can only be 32 splitpoints. + * Both spares[] and bitmaps[] use splitpoints as there indices, so there + * can only be 32 of them. + */ + +#define NCACHED 32 + + +typedef struct HashMetaPageData { + PageHeaderData hashm_phdr; /* pad for page header + (do not use) */ + uint32 hashm_magic; /* magic no. 
for hash tables */ + uint32 hashm_version; /* version ID */ + uint32 hashm_nkeys; /* number of keys stored in + the table */ + uint16 hashm_ffactor; /* fill factor */ + uint16 hashm_bsize; /* bucket size (bytes) - + must be a power of 2 */ + uint16 hashm_bshift; /* bucket shift */ + uint16 hashm_bmsize; /* bitmap array size (bytes) - + must be a power of 2 */ + uint32 hashm_maxbucket; /* ID of maximum bucket + in use */ + uint32 hashm_highmask; /* mask to modulo into + entire table */ + uint32 hashm_lowmask; /* mask to modulo into lower + half of table */ + uint32 hashm_ovflpoint; /* pageno. from which ovflpgs + being allocated */ + uint32 hashm_lastfreed; /* last ovflpage freed */ + uint32 hashm_nmaps; /* Initial number of bitmaps */ + uint32 hashm_spares[NCACHED]; /* spare pages available at + splitpoints */ + BlockNumber hashm_mapp[NCACHED]; /* blknumbers of ovfl page + maps */ + RegProcedure hashm_procid; /* hash procedure id from + pg_proc */ +} HashMetaPageData; + +typedef HashMetaPageData *HashMetaPage; + +/* Short hands for accessing structure */ +#define BSHIFT hashm_bshift +#define OVFL_POINT hashm_ovflpoint +#define LAST_FREED hashm_lastfreed +#define MAX_BUCKET hashm_maxbucket +#define FFACTOR hashm_ffactor +#define HIGH_MASK hashm_highmask +#define LOW_MASK hashm_lowmask +#define NKEYS hashm_nkeys +#define SPARES hashm_spares + +extern bool BuildingHash; + +typedef struct HashItemData { + IndexTupleData hash_itup; +} HashItemData; + +typedef HashItemData *HashItem; + +/* + * Constants + */ +#define DEFAULT_FFACTOR 300 +#define SPLITMAX 8 +#define BYTE_TO_BIT 3 /* 2^3 bits/byte */ +#define INT_TO_BYTE 2 /* 2^2 bytes/int */ +#define INT_TO_BIT 5 /* 2^5 bits/int */ +#define ALL_SET ((uint32) ~0) + +/* + * bitmap pages do not contain tuples. they do contain the standard + * page headers and trailers; however, everything in between is a + * giant bit array. 
the number of bits that fit on a page obviously + * depends on the page size and the header/trailer overhead. + */ +#define BMPGSZ_BYTE(metap) ((metap)->hashm_bmsize) +#define BMPGSZ_BIT(metap) ((metap)->hashm_bmsize << BYTE_TO_BIT) +#define HashPageGetBitmap(pg) \ + ((uint32 *) (((char *) (pg)) + DOUBLEALIGN(sizeof(PageHeaderData)))) + +/* + * The number of bits in an ovflpage bitmap which + * tells which ovflpages are empty versus in use (NOT the number of + * bits in an overflow page *address* bitmap). + */ +#define BITS_PER_MAP 32 /* Number of bits in ovflpage bitmap */ + +/* Given the address of the beginning of a big map, clear/set the nth bit */ +#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP))) +#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP))) +#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP))) + +/* + * page locking modes + */ +#define HASH_READ 0 +#define HASH_WRITE 1 + +/* + * In general, the hash code tries to localize its knowledge about page + * layout to a couple of routines. However, we need a special value to + * indicate "no page number" in those places where we expect page numbers. + */ + +#define P_NONE 0 + +/* + * Strategy number. There's only one valid strategy for hashing: equality. + */ + +#define HTEqualStrategyNumber 1 +#define HTMaxStrategyNumber 1 + +/* + * When a new operator class is declared, we require that the user supply + * us with an amproc procudure for hashing a key of the new type. + * Since we only have one such proc in amproc, it's number 1. 
+ */ + +#define HASHPROC 1 + +/* public routines */ + +extern void hashbuild(Relation heap, Relation index, int natts, + AttrNumber *attnum, IndexStrategy istrat, uint16 pcount, + Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo); +extern InsertIndexResult hashinsert(Relation rel, IndexTuple itup); +extern char *hashgettuple(IndexScanDesc scan, ScanDirection dir); +extern char *hashbeginscan(Relation rel, bool fromEnd, uint16 keysz, + ScanKey scankey); +extern void hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey); +extern void hashendscan(IndexScanDesc scan); +extern void hashmarkpos(IndexScanDesc scan); +extern void hashrestrpos(IndexScanDesc scan); +extern void hashdelete(Relation rel, ItemPointer tid); + +/* hashfunc.c */ +extern uint32 hashint2(int16 key); +extern uint32 hashint4(uint32 key); +extern uint32 hashfloat4(float32 keyp); +extern uint32 hashfloat8(float64 keyp); +extern uint32 hashoid(Oid key); +extern uint32 hashchar(char key); +extern uint32 hashchar2(uint16 intkey); +extern uint32 hashchar4(uint32 intkey); +extern uint32 hashchar8(char *key); +extern uint32 hashchar16(char *key); +extern uint32 hashtext(struct varlena *key); + +/* private routines */ + +/* hashinsert.c */ +extern InsertIndexResult _hash_doinsert(Relation rel, HashItem hitem); + + +/* hashovfl.c */ +extern Buffer _hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf); +extern Buffer _hash_freeovflpage(Relation rel, Buffer ovflbuf); +extern int32 _hash_initbitmap(Relation rel, HashMetaPage metap, int32 pnum, + int32 nbits, int32 ndx); +extern void _hash_squeezebucket(Relation rel, HashMetaPage metap, + Bucket bucket); + + +/* hashpage.c */ +extern void _hash_metapinit(Relation rel); +extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access); +extern void _hash_relbuf(Relation rel, Buffer buf, int access); +extern void _hash_wrtbuf(Relation rel, Buffer buf); +extern void _hash_wrtnorelbuf(Relation rel, Buffer buf); +extern Page 
_hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access, + int to_access); +extern void _hash_pageinit(Page page, Size size); +extern void _hash_pagedel(Relation rel, ItemPointer tid); +extern void _hash_expandtable(Relation rel, Buffer metabuf); + + +/* hashscan.c */ +extern void _hash_regscan(IndexScanDesc scan); +extern void _hash_dropscan(IndexScanDesc scan); +extern void _hash_adjscans(Relation rel, ItemPointer tid); + + +/* hashsearch.c */ +extern void _hash_search(Relation rel, int keysz, ScanKey scankey, + Buffer *bufP, HashMetaPage metap); +extern RetrieveIndexResult _hash_next(IndexScanDesc scan, ScanDirection dir); +extern RetrieveIndexResult _hash_first(IndexScanDesc scan, ScanDirection dir); +extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, + Buffer metabuf); + + +/* hashstrat.c */ +extern StrategyNumber _hash_getstrat(Relation rel, AttrNumber attno, + RegProcedure proc); +extern bool _hash_invokestrat(Relation rel, AttrNumber attno, + StrategyNumber strat, Datum left, Datum right); + + +/* hashutil.c */ +extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup, + HashMetaPage metap); +extern void _hash_freeskey(ScanKey skey); +extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup); +extern HashItem _hash_formitem(IndexTuple itup); +extern Bucket _hash_call(Relation rel, HashMetaPage metap, Datum key); +extern uint32 _hash_log2(uint32 num); +extern void _hash_checkpage(Page page, int flags); + +#endif /* HASH_H */ diff --git a/src/backend/access/hash/Makefile.inc b/src/backend/access/hash/Makefile.inc new file mode 100644 index 0000000000..8ea221bc26 --- /dev/null +++ b/src/backend/access/hash/Makefile.inc @@ -0,0 +1,18 @@ +#------------------------------------------------------------------------- +# +# Makefile.inc-- +# Makefile for access/hash (hash access method) +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +# $Header: 
/cvsroot/pgsql/src/backend/access/hash/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ +# +#------------------------------------------------------------------------- + +SUBSRCS+= hash.c hashfunc.c hashinsert.c hashovfl.c hashpage.c hashscan.c \ + hashsearch.c hashstrat.c hashutil.c + + + diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c new file mode 100644 index 0000000000..a4a4e16e59 --- /dev/null +++ b/src/backend/access/hash/hash.c @@ -0,0 +1,467 @@ +/*------------------------------------------------------------------------- + * + * hash.c-- + * Implementation of Margo Seltzer's Hashing package for postgres. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + * NOTES + * This file contains only the public interface routines. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" +#include "utils/excid.h" +#include "access/heapam.h" +#include "access/genam.h" +#include "access/sdir.h" +#include "access/hash.h" +#include "access/funcindex.h" +#include "nodes/execnodes.h" +#include "nodes/plannodes.h" +#include "executor/executor.h" +#include "executor/tuptable.h" +#include "catalog/index.h" + + +bool BuildingHash = false; + +/* + * hashbuild() -- build a new hash index. + * + * We use a global variable to record the fact that we're creating + * a new index. This is used to avoid high-concurrency locking, + * since the index won't be visible until this transaction commits + * and since building is guaranteed to be single-threaded. 
+ */ +void +hashbuild(Relation heap, + Relation index, + int natts, + AttrNumber *attnum, + IndexStrategy istrat, + uint16 pcount, + Datum *params, + FuncIndexInfo *finfo, + PredInfo *predInfo) +{ + HeapScanDesc hscan; + Buffer buffer; + HeapTuple htup; + IndexTuple itup; + TupleDesc htupdesc, itupdesc; + Datum *attdata; + bool *nulls; + InsertIndexResult res; + int nhtups, nitups; + int i; + HashItem hitem; + ExprContext *econtext; + TupleTable tupleTable; + TupleTableSlot *slot; + Oid hrelid, irelid; + Node *pred, *oldPred; + + /* note that this is a new btree */ + BuildingHash = true; + + pred = predInfo->pred; + oldPred = predInfo->oldPred; + + /* initialize the hash index metadata page (if this is a new index) */ + if (oldPred == NULL) + _hash_metapinit(index); + + /* get tuple descriptors for heap and index relations */ + htupdesc = RelationGetTupleDescriptor(heap); + itupdesc = RelationGetTupleDescriptor(index); + + /* get space for data items that'll appear in the index tuple */ + attdata = (Datum *) palloc(natts * sizeof(Datum)); + nulls = (bool *) palloc(natts * sizeof(bool)); + + /* + * If this is a predicate (partial) index, we will need to evaluate the + * predicate using ExecQual, which requires the current tuple to be in a + * slot of a TupleTable. In addition, ExecQual must have an ExprContext + * referring to that slot. Here, we initialize dummy TupleTable and + * ExprContext objects for this purpose. 
--Nels, Feb '92 + */ +#ifndef OMIT_PARTIAL_INDEX + if (pred != NULL || oldPred != NULL) { + tupleTable = ExecCreateTupleTable(1); + slot = ExecAllocTableSlot(tupleTable); + econtext = makeNode(ExprContext); + FillDummyExprContext(econtext, slot, htupdesc, buffer); + } +#endif /* OMIT_PARTIAL_INDEX */ + + /* start a heap scan */ + hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); + htup = heap_getnext(hscan, 0, &buffer); + + /* build the index */ + nhtups = nitups = 0; + + for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) { + + nhtups++; + + /* + * If oldPred != NULL, this is an EXTEND INDEX command, so skip + * this tuple if it was already in the existing partial index + */ + if (oldPred != NULL) { + /*SetSlotContents(slot, htup); */ +#ifndef OMIT_PARTIAL_INDEX + slot->val = htup; + if (ExecQual((List*)oldPred, econtext) == true) { + nitups++; + continue; + } +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* Skip this tuple if it doesn't satisfy the partial-index predicate */ + if (pred != NULL) { +#ifndef OMIT_PARTIAL_INDEX + /*SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List*)pred, econtext) == false) + continue; +#endif /* OMIT_PARTIAL_INDEX */ +} + + nitups++; + + /* + * For the current heap tuple, extract all the attributes + * we use in this index, and note which are null. + */ + for (i = 1; i <= natts; i++) { + int attoff; + bool attnull; + + /* + * Offsets are from the start of the tuple, and are + * zero-based; indices are one-based. The next call + * returns i - 1. That's data hiding for you. + */ + + /* attoff = i - 1 */ + attoff = AttrNumberGetAttrOffset(i); + + /* below, attdata[attoff] set to equal some datum & + * attnull is changed to indicate whether or not the attribute + * is null for this tuple + */ + attdata[attoff] = GetIndexValue(htup, + htupdesc, + attoff, + attnum, + finfo, + &attnull, + buffer); + nulls[attoff] = (attnull ? 
'n' : ' '); + } + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(itupdesc, attdata, nulls); + + /* + * If the single index key is null, we don't insert it into + * the index. Hash tables support scans on '='. + * Relational algebra says that A = B + * returns null if either A or B is null. This + * means that no qualification used in an index scan could ever + * return true on a null attribute. It also means that indices + * can't be used by ISNULL or NOTNULL scans, but that's an + * artifact of the strategy map architecture chosen in 1986, not + * of the way nulls are handled here. + */ + + if (itup->t_info & INDEX_NULL_MASK) { + pfree(itup); + continue; + } + + itup->t_tid = htup->t_ctid; + hitem = _hash_formitem(itup); + res = _hash_doinsert(index, hitem); + pfree(hitem); + pfree(itup); + pfree(res); + } + + /* okay, all heap tuples are indexed */ + heap_endscan(hscan); + + if (pred != NULL || oldPred != NULL) { +#ifndef OMIT_PARTIAL_INDEX + ExecDestroyTupleTable(tupleTable, true); + pfree(econtext); +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* + * Since we just counted the tuples in the heap, we update its + * stats in pg_class to guarantee that the planner takes advantage + * of the index we just created. Finally, only update statistics + * during normal index definitions, not for indices on system catalogs + * created during bootstrap processing. We must close the relations + * before updatings statistics to guarantee that the relcache entries + * are flushed when we increment the command counter in UpdateStats(). 
+ */ + if (IsNormalProcessingMode()) + { + hrelid = heap->rd_id; + irelid = index->rd_id; + heap_close(heap); + index_close(index); + UpdateStats(hrelid, nhtups, true); + UpdateStats(irelid, nitups, false); + if (oldPred != NULL) { + if (nitups == nhtups) pred = NULL; + UpdateIndexPredicate(irelid, oldPred, pred); + } + } + + /* be tidy */ + pfree(nulls); + pfree(attdata); + + /* all done */ + BuildingHash = false; +} + +/* + * hashinsert() -- insert an index tuple into a hash table. + * + * Hash on the index tuple's key, find the appropriate location + * for the new tuple, put it there, and return an InsertIndexResult + * to the caller. + */ +InsertIndexResult +hashinsert(Relation rel, IndexTuple itup) +{ + HashItem hitem; + InsertIndexResult res; + + if (itup->t_info & INDEX_NULL_MASK) + return ((InsertIndexResult) NULL); + + hitem = _hash_formitem(itup); + + res = _hash_doinsert(rel, hitem); + + pfree(hitem); + + return (res); +} + + +/* + * hashgettuple() -- Get the next tuple in the scan. + */ +char * +hashgettuple(IndexScanDesc scan, ScanDirection dir) +{ + RetrieveIndexResult res; + + /* + * If we've already initialized this scan, we can just advance it + * in the appropriate direction. If we haven't done so yet, we + * call a routine to get the first item in the scan. 
+ */ + + if (ItemPointerIsValid(&(scan->currentItemData))) + res = _hash_next(scan, dir); + else + res = _hash_first(scan, dir); + + return ((char *) res); +} + + +/* + * hashbeginscan() -- start a scan on a hash index + */ +char * +hashbeginscan(Relation rel, + bool fromEnd, + uint16 keysz, + ScanKey scankey) +{ + IndexScanDesc scan; + HashScanOpaque so; + + scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey); + so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData)); + so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer; + scan->opaque = so; + scan->flags = 0x0; + + /* register scan in case we change pages it's using */ + _hash_regscan(scan); + + return ((char *) scan); +} + +/* + * hashrescan() -- rescan an index relation + */ +void +hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey) +{ + ItemPointer iptr; + HashScanOpaque so; + + so = (HashScanOpaque) scan->opaque; + + /* we hold a read lock on the current page in the scan */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* reset the scan key */ + if (scan->numberOfKeys > 0) { + memmove(scan->keyData, + scankey, + scan->numberOfKeys * sizeof(ScanKeyData)); + } +} + +/* + * hashendscan() -- close down a scan + */ +void +hashendscan(IndexScanDesc scan) +{ + + ItemPointer iptr; + HashScanOpaque so; + + so = (HashScanOpaque) scan->opaque; + + /* release any locks we still hold */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { + if 
(BufferIsValid(so->hashso_mrkbuf)) + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* don't need scan registered anymore */ + _hash_dropscan(scan); + + /* be tidy */ +#ifdef PERFECT_MMGR + pfree (scan->opaque); +#endif /* PERFECT_MMGR */ +} + +/* + * hashmarkpos() -- save current scan position + * + */ +void +hashmarkpos(IndexScanDesc scan) +{ + ItemPointer iptr; + HashScanOpaque so; + + /* see if we ever call this code. if we do, then so_mrkbuf a + * useful element in the scan->opaque structure. if this procedure + * is never called, so_mrkbuf should be removed from the scan->opaque + * structure. + */ + elog(NOTICE, "Hashmarkpos() called."); + + so = (HashScanOpaque) scan->opaque; + + /* release lock on old marked data, if any */ + if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) { + _hash_relbuf(scan->relation, so->hashso_mrkbuf, HASH_READ); + so->hashso_mrkbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* bump lock on currentItemData and copy to currentMarkData */ + if (ItemPointerIsValid(&(scan->currentItemData))) { + so->hashso_mrkbuf = _hash_getbuf(scan->relation, + BufferGetBlockNumber(so->hashso_curbuf), + HASH_READ); + scan->currentMarkData = scan->currentItemData; + } +} + +/* + * hashrestrpos() -- restore scan to last saved position + */ +void +hashrestrpos(IndexScanDesc scan) +{ + ItemPointer iptr; + HashScanOpaque so; + + /* see if we ever call this code. if we do, then so_mrkbuf a + * useful element in the scan->opaque structure. if this procedure + * is never called, so_mrkbuf should be removed from the scan->opaque + * structure. 
+ */ + elog(NOTICE, "Hashrestrpos() called."); + + so = (HashScanOpaque) scan->opaque; + + /* release lock on current data, if any */ + if (ItemPointerIsValid(iptr = &(scan->currentItemData))) { + _hash_relbuf(scan->relation, so->hashso_curbuf, HASH_READ); + so->hashso_curbuf = InvalidBuffer; + ItemPointerSetInvalid(iptr); + } + + /* bump lock on currentMarkData and copy to currentItemData */ + if (ItemPointerIsValid(&(scan->currentMarkData))) { + so->hashso_curbuf = + _hash_getbuf(scan->relation, + BufferGetBlockNumber(so->hashso_mrkbuf), + HASH_READ); + + scan->currentItemData = scan->currentMarkData; + } +} + +/* stubs */ +void +hashdelete(Relation rel, ItemPointer tid) +{ + /* adjust any active scans that will be affected by this deletion */ + _hash_adjscans(rel, tid); + + /* delete the data from the page */ + _hash_pagedel(rel, tid); +} + diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c new file mode 100644 index 0000000000..6b37de2991 --- /dev/null +++ b/src/backend/access/hash/hashfunc.c @@ -0,0 +1,276 @@ +/*------------------------------------------------------------------------- + * + * hashfunc.c-- + * Comparison functions for hash access method. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + * NOTES + * These functions are stored in pg_amproc. For each operator class + * defined on hash tables, they compute the hash value of the argument. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "utils/nabstime.h" + +uint32 hashint2(int16 key) +{ + return ((uint32) ~key); +} + +uint32 hashint4(uint32 key) +{ + return (~key); +} + +/* Hash function from Chris Torek. 
*/ +uint32 hashfloat4(float32 keyp) +{ + int len; + int loop; + uint32 h; + char *kp = (char *) keyp; + + len = sizeof(float32data); + +#define HASH4a h = (h << 5) - h + *kp++; +#define HASH4b h = (h << 5) + h + *kp++; +#define HASH4 HASH4b + + + h = 0; + if (len > 0) { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) { + case 0: + do { /* All fall throughs */ + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } + } + return (h); +} + + +uint32 hashfloat8(float64 keyp) +{ + int len; + int loop; + uint32 h; + char *kp = (char *) keyp; + + len = sizeof(float64data); + +#define HASH4a h = (h << 5) - h + *kp++; +#define HASH4b h = (h << 5) + h + *kp++; +#define HASH4 HASH4b + + + h = 0; + if (len > 0) { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) { + case 0: + do { /* All fall throughs */ + HASH4; + case 7: + HASH4; + case 6: + HASH4; + case 5: + HASH4; + case 4: + HASH4; + case 3: + HASH4; + case 2: + HASH4; + case 1: + HASH4; + } while (--loop); + } + } + return (h); +} + + +uint32 hashoid(Oid key) +{ + return ((uint32) ~key); +} + + +uint32 hashchar(char key) +{ + int len; + uint32 h; + + len = sizeof(char); + +#define PRIME1 37 +#define PRIME2 1048583 + + h = 0; + /* Convert char to integer */ + h = h * PRIME1 ^ (key - ' '); + h %= PRIME2; + + return (h); +} + +uint32 hashchar2(uint16 intkey) +{ + uint32 h; + int len; + char *key = (char *) &intkey; + + h = 0; + len = sizeof(uint16); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); +} + +uint32 hashchar4(uint32 intkey) +{ + uint32 h; + int len; + char *key = (char *) &intkey; + + h = 0; + len = sizeof(uint32); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); +} + +uint32 hashchar8(char *key) +{ + uint32 h; + int len; + + h = 0; + len = sizeof(char8); + 
/* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); +} + +uint32 hashname(NameData *n) +{ + uint32 h; + int len; + char *key; + + key = n->data; + + h = 0; + len = NAMEDATALEN; + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); +} + + +uint32 hashchar16(char *key) +{ + uint32 h; + int len; + + h = 0; + len = sizeof(char16); + /* Convert string to integer */ + while (len--) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + + return (h); +} + + +/* + * (Comment from the original db3 hashing code: ) + * + * "This is INCREDIBLY ugly, but fast. We break the string up into 8 byte + * units. On the first time through the loop we get the 'leftover bytes' + * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle + * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If + * this routine is heavily used enough, it's worth the ugly coding. 
+ * + * "OZ's original sdbm hash" + */ +uint32 hashtext(struct varlena *key) +{ + int keylen; + char *keydata; + uint32 n; + int loop; + + keydata = VARDATA(key); + keylen = VARSIZE(key); + + /* keylen includes the four bytes in which string keylength is stored */ + keylen -= sizeof(VARSIZE(key)); + +#define HASHC n = *keydata++ + 65599 * n + + n = 0; + if (keylen > 0) { + loop = (keylen + 8 - 1) >> 3; + + switch (keylen & (8 - 1)) { + case 0: + do { /* All fall throughs */ + HASHC; + case 7: + HASHC; + case 6: + HASHC; + case 5: + HASHC; + case 4: + HASHC; + case 3: + HASHC; + case 2: + HASHC; + case 1: + HASHC; + } while (--loop); + } + } + return (n); +} diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c new file mode 100644 index 0000000000..c514cc614d --- /dev/null +++ b/src/backend/access/hash/hashinsert.c @@ -0,0 +1,239 @@ +/*------------------------------------------------------------------------- + * + * hashinsert.c-- + * Item insertion in hash tables for Postgres. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" +#include "utils/excid.h" + +#include "access/heapam.h" +#include "access/genam.h" +#include "access/hash.h" + +static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, ScanKey scankey, HashItem hitem, Buffer metabuf); +static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem); + +/* + * _hash_doinsert() -- Handle insertion of a single HashItem in the table. 
+ * + * This routine is called by the public interface routines, hashbuild + * and hashinsert. By here, hashitem is filled in, and has a unique + * (xid, seqno) pair. The datum to be used as a "key" is in the + * hashitem. + */ +InsertIndexResult +_hash_doinsert(Relation rel, HashItem hitem) +{ + Buffer buf; + Buffer metabuf; + BlockNumber blkno; + HashMetaPage metap; + IndexTuple itup; + InsertIndexResult res; + ScanKey itup_scankey; + int natts; + Page page; + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* we need a scan key to do our search, so build one */ + itup = &(hitem->hash_itup); + if ((natts = rel->rd_rel->relnatts) != 1) + elog(WARN, "Hash indices valid for only one index key."); + itup_scankey = _hash_mkscankey(rel, itup, metap); + + /* + * find the first page in the bucket chain containing this key and + * place it in buf. _hash_search obtains a read lock for us. + */ + _hash_search(rel, natts, itup_scankey, &buf, metap); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + + /* + * trade in our read lock for a write lock so that we can do the + * insertion. + */ + blkno = BufferGetBlockNumber(buf); + _hash_relbuf(rel, buf, HASH_READ); + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + + + /* + * XXX btree comment (haven't decided what to do in hash): don't + * think the bucket can be split while we're reading the metapage. + * + * If the page was split between the time that we surrendered our + * read lock and acquired our write lock, then this page may no + * longer be the right place for the key we want to insert. + */ + + /* do the insertion */ + res = _hash_insertonpg(rel, buf, natts, itup_scankey, + hitem, metabuf); + + /* be tidy */ + _hash_freeskey(itup_scankey); + + return (res); +} + +/* + * _hash_insertonpg() -- Insert a tuple on a particular page in the table. 
+ * + * This recursive procedure does the following things: + * + * + if necessary, splits the target page. + * + inserts the tuple. + * + * On entry, we must have the right buffer on which to do the + * insertion, and the buffer must be pinned and locked. On return, + * we will have dropped both the pin and the write lock on the buffer. + * + */ +static InsertIndexResult +_hash_insertonpg(Relation rel, + Buffer buf, + int keysz, + ScanKey scankey, + HashItem hitem, + Buffer metabuf) +{ + InsertIndexResult res; + Page page; + BlockNumber itup_blkno; + OffsetNumber itup_off; + int itemsz; + HashPageOpaque pageopaque; + bool do_expand = false; + Buffer ovflbuf; + HashMetaPage metap; + Bucket bucket; + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + bucket = pageopaque->hasho_bucket; + + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + + while (PageGetFreeSpace(page) < itemsz) { + /* + * no space on this page; check for an overflow page + */ + if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) { + /* + * ovfl page exists; go get it. if it doesn't have room, + * we'll find out next pass through the loop test above. + */ + ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno, + HASH_WRITE); + _hash_relbuf(rel, buf, HASH_WRITE); + buf = ovflbuf; + page = BufferGetPage(buf); + } else { + /* + * we're at the end of the bucket chain and we haven't + * found a page with enough room. allocate a new overflow + * page. 
+ */ + do_expand = true; + ovflbuf = _hash_addovflpage(rel, &metabuf, buf); + _hash_relbuf(rel, buf, HASH_WRITE); + buf = ovflbuf; + page = BufferGetPage(buf); + + if (PageGetFreeSpace(page) < itemsz) { + /* it doesn't fit on an empty page -- give up */ + elog(WARN, "hash item too large"); + } + } + _hash_checkpage(page, LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(pageopaque->hasho_bucket == bucket); + } + + itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem); + itup_blkno = BufferGetBlockNumber(buf); + + /* by here, the new tuple is inserted */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + + ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); + + if (res != NULL) { + /* + * Increment the number of keys in the table. + * We switch lock access type just for a moment + * to allow greater accessibility to the metapage. + */ + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, + HASH_READ, HASH_WRITE); + metap->hashm_nkeys += 1; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, + HASH_WRITE, HASH_READ); + + } + + _hash_wrtbuf(rel, buf); + + if (do_expand || + (metap->hashm_nkeys / (metap->hashm_maxbucket + 1)) + > metap->hashm_ffactor) { + _hash_expandtable(rel, metabuf); + } + _hash_relbuf(rel, metabuf, HASH_READ); + return (res); +} + +/* + * _hash_pgaddtup() -- add a tuple to a particular page in the index. + * + * This routine adds the tuple to the page as requested, and keeps the + * write lock and reference associated with the page's buffer. It is + * an error to call pgaddtup() without a write lock and reference. 
+ */ +static OffsetNumber +_hash_pgaddtup(Relation rel, + Buffer buf, + int keysz, + ScanKey itup_scankey, + Size itemsize, + HashItem hitem) +{ + OffsetNumber itup_off; + Page page; + + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + + itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + (void) PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED); + + /* write the buffer, but hold our lock */ + _hash_wrtnorelbuf(rel, buf); + + return (itup_off); +} diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c new file mode 100644 index 0000000000..55ee9e9ce7 --- /dev/null +++ b/src/backend/access/hash/hashovfl.c @@ -0,0 +1,614 @@ +/*------------------------------------------------------------------------- + * + * hashovfl.c-- + * Overflow page management code for the Postgres hash access method + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + * NOTES + * Overflow pages look like ordinary relation pages. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/rel.h" +#include "utils/excid.h" + +#include "access/genam.h" +#include "access/hash.h" + +static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer *metabufp); +static uint32 _hash_firstfreebit(uint32 map); + +/* + * _hash_addovflpage + * + * Add an overflow page to the page currently pointed to by the buffer + * argument 'buf'. + * + * *Metabufp has a read lock upon entering the function; buf has a + * write lock. 
+ * + */ +Buffer +_hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf) +{ + + OverflowPageAddress oaddr; + BlockNumber ovflblkno; + Buffer ovflbuf; + HashMetaPage metap; + HashPageOpaque ovflopaque; + HashPageOpaque pageopaque; + Page page; + Page ovflpage; + + /* this had better be the last page in a bucket chain */ + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno)); + + metap = (HashMetaPage) BufferGetPage(*metabufp); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* allocate an empty overflow page */ + oaddr = _hash_getovfladdr(rel, metabufp); + if (oaddr == InvalidOvflAddress) { + elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr."); + } + ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr))); + Assert(BlockNumberIsValid(ovflblkno)); + ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE); + Assert(BufferIsValid(ovflbuf)); + ovflpage = BufferGetPage(ovflbuf); + + /* initialize the new overflow page */ + _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); + ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); + ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf); + ovflopaque->hasho_nextblkno = InvalidBlockNumber; + ovflopaque->hasho_flag = LH_OVERFLOW_PAGE; + ovflopaque->hasho_oaddr = oaddr; + ovflopaque->hasho_bucket = pageopaque->hasho_bucket; + _hash_wrtnorelbuf(rel, ovflbuf); + + /* logically chain overflow page to previous page */ + pageopaque->hasho_nextblkno = ovflblkno; + _hash_wrtnorelbuf(rel, buf); + return (ovflbuf); +} + +/* + * _hash_getovfladdr() + * + * Find an available overflow page and return its address. + * + * When we enter this function, we have a read lock on *metabufp which + * we change to a write lock immediately. Before exiting, the write lock + * is exchanged for a read lock. 
+ * + */ +static OverflowPageAddress +_hash_getovfladdr(Relation rel, Buffer *metabufp) +{ + HashMetaPage metap; + Buffer mapbuf; + BlockNumber blkno; + PageOffset offset; + OverflowPageAddress oaddr; + SplitNumber splitnum; + uint32 *freep; + uint32 max_free; + uint32 bit; + uint32 first_page; + uint32 free_bit; + uint32 free_page; + uint32 in_use_bits; + uint32 i, j; + + metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE); + + splitnum = metap->OVFL_POINT; + max_free = metap->SPARES[splitnum]; + + free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT); + free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1); + + /* Look through all the free maps to find the first free block */ + first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT); + for ( i = first_page; i <= free_page; i++ ) { + Page mappage; + + blkno = metap->hashm_mapp[i]; + mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); + mappage = BufferGetPage(mapbuf); + _hash_checkpage(mappage, LH_BITMAP_PAGE); + freep = HashPageGetBitmap(mappage); + Assert(freep); + + if (i == free_page) + in_use_bits = free_bit; + else + in_use_bits = BMPGSZ_BIT(metap) - 1; + + if (i == first_page) { + bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1); + j = bit / BITS_PER_MAP; + bit = bit & ~(BITS_PER_MAP - 1); + } else { + bit = 0; + j = 0; + } + for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) + if (freep[j] != ALL_SET) + goto found; + } + + /* No Free Page Found - have to allocate a new page */ + metap->LAST_FREED = metap->SPARES[splitnum]; + metap->SPARES[splitnum]++; + offset = metap->SPARES[splitnum] - + (splitnum ? metap->SPARES[splitnum - 1] : 0); + +#define OVMSG "HASH: Out of overflow pages. 
Out of luck.\n" + + if (offset > SPLITMASK) { + if (++splitnum >= NCACHED) { + elog(WARN, OVMSG); + } + metap->OVFL_POINT = splitnum; + metap->SPARES[splitnum] = metap->SPARES[splitnum-1]; + metap->SPARES[splitnum-1]--; + offset = 0; + } + + /* Check if we need to allocate a new bitmap page */ + if (free_bit == BMPGSZ_BIT(metap) - 1) { + /* won't be needing old map page */ + + _hash_relbuf(rel, mapbuf, HASH_WRITE); + + free_page++; + if (free_page >= NCACHED) { + elog(WARN, OVMSG); + } + + /* + * This is tricky. The 1 indicates that you want the new page + * allocated with 1 clear bit. Actually, you are going to + * allocate 2 pages from this map. The first is going to be + * the map page, the second is the overflow page we were + * looking for. The init_bitmap routine automatically, sets + * the first bit of itself to indicate that the bitmap itself + * is in use. We would explicitly set the second bit, but + * don't have to if we tell init_bitmap not to leave it clear + * in the first place. + */ + if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset), + 1, free_page)) { + elog(WARN, "overflow_page: problem with _hash_initbitmap."); + } + metap->SPARES[splitnum]++; + offset++; + if (offset > SPLITMASK) { + if (++splitnum >= NCACHED) { + elog(WARN, OVMSG); + } + metap->OVFL_POINT = splitnum; + metap->SPARES[splitnum] = metap->SPARES[splitnum-1]; + metap->SPARES[splitnum-1]--; + offset = 0; + } + } else { + + /* + * Free_bit addresses the last used bit. Bump it to address + * the first available bit. + */ + free_bit++; + SETBIT(freep, free_bit); + _hash_wrtbuf(rel, mapbuf); + } + + /* Calculate address of the new overflow page */ + oaddr = OADDR_OF(splitnum, offset); + _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); + return (oaddr); + + found: + bit = bit + _hash_firstfreebit(freep[j]); + SETBIT(freep, bit); + _hash_wrtbuf(rel, mapbuf); + + /* + * Bits are addressed starting with 0, but overflow pages are addressed + * beginning at 1. 
Bit is a bit addressnumber, so we need to increment + * it to convert it to a page number. + */ + + bit = 1 + bit + (i * BMPGSZ_BIT(metap)); + if (bit >= metap->LAST_FREED) { + metap->LAST_FREED = bit - 1; + } + + /* Calculate the split number for this page */ + for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++) + ; + offset = (i ? bit - metap->SPARES[i - 1] : bit); + if (offset >= SPLITMASK) { + elog(WARN, OVMSG); + } + + /* initialize this page */ + oaddr = OADDR_OF(i, offset); + _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ); + return (oaddr); +} + +/* + * _hash_firstfreebit() + * + * Return the first bit that is not set in the argument 'map'. This + * function is used to find an available overflow page within a + * splitnumber. + * + */ +static uint32 +_hash_firstfreebit(uint32 map) +{ + uint32 i, mask; + + mask = 0x1; + for (i = 0; i < BITS_PER_MAP; i++) { + if (!(mask & map)) + return (i); + mask = mask << 1; + } + return (i); +} + +/* + * _hash_freeovflpage() - + * + * Mark this overflow page as free and return a buffer with + * the page that follows it (which may be defined as + * InvalidBuffer). 
+ * + */ +Buffer +_hash_freeovflpage(Relation rel, Buffer ovflbuf) +{ + HashMetaPage metap; + Buffer metabuf; + Buffer mapbuf; + BlockNumber prevblkno; + BlockNumber blkno; + BlockNumber nextblkno; + HashPageOpaque ovflopaque; + Page ovflpage; + Page mappage; + OverflowPageAddress addr; + SplitNumber splitnum; + uint32 *freep; + uint32 ovflpgno; + int32 bitmappage, bitmapbit; + Bucket bucket; + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + ovflpage = BufferGetPage(ovflbuf); + _hash_checkpage(ovflpage, LH_OVERFLOW_PAGE); + ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); + addr = ovflopaque->hasho_oaddr; + nextblkno = ovflopaque->hasho_nextblkno; + prevblkno = ovflopaque->hasho_prevblkno; + bucket = ovflopaque->hasho_bucket; + (void) memset(ovflpage, 0, BufferGetPageSize(ovflbuf)); + _hash_wrtbuf(rel, ovflbuf); + + /* + * fix up the bucket chain. this is a doubly-linked list, so we + * must fix up the bucket chain members behind and ahead of the + * overflow page being deleted. + * + * XXX this should look like: + * - lock prev/next + * - modify/write prev/next (how to do write ordering with a + * doubly-linked list???) 
+ * - unlock prev/next + */ + if (BlockNumberIsValid(prevblkno)) { + Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE); + Page prevpage = BufferGetPage(prevbuf); + HashPageOpaque prevopaque = + (HashPageOpaque) PageGetSpecialPointer(prevpage); + + _hash_checkpage(prevpage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + Assert(prevopaque->hasho_bucket == bucket); + prevopaque->hasho_nextblkno = nextblkno; + _hash_wrtbuf(rel, prevbuf); + } + if (BlockNumberIsValid(nextblkno)) { + Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE); + Page nextpage = BufferGetPage(nextbuf); + HashPageOpaque nextopaque = + (HashPageOpaque) PageGetSpecialPointer(nextpage); + + _hash_checkpage(nextpage, LH_OVERFLOW_PAGE); + Assert(nextopaque->hasho_bucket == bucket); + nextopaque->hasho_prevblkno = prevblkno; + _hash_wrtbuf(rel, nextbuf); + } + + /* + * Fix up the overflow page bitmap that tracks this particular + * overflow page. The bitmap can be found in the MetaPageData + * array element hashm_mapp[bitmappage]. + */ + splitnum = (addr >> SPLITSHIFT); + ovflpgno = + (splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1; + + if (ovflpgno < metap->LAST_FREED) { + metap->LAST_FREED = ovflpgno; + } + + bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT)); + bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1); + + blkno = metap->hashm_mapp[bitmappage]; + mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE); + mappage = BufferGetPage(mapbuf); + _hash_checkpage(mappage, LH_BITMAP_PAGE); + freep = HashPageGetBitmap(mappage); + CLRBIT(freep, bitmapbit); + _hash_wrtbuf(rel, mapbuf); + + _hash_relbuf(rel, metabuf, HASH_WRITE); + + /* + * now instantiate the page that replaced this one, + * if it exists, and return that buffer with a write lock. + */ + if (BlockNumberIsValid(nextblkno)) { + return (_hash_getbuf(rel, nextblkno, HASH_WRITE)); + } else { + return (InvalidBuffer); + } +} + + +/* + * _hash_initbitmap() + * + * Initialize a new bitmap page. 
The metapage has a write-lock upon + * entering the function. + * + * 'pnum' is the OverflowPageAddress of the new bitmap page. + * 'nbits' is how many bits to clear (i.e., make available) in the new + * bitmap page. the remainder of the bits (as well as the first bit, + * representing the bitmap page itself) will be set. + * 'ndx' is the 0-based offset of the new bitmap page within the + * metapage's array of bitmap page OverflowPageAddresses. + */ + +#define INT_MASK ((1 << INT_TO_BIT) -1) + +int32 +_hash_initbitmap(Relation rel, + HashMetaPage metap, + int32 pnum, + int32 nbits, + int32 ndx) +{ + Buffer buf; + BlockNumber blkno; + Page pg; + HashPageOpaque op; + uint32 *freep; + int clearbytes, clearints; + + blkno = OADDR_TO_BLKNO(pnum); + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + pg = BufferGetPage(buf); + _hash_pageinit(pg, BufferGetPageSize(buf)); + op = (HashPageOpaque) PageGetSpecialPointer(pg); + op->hasho_oaddr = InvalidOvflAddress; + op->hasho_prevblkno = InvalidBlockNumber; + op->hasho_nextblkno = InvalidBlockNumber; + op->hasho_flag = LH_BITMAP_PAGE; + op->hasho_bucket = -1; + + freep = HashPageGetBitmap(pg); + + /* set all of the bits above 'nbits' to 1 */ + clearints = ((nbits - 1) >> INT_TO_BIT) + 1; + clearbytes = clearints << INT_TO_BYTE; + (void) memset((char *) freep, 0, clearbytes); + (void) memset(((char *) freep) + clearbytes, 0xFF, + BMPGSZ_BYTE(metap) - clearbytes); + freep[clearints - 1] = ALL_SET << (nbits & INT_MASK); + + /* bit 0 represents the new bitmap page */ + SETBIT(freep, 0); + + /* metapage already has a write lock */ + metap->hashm_nmaps++; + metap->hashm_mapp[ndx] = blkno; + + /* write out the new bitmap page (releasing its locks) */ + _hash_wrtbuf(rel, buf); + + return (0); +} + + +/* + * _hash_squeezebucket(rel, bucket) + * + * Try to squeeze the tuples onto pages occuring earlier in the + * bucket chain in an attempt to free overflow pages. 
When we start + * the "squeezing", the page from which we start taking tuples (the + * "read" page) is the last bucket in the bucket chain and the page + * onto which we start squeezing tuples (the "write" page) is the + * first page in the bucket chain. The read page works backward and + * the write page works forward; the procedure terminates when the + * read page and write page are the same page. + */ +void +_hash_squeezebucket(Relation rel, + HashMetaPage metap, + Bucket bucket) +{ + Buffer wbuf; + Buffer rbuf; + BlockNumber wblkno; + BlockNumber rblkno; + Page wpage; + Page rpage; + HashPageOpaque wopaque; + HashPageOpaque ropaque; + OffsetNumber woffnum; + OffsetNumber roffnum; + HashItem hitem; + int itemsz; + +/* elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */ + + /* + * start squeezing into the base bucket page. + */ + wblkno = BUCKET_TO_BLKNO(bucket); + wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); + wpage = BufferGetPage(wbuf); + _hash_checkpage(wpage, LH_BUCKET_PAGE); + wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); + + /* + * if there aren't any overflow pages, there's nothing to squeeze. + */ + if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) { + _hash_relbuf(rel, wbuf, HASH_WRITE); + return; + } + + /* + * find the last page in the bucket chain by starting at the base + * bucket page and working forward. + * + * XXX if chains tend to be long, we should probably move forward + * using HASH_READ and then _hash_chgbufaccess to HASH_WRITE when + * we reach the end. if they are short we probably don't care + * very much. if the hash function is working at all, they had + * better be short.. 
+ */ + ropaque = wopaque; + do { + rblkno = ropaque->hasho_nextblkno; + if (ropaque != wopaque) { + _hash_relbuf(rel, rbuf, HASH_WRITE); + } + rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); + rpage = BufferGetPage(rbuf); + _hash_checkpage(rpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(rpage)); + ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); + Assert(ropaque->hasho_bucket == bucket); + } while (BlockNumberIsValid(ropaque->hasho_nextblkno)); + + /* + * squeeze the tuples. + */ + roffnum = FirstOffsetNumber; + for(;;) { + hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum)); + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + + /* + * walk up the bucket chain, looking for a page big enough for + * this item. + */ + while (PageGetFreeSpace(wpage) < itemsz) { + wblkno = wopaque->hasho_nextblkno; + + _hash_wrtbuf(rel, wbuf); + + if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) { + _hash_wrtbuf(rel, rbuf); + /* wbuf is already released */ + return; + } + + wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE); + wpage = BufferGetPage(wbuf); + _hash_checkpage(wpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(wpage)); + wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); + Assert(wopaque->hasho_bucket == bucket); + } + + /* + * if we're here, we have found room so insert on the "write" + * page. + */ + woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage)); + (void) PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED); + + /* + * delete the tuple from the "read" page. + * PageIndexTupleDelete repacks the ItemId array, so 'roffnum' + * will be "advanced" to the "next" ItemId. + */ + PageIndexTupleDelete(rpage, roffnum); + _hash_wrtnorelbuf(rel, rbuf); + + /* + * if the "read" page is now empty because of the deletion, + * free it. 
+ */ + if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) { + rblkno = ropaque->hasho_prevblkno; + Assert(BlockNumberIsValid(rblkno)); + + /* + * free this overflow page. the extra _hash_relbuf is + * because _hash_freeovflpage gratuitously returns the + * next page (we want the previous page and will get it + * ourselves later). + */ + rbuf = _hash_freeovflpage(rel, rbuf); + if (BufferIsValid(rbuf)) { + _hash_relbuf(rel, rbuf, HASH_WRITE); + } + + if (rblkno == wblkno) { + /* rbuf is already released */ + _hash_wrtbuf(rel, wbuf); + return; + } + + rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE); + rpage = BufferGetPage(rbuf); + _hash_checkpage(rpage, LH_OVERFLOW_PAGE); + Assert(!PageIsEmpty(rpage)); + ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage); + Assert(ropaque->hasho_bucket == bucket); + + roffnum = FirstOffsetNumber; + } + } +} diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c new file mode 100644 index 0000000000..2c6ebed835 --- /dev/null +++ b/src/backend/access/hash/hashpage.c @@ -0,0 +1,669 @@ +/*------------------------------------------------------------------------- + * + * hashpage.c-- + * Hash table page management code for the Postgres hash access method + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + * NOTES + * Postgres hash pages look like ordinary relation pages. The opaque + * data at high addresses includes information about the page including + * whether a page is an overflow page or a true bucket, the block + * numbers of the preceding and following pages, and the overflow + * address of the page if it is an overflow page. + * + * The first page in a hash relation, page zero, is special -- it stores + * information describing the hash table; it is referred to as teh + * "meta page." Pages one and higher store the actual data. 
+ * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/rel.h" +#include "utils/excid.h" + +#include "access/genam.h" +#include "access/hash.h" + +static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access); +static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access); +static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket); + +/* + * We use high-concurrency locking on hash indices. There are two cases in + * which we don't do locking. One is when we're building the index. + * Since the creating transaction has not committed, no one can see + * the index, and there's no reason to share locks. The second case + * is when we're just starting up the database system. We use some + * special-purpose initialization code in the relation cache manager + * (see utils/cache/relcache.c) to allow us to do indexed scans on + * the system catalogs before we'd normally be able to. This happens + * before the lock table is fully initialized, so we can't use it. + * Strictly speaking, this violates 2pl, but we don't do 2pl on the + * system catalogs anyway. + */ + + +#define USELOCKING (!BuildingHash && !IsInitProcessingMode()) + + +/* + * _hash_metapinit() -- Initialize the metadata page of a hash index, + * the two buckets that we begin with and the initial + * bitmap page. + */ +void +_hash_metapinit(Relation rel) +{ + HashMetaPage metap; + HashPageOpaque pageopaque; + Buffer metabuf; + Buffer buf; + Page pg; + int nbuckets; + uint32 nelem; /* number elements */ + uint32 lg2nelem; /* _hash_log2(nelem) */ + uint32 nblocks; + uint16 i; + + /* can't be sharing this with anyone, now... 
*/ + if (USELOCKING) + RelationSetLockForWrite(rel); + + if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) { + elog(WARN, "Cannot initialize non-empty hash table %s", + RelationGetRelationName(rel)); + } + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + pg = BufferGetPage(metabuf); + metap = (HashMetaPage) pg; + _hash_pageinit(pg, BufferGetPageSize(metabuf)); + + metap->hashm_magic = HASH_MAGIC; + metap->hashm_version = HASH_VERSION; + metap->hashm_nkeys = 0; + metap->hashm_nmaps = 0; + metap->hashm_ffactor = DEFAULT_FFACTOR; + metap->hashm_bsize = BufferGetPageSize(metabuf); + metap->hashm_bshift = _hash_log2(metap->hashm_bsize); + for (i = metap->hashm_bshift; i > 0; --i) { + if ((1 << i) < (metap->hashm_bsize - + (DOUBLEALIGN(sizeof(PageHeaderData)) + + DOUBLEALIGN(sizeof(HashPageOpaqueData))))) { + break; + } + } + Assert(i); + metap->hashm_bmsize = 1 << i; + metap->hashm_procid = index_getprocid(rel, 1, HASHPROC); + + /* + * Make nelem = 2 rather than 0 so that we end up allocating space + * for the next greater power of two number of buckets. 
+ */ + nelem = 2; + lg2nelem = 1; /*_hash_log2(MAX(nelem, 2)) */ + nbuckets = 2; /*1 << lg2nelem */ + + memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares)); + memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp)); + + metap->hashm_spares[lg2nelem] = 2; /* lg2nelem + 1 */ + metap->hashm_spares[lg2nelem + 1] = 2; /* lg2nelem + 1 */ + metap->hashm_ovflpoint = 1; /* lg2nelem */ + metap->hashm_lastfreed = 2; + + metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */ + metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */ + + pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); + pageopaque->hasho_oaddr = InvalidOvflAddress; + pageopaque->hasho_prevblkno = InvalidBlockNumber; + pageopaque->hasho_nextblkno = InvalidBlockNumber; + pageopaque->hasho_flag = LH_META_PAGE; + pageopaque->hasho_bucket = -1; + + /* + * First bitmap page is at: splitpoint lg2nelem page offset 1 which + * turns out to be page 3. Couldn't initialize page 3 until we created + * the first two buckets above. + */ + if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0)) + elog(WARN, "Problem with _hash_initbitmap."); + + /* all done */ + _hash_wrtnorelbuf(rel, metabuf); + + /* + * initialize the first two buckets + */ + for (i = 0; i <= 1; i++) { + buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE); + pg = BufferGetPage(buf); + _hash_pageinit(pg, BufferGetPageSize(buf)); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); + pageopaque->hasho_oaddr = InvalidOvflAddress; + pageopaque->hasho_prevblkno = InvalidBlockNumber; + pageopaque->hasho_nextblkno = InvalidBlockNumber; + pageopaque->hasho_flag = LH_BUCKET_PAGE; + pageopaque->hasho_bucket = i; + _hash_wrtbuf(rel, buf); + } + + _hash_relbuf(rel, metabuf, HASH_WRITE); + + if (USELOCKING) + RelationUnsetLockForWrite(rel); +} + +/* + * _hash_getbuf() -- Get a buffer by block number for read or write. 
+ * + * When this routine returns, the appropriate lock is set on the + * requested buffer its reference count is correct. + * + * XXX P_NEW is not used because, unlike the tree structures, we + * need the bucket blocks to be at certain block numbers. we must + * depend on the caller to call _hash_pageinit on the block if it + * knows that this is a new block. + */ +Buffer +_hash_getbuf(Relation rel, BlockNumber blkno, int access) +{ + Buffer buf; + + if (blkno == P_NEW) { + elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW"); + } + switch (access) { + case HASH_WRITE: + case HASH_READ: + _hash_setpagelock(rel, blkno, access); + break; + default: + elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %.*s", + access, NAMEDATALEN, RelationGetRelationName(rel)); + break; + } + buf = ReadBuffer(rel, blkno); + + /* ref count and lock type are correct */ + return (buf); +} + +/* + * _hash_relbuf() -- release a locked buffer. + */ +void +_hash_relbuf(Relation rel, Buffer buf, int access) +{ + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + + switch (access) { + case HASH_WRITE: + case HASH_READ: + _hash_unsetpagelock(rel, blkno, access); + break; + default: + elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %.*s", + access, blkno, NAMEDATALEN, RelationGetRelationName(rel)); + } + + ReleaseBuffer(buf); +} + +/* + * _hash_wrtbuf() -- write a hash page to disk. + * + * This routine releases the lock held on the buffer and our reference + * to it. It is an error to call _hash_wrtbuf() without a write lock + * or a reference to the buffer. + */ +void +_hash_wrtbuf(Relation rel, Buffer buf) +{ + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteBuffer(buf); + _hash_unsetpagelock(rel, blkno, HASH_WRITE); +} + +/* + * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release + * our reference or lock. + * + * It is an error to call _hash_wrtnorelbuf() without a write lock + * or a reference to the buffer. 
+ */ +void +_hash_wrtnorelbuf(Relation rel, Buffer buf) +{ + BlockNumber blkno; + + blkno = BufferGetBlockNumber(buf); + WriteNoReleaseBuffer(buf); +} + +Page +_hash_chgbufaccess(Relation rel, + Buffer *bufp, + int from_access, + int to_access) +{ + BlockNumber blkno; + + blkno = BufferGetBlockNumber(*bufp); + + switch (from_access) { + case HASH_WRITE: + _hash_wrtbuf(rel, *bufp); + break; + case HASH_READ: + _hash_relbuf(rel, *bufp, from_access); + break; + default: + elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %.*s", + from_access, blkno, NAMEDATALEN, RelationGetRelationName(rel)); + break; + } + *bufp = _hash_getbuf(rel, blkno, to_access); + return (BufferGetPage(*bufp)); +} + +/* + * _hash_pageinit() -- Initialize a new page. + */ +void +_hash_pageinit(Page page, Size size) +{ + Assert(((PageHeader) page)->pd_lower == 0); + Assert(((PageHeader) page)->pd_upper == 0); + Assert(((PageHeader) page)->pd_special == 0); + + /* + * Cargo-cult programming -- don't really need this to be zero, but + * creating new pages is an infrequent occurrence and it makes me feel + * good when I know they're empty. 
+ */ + memset(page, 0, size); + + PageInit(page, size, sizeof(HashPageOpaqueData)); +} + +static void +_hash_setpagelock(Relation rel, + BlockNumber blkno, + int access) +{ + ItemPointerData iptr; + + if (USELOCKING) { + ItemPointerSet(&iptr, blkno, 1); + + switch (access) { + case HASH_WRITE: + RelationSetSingleWLockPage(rel, &iptr); + break; + case HASH_READ: + RelationSetSingleRLockPage(rel, &iptr); + break; + default: + elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %.*s", + access, blkno, NAMEDATALEN, RelationGetRelationName(rel)); + break; + } + } +} + +static void +_hash_unsetpagelock(Relation rel, + BlockNumber blkno, + int access) +{ + ItemPointerData iptr; + + if (USELOCKING) { + ItemPointerSet(&iptr, blkno, 1); + + switch (access) { + case HASH_WRITE: + RelationUnsetSingleWLockPage(rel, &iptr); + break; + case HASH_READ: + RelationUnsetSingleRLockPage(rel, &iptr); + break; + default: + elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %.*s", + access, blkno, NAMEDATALEN, RelationGetRelationName(rel)); + break; + } + } +} + +void +_hash_pagedel(Relation rel, ItemPointer tid) +{ + Buffer buf; + Buffer metabuf; + Page page; + BlockNumber blkno; + OffsetNumber offno; + HashMetaPage metap; + HashPageOpaque opaque; + + blkno = ItemPointerGetBlockNumber(tid); + offno = ItemPointerGetOffsetNumber(tid); + + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + PageIndexTupleDelete(page, offno); + _hash_wrtnorelbuf(rel, buf); + + if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) { + buf = _hash_freeovflpage(rel, buf); + if (BufferIsValid(buf)) { + _hash_relbuf(rel, buf, HASH_WRITE); + } + } else { + _hash_relbuf(rel, buf, HASH_WRITE); + } + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, 
LH_META_PAGE); + ++metap->hashm_nkeys; + _hash_wrtbuf(rel, metabuf); +} + +void +_hash_expandtable(Relation rel, Buffer metabuf) +{ + HashMetaPage metap; + Bucket old_bucket; + Bucket new_bucket; + uint32 spare_ndx; + +/* elog(DEBUG, "_hash_expandtable: expanding..."); */ + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + new_bucket = ++metap->MAX_BUCKET; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK); + + /* + * If the split point is increasing (MAX_BUCKET's log base 2 + * * increases), we need to copy the current contents of the spare + * split bucket to the next bucket. + */ + spare_ndx = _hash_log2(metap->MAX_BUCKET + 1); + if (spare_ndx > metap->OVFL_POINT) { + + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT]; + metap->OVFL_POINT = spare_ndx; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + } + + if (new_bucket > metap->HIGH_MASK) { + + /* Starting a new doubling */ + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE); + metap->LOW_MASK = metap->HIGH_MASK; + metap->HIGH_MASK = new_bucket | metap->LOW_MASK; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ); + + } + /* Relocate records to the new bucket */ + _hash_splitpage(rel, metabuf, old_bucket, new_bucket); +} + + +/* + * _hash_splitpage -- split 'obucket' into 'obucket' and 'nbucket' + * + * this routine is actually misnamed -- we are splitting a bucket that + * consists of a base bucket page and zero or more overflow (bucket + * chain) pages. 
+ */ +static void +_hash_splitpage(Relation rel, + Buffer metabuf, + Bucket obucket, + Bucket nbucket) +{ + Bucket bucket; + Buffer obuf; + Buffer nbuf; + Buffer ovflbuf; + BlockNumber oblkno; + BlockNumber nblkno; + bool null; + Datum datum; + HashItem hitem; + HashPageOpaque oopaque; + HashPageOpaque nopaque; + HashMetaPage metap; + IndexTuple itup; + int itemsz; + OffsetNumber ooffnum; + OffsetNumber noffnum; + OffsetNumber omaxoffnum; + Page opage; + Page npage; + TupleDesc itupdesc; + +/* elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d", + obucket, obucket, nbucket); +*/ + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* get the buffers & pages */ + oblkno = BUCKET_TO_BLKNO(obucket); + nblkno = BUCKET_TO_BLKNO(nbucket); + obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); + nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE); + opage = BufferGetPage(obuf); + npage = BufferGetPage(nbuf); + + /* initialize the new bucket */ + _hash_pageinit(npage, BufferGetPageSize(nbuf)); + nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); + nopaque->hasho_prevblkno = InvalidBlockNumber; + nopaque->hasho_nextblkno = InvalidBlockNumber; + nopaque->hasho_flag = LH_BUCKET_PAGE; + nopaque->hasho_oaddr = InvalidOvflAddress; + nopaque->hasho_bucket = nbucket; + _hash_wrtnorelbuf(rel, nbuf); + + /* + * make sure the old bucket isn't empty. advance 'opage' and + * friends through the overflow bucket chain until we find a + * non-empty page. + * + * XXX we should only need this once, if we are careful to + * preserve the invariant that overflow pages are never empty. 
+ */ + _hash_checkpage(opage, LH_BUCKET_PAGE); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + if (PageIsEmpty(opage)) { + oblkno = oopaque->hasho_nextblkno; + _hash_relbuf(rel, obuf, HASH_WRITE); + if (!BlockNumberIsValid(oblkno)) { + /* + * the old bucket is completely empty; of course, the new + * bucket will be as well, but since it's a base bucket + * page we don't care. + */ + _hash_relbuf(rel, nbuf, HASH_WRITE); + return; + } + obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); + opage = BufferGetPage(obuf); + _hash_checkpage(opage, LH_OVERFLOW_PAGE); + if (PageIsEmpty(opage)) { + elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno); + } + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + } + + /* + * we are now guaranteed that 'opage' is not empty. partition the + * tuples in the old bucket between the old bucket and the new + * bucket, advancing along their respective overflow bucket chains + * and adding overflow pages as needed. + */ + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + for (;;) { + /* + * at each iteration through this loop, each of these variables + * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum + */ + + /* check if we're at the end of the page */ + if (ooffnum > omaxoffnum) { + /* at end of page, but check for overflow page */ + oblkno = oopaque->hasho_nextblkno; + if (BlockNumberIsValid(oblkno)) { + /* + * we ran out of tuples on this particular page, but + * we have more overflow pages; re-init values. 
+ */ + _hash_wrtbuf(rel, obuf); + obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); + opage = BufferGetPage(obuf); + _hash_checkpage(opage, LH_OVERFLOW_PAGE); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + + /* we're guaranteed that an ovfl page has at least 1 tuple */ + if (PageIsEmpty(opage)) { + elog(WARN, "_hash_splitpage: empty ovfl page %d!", + oblkno); + } + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + } else { + /* + * we're at the end of the bucket chain, so now we're + * really done with everything. before quitting, call + * _hash_squeezebucket to ensure the tuples in the + * bucket (including the overflow pages) are packed as + * tightly as possible. + */ + _hash_wrtbuf(rel, obuf); + _hash_wrtbuf(rel, nbuf); + _hash_squeezebucket(rel, metap, obucket); + return; + } + } + + /* hash on the tuple */ + hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum)); + itup = &(hitem->hash_itup); + itupdesc = RelationGetTupleDescriptor(rel); + datum = index_getattr(itup, 1, itupdesc, &null); + bucket = _hash_call(rel, metap, datum); + + if (bucket == nbucket) { + /* + * insert the tuple into the new bucket. if it doesn't + * fit on the current page in the new bucket, we must + * allocate a new overflow page and place the tuple on + * that page instead. + */ + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + + itemsz = DOUBLEALIGN(itemsz); + + if (PageGetFreeSpace(npage) < itemsz) { + ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf); + _hash_wrtbuf(rel, nbuf); + nbuf = ovflbuf; + npage = BufferGetPage(nbuf); + _hash_checkpage(npage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + } + + noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage)); + (void) PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED); + _hash_wrtnorelbuf(rel, nbuf); + + /* + * now delete the tuple from the old bucket. 
after this + * section of code, 'ooffnum' will actually point to the + * ItemId to which we would point if we had advanced it + * before the deletion (PageIndexTupleDelete repacks the + * ItemId array). this also means that 'omaxoffnum' is + * exactly one less than it used to be, so we really can + * just decrement it instead of calling + * PageGetMaxOffsetNumber. + */ + PageIndexTupleDelete(opage, ooffnum); + _hash_wrtnorelbuf(rel, obuf); + omaxoffnum = OffsetNumberPrev(omaxoffnum); + + /* + * tidy up. if the old page was an overflow page and it + * is now empty, we must free it (we want to preserve the + * invariant that overflow pages cannot be empty). + */ + if (PageIsEmpty(opage) && + (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) { + obuf = _hash_freeovflpage(rel, obuf); + + /* check that we're not through the bucket chain */ + if (BufferIsInvalid(obuf)) { + _hash_wrtbuf(rel, nbuf); + _hash_squeezebucket(rel, metap, obucket); + return; + } + + /* + * re-init. again, we're guaranteed that an ovfl page + * has at least one tuple. + */ + opage = BufferGetPage(obuf); + _hash_checkpage(opage, LH_OVERFLOW_PAGE); + oblkno = BufferGetBlockNumber(obuf); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + if (PageIsEmpty(opage)) { + elog(WARN, "_hash_splitpage: empty overflow page %d", + oblkno); + } + ooffnum = FirstOffsetNumber; + omaxoffnum = PageGetMaxOffsetNumber(opage); + } + } else { + /* + * the tuple stays on this page. we didn't move anything, + * so we didn't delete anything and therefore we don't + * have to change 'omaxoffnum'. + * + * XXX any hash value from [0, nbucket-1] will map to this + * bucket, which doesn't make sense to me. 
+ */ + ooffnum = OffsetNumberNext(ooffnum); + } + } + /*NOTREACHED*/ +} diff --git a/src/backend/access/hash/hashscan.c b/src/backend/access/hash/hashscan.c new file mode 100644 index 0000000000..c4cce0e70d --- /dev/null +++ b/src/backend/access/hash/hashscan.c @@ -0,0 +1,172 @@ +/*------------------------------------------------------------------------- + * + * hashscan.c-- + * manage scans on hash tables + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashscan.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + * NOTES + * Because we can be doing an index scan on a relation while we + * update it, we need to avoid missing data that moves around in + * the index. The routines and global variables in this file + * guarantee that all scans in the local address space stay + * correctly positioned. This is all we need to worry about, since + * write locking guarantees that no one else will be on the same + * page at the same time as we are. + * + * The scheme is to manage a list of active scans in the current + * backend. Whenever we add or remove records from an index, we + * check the list of active scans to see if any has been affected. + * A scan is affected only if it is on the same relation, and the + * same page, as the update. 
+ * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" +#include "utils/excid.h" + +#include "access/heapam.h" +#include "access/genam.h" +#include "access/sdir.h" +#include "access/hash.h" + +static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); +static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno); + +typedef struct HashScanListData { + IndexScanDesc hashsl_scan; + struct HashScanListData *hashsl_next; +} HashScanListData; + +typedef HashScanListData *HashScanList; + +static HashScanList HashScans = (HashScanList) NULL; + +/* + * _Hash_regscan() -- register a new scan. + */ +void +_hash_regscan(IndexScanDesc scan) +{ + HashScanList new_el; + + new_el = (HashScanList) palloc(sizeof(HashScanListData)); + new_el->hashsl_scan = scan; + new_el->hashsl_next = HashScans; + HashScans = new_el; +} + +/* + * _hash_dropscan() -- drop a scan from the scan list + */ +void +_hash_dropscan(IndexScanDesc scan) +{ + HashScanList chk, last; + + last = (HashScanList) NULL; + for (chk = HashScans; + chk != (HashScanList) NULL && chk->hashsl_scan != scan; + chk = chk->hashsl_next) { + last = chk; + } + + if (chk == (HashScanList) NULL) + elog(WARN, "hash scan list trashed; can't find 0x%lx", scan); + + if (last == (HashScanList) NULL) + HashScans = chk->hashsl_next; + else + last->hashsl_next = chk->hashsl_next; + +#ifdef PERFECT_MEM + pfree (chk); +#endif /* PERFECT_MEM */ +} + +void +_hash_adjscans(Relation rel, ItemPointer tid) +{ + HashScanList l; + Oid relid; + + relid = rel->rd_id; + for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) { + if (relid == l->hashsl_scan->relation->rd_id) + _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid), + ItemPointerGetOffsetNumber(tid)); + } +} + +static void 
+_hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno) +{ + ItemPointer current; + Buffer buf; + Buffer metabuf; + HashScanOpaque so; + + if (!_hash_scantouched(scan, blkno, offno)) + return; + + metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ); + + so = (HashScanOpaque) scan->opaque; + buf = so->hashso_curbuf; + + current = &(scan->currentItemData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) { + _hash_step(scan, &buf, BackwardScanDirection, metabuf); + so->hashso_curbuf = buf; + } + + current = &(scan->currentMarkData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) { + ItemPointerData tmp; + tmp = *current; + *current = scan->currentItemData; + scan->currentItemData = tmp; + _hash_step(scan, &buf, BackwardScanDirection, metabuf); + so->hashso_mrkbuf = buf; + tmp = *current; + *current = scan->currentItemData; + scan->currentItemData = tmp; + } +} + +static bool +_hash_scantouched(IndexScanDesc scan, + BlockNumber blkno, + OffsetNumber offno) +{ + ItemPointer current; + + current = &(scan->currentItemData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + return (true); + + current = &(scan->currentMarkData); + if (ItemPointerIsValid(current) + && ItemPointerGetBlockNumber(current) == blkno + && ItemPointerGetOffsetNumber(current) >= offno) + return (true); + + return (false); +} diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c new file mode 100644 index 0000000000..056235dec8 --- /dev/null +++ b/src/backend/access/hash/hashsearch.c @@ -0,0 +1,425 @@ +/*------------------------------------------------------------------------- + * + * hashsearch.c-- + * search code for postgres hash tables + * + * Copyright (c) 1994, Regents of the 
University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" +#include "utils/excid.h" + +#include "fmgr.h" + +#include "access/heapam.h" +#include "access/genam.h" +#include "access/skey.h" +#include "access/sdir.h" +#include "access/hash.h" + +/* + * _hash_search() -- Finds the page/bucket that the contains the + * scankey and loads it into *bufP. the buffer has a read lock. + */ +void +_hash_search(Relation rel, + int keysz, + ScanKey scankey, + Buffer *bufP, + HashMetaPage metap) +{ + BlockNumber blkno; + Datum keyDatum; + Bucket bucket; + + if (scankey == (ScanKey) NULL || + (keyDatum = scankey[0].sk_argument) == (Datum) NULL) { + /* + * If the scankey argument is NULL, all tuples will satisfy + * the scan so we start the scan at the first bucket (bucket + * 0). + */ + bucket = 0; + } else { + bucket = _hash_call(rel, metap, keyDatum); + } + + blkno = BUCKET_TO_BLKNO(bucket); + + *bufP = _hash_getbuf(rel, blkno, HASH_READ); +} + +/* + * _hash_next() -- Get the next item in a scan. + * + * On entry, we have a valid currentItemData in the scan, and a + * read lock on the page that contains that item. We do not have + * the page pinned. We return the next item in the scan. On + * exit, we have the page containing the next item locked but not + * pinned. 
+ */ +RetrieveIndexResult +_hash_next(IndexScanDesc scan, ScanDirection dir) +{ + Relation rel; + Buffer buf; + Buffer metabuf; + Page page; + OffsetNumber offnum; + RetrieveIndexResult res; + ItemPointer current; + ItemPointer iptr; + HashItem hitem; + IndexTuple itup; + HashScanOpaque so; + + rel = scan->relation; + so = (HashScanOpaque) scan->opaque; + current = &(scan->currentItemData); + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + + /* + * XXX 10 may 91: somewhere there's a bug in our management of the + * cached buffer for this scan. wei discovered it. the following + * is a workaround so he can work until i figure out what's going on. + */ + + if (!BufferIsValid(so->hashso_curbuf)) { + so->hashso_curbuf = _hash_getbuf(rel, + ItemPointerGetBlockNumber(current), + HASH_READ); + } + + /* we still have the buffer pinned and locked */ + buf = so->hashso_curbuf; + + /* + * step to next valid tuple. note that _hash_step releases our + * lock on 'metabuf'; if we switch to a new 'buf' while looking + * for the next tuple, we come back with a lock on that buffer. 
+ */ + if (!_hash_step(scan, &buf, dir, metabuf)) { + return ((RetrieveIndexResult) NULL); + } + + /* if we're here, _hash_step found a valid tuple */ + current = &(scan->currentItemData); + offnum = ItemPointerGetOffsetNumber(current); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum)); + itup = &hitem->hash_itup; + iptr = (ItemPointer) palloc(sizeof(ItemPointerData)); + memmove((char *) iptr, (char *) &(itup->t_tid), sizeof(ItemPointerData)); + res = FormRetrieveIndexResult(current, iptr); + + return (res); +} + +static void +_hash_readnext(Relation rel, + Buffer *bufp, Page *pagep, HashPageOpaque *opaquep) +{ + BlockNumber blkno; + + blkno = (*opaquep)->hasho_nextblkno; + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + if (BlockNumberIsValid(blkno)) { + *bufp = _hash_getbuf(rel, blkno, HASH_READ); + *pagep = BufferGetPage(*bufp); + _hash_checkpage(*pagep, LH_OVERFLOW_PAGE); + *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); + Assert(!PageIsEmpty(*pagep)); + } +} + +static void +_hash_readprev(Relation rel, + Buffer *bufp, Page *pagep, HashPageOpaque *opaquep) +{ + BlockNumber blkno; + + blkno = (*opaquep)->hasho_prevblkno; + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + if (BlockNumberIsValid(blkno)) { + *bufp = _hash_getbuf(rel, blkno, HASH_READ); + *pagep = BufferGetPage(*bufp); + _hash_checkpage(*pagep, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); + if (PageIsEmpty(*pagep)) { + Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE); + _hash_relbuf(rel, *bufp, HASH_READ); + *bufp = InvalidBuffer; + } + } +} + +/* + * _hash_first() -- Find the first item in a scan. + * + * Return the RetrieveIndexResult of the first item in the tree that + * satisfies the qualificatin associated with the scan descriptor. 
On + * exit, the page containing the current index tuple is read locked + * and pinned, and the scan's opaque data entry is updated to + * include the buffer. + */ +RetrieveIndexResult +_hash_first(IndexScanDesc scan, ScanDirection dir) +{ + Relation rel; + Buffer buf; + Buffer metabuf; + Page page; + HashPageOpaque opaque; + HashMetaPage metap; |