77 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.34 2004/12/31 22:03:29 pgsql Exp $
10+ * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.35 2005/03/06 22:15:05 tgl Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
2020/* ----------------------------------------------------------------
2121 * hash-join hash table structures
2222 *
23- * Each active hashjoin has a HashJoinTable control block which is
23+ * Each active hashjoin has a HashJoinTable control block, which is
2424 * palloc'd in the executor's per-query context. All other storage needed
2525 * for the hashjoin is kept in private memory contexts, two for each hashjoin.
2626 * This makes it easy and fast to release the storage when we don't need it
27- * anymore.
27+ * anymore. (Exception: data associated with the temp files lives in the
28+ * per-query context too, since we always call buffile.c in that context.)
2829 *
2930 * The hashtable contexts are made children of the per-query context, ensuring
3031 * that they will be discarded at end of statement even if the join is
3536 * "hashCxt", while storage that is only wanted for the current batch is
3637 * allocated in the "batchCxt". By resetting the batchCxt at the end of
3738 * each batch, we free all the per-batch storage reliably and without tedium.
39+ *
40+ * During the first scan of the inner relation, we get its tuples from the
41+ * executor. If nbatch > 1 then tuples that don't belong in the first batch
42+ * get saved into inner-batch temp files. The same applies to the first scan
43+ * of the outer relation, except that we write tuples to outer-batch temp
44+ * files. After finishing the first scan, we do the following for
45+ * each remaining batch:
46+ * 1. Read tuples from inner batch file, load into hash buckets.
47+ * 2. Read tuples from outer batch file, match to hash buckets and output.
48+ *
49+ * It is possible to increase nbatch on the fly if the in-memory hash table
50+ * gets too big. The hash-value-to-batch computation is arranged so that this
51+ * can only cause a tuple to go into a later batch than previously thought,
52+ * never into an earlier batch. When we increase nbatch, we rescan the hash
53+ * table and dump out any tuples that now belong to a later batch to the
54+ * correct inner batch file. Subsequently, while reading either inner or
55+ * outer batch files, we might find tuples that no longer belong to the
56+ * current batch; if so, we just dump them out to the correct batch file.
3857 * ----------------------------------------------------------------
3958 */
4059
60+ /* these are in nodes/execnodes.h: */
61+ /* typedef struct HashJoinTupleData *HashJoinTuple; */
62+ /* typedef struct HashJoinTableData *HashJoinTable; */
63+
4164typedef struct HashJoinTupleData
4265{
43- struct HashJoinTupleData * next ; /* link to next tuple in same
44- * bucket */
66+ struct HashJoinTupleData * next ; /* link to next tuple in same bucket */
67+ uint32 hashvalue ; /* tuple's hash code */
4568 HeapTupleData htup ; /* tuple header */
4669} HashJoinTupleData ;
4770
48- typedef HashJoinTupleData * HashJoinTuple ;
49-
5071typedef struct HashJoinTableData
5172{
52- int nbuckets ; /* buckets in use during this batch */
53- int totalbuckets ; /* total number of (virtual) buckets */
54- HashJoinTuple * buckets ; /* buckets[i] is head of list of tuples */
73+ int nbuckets ; /* # buckets in the in-memory hash table */
74+ /* buckets[i] is head of list of tuples in i'th in-memory bucket */
75+ struct HashJoinTupleData * * buckets ;
5576 /* buckets array is per-batch storage, as are all the tuples */
5677
57- int nbatch ; /* number of batches; 0 means 1-pass join */
58- int curbatch ; /* current batch #, or 0 during 1st pass */
78+ int nbatch ; /* number of batches */
79+ int curbatch ; /* current batch #; 0 during 1st pass */
80+
81+ int nbatch_original ; /* nbatch when we started inner scan */
82+ int nbatch_outstart ; /* nbatch when we started outer scan */
83+
84+ bool growEnabled ; /* flag to shut off nbatch increases */
5985
6086 bool hashNonEmpty ; /* did inner plan produce any rows? */
6187
6288 /*
63- * all these arrays are allocated for the life of the hash join, but
64- * only if nbatch > 0:
89+ * These arrays are allocated for the life of the hash join, but
90+ * only if nbatch > 1. A file is opened only when we first write
91+ * a tuple into it (otherwise its pointer remains NULL). Note that
92+ * the zeroth array elements never get used, since tuples of batch zero
93+ * are processed directly rather than being dumped out to a file.
6594 */
6695 BufFile * * innerBatchFile ; /* buffered virtual temp file per batch */
6796 BufFile * * outerBatchFile ; /* buffered virtual temp file per batch */
68- long * outerBatchSize ; /* count of tuples in each outer batch
69- * file */
70- long * innerBatchSize ; /* count of tuples in each inner batch
71- * file */
7297
7398 /*
7499 * Info about the datatype-specific hash functions for the datatypes
@@ -79,21 +104,11 @@ typedef struct HashJoinTableData
79104 */
80105 FmgrInfo * hashfunctions ; /* lookup data for hash functions */
81106
82- /*
83- * During 1st scan of inner relation, we get tuples from executor. If
84- * nbatch > 0 then tuples that don't belong in first nbuckets logical
85- * buckets get dumped into inner-batch temp files. The same statements
86- * apply for the 1st scan of the outer relation, except we write
87- * tuples to outer-batch temp files. If nbatch > 0 then we do the
88- * following for each batch: 1. Read tuples from inner batch file,
89- * load into hash buckets. 2. Read tuples from outer batch file, match
90- * to hash buckets and output.
91- */
107+ Size spaceUsed ; /* memory space currently used by tuples */
108+ Size spaceAllowed ; /* upper limit for space used */
92109
93110 MemoryContext hashCxt ; /* context for whole-hash-join storage */
94111 MemoryContext batchCxt ; /* context for this-batch-only storage */
95112} HashJoinTableData ;
96113
97- typedef HashJoinTableData * HashJoinTable ;
98-
99114#endif /* HASHJOIN_H */
0 commit comments