author     Tom Lane    2000-04-18 05:43:02 +0000
committer  Tom Lane    2000-04-18 05:43:02 +0000
commit     25442d8d2fd35389813062f523488821f4fc31d4 (patch)
tree       d58f9ec0b5a7d3074c222008a9d58d5c35db5251
parent     24864d048eec2c579346eb31a42c87be1c92644e (diff)
Correct oversight in hashjoin cost estimation: nodeHash sizes its hash
table for an average of NTUP_PER_BUCKET tuples/bucket, but cost_hashjoin was assuming a target load of one tuple/bucket. This was causing a noticeable underestimate of hashjoin costs.
-rw-r--r--  src/backend/executor/nodeHash.c          3
-rw-r--r--  src/backend/optimizer/path/costsize.c   16
-rw-r--r--  src/include/executor/nodeHash.h          5
3 files changed, 16 insertions, 8 deletions
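To make the size of the oversight concrete, here is a small self-contained sketch (not PostgreSQL source; the row counts, disbursion, and cpu_operator_cost values are hypothetical) comparing the tuple-comparison charge in cost_hashjoin before and after this patch:

/*
 * Illustration only, not PostgreSQL source: a back-of-the-envelope
 * comparison of the old and new charge for tuple comparisons in
 * cost_hashjoin().  All input values below are hypothetical.
 */
#include <math.h>
#include <stdio.h>

#define NTUP_PER_BUCKET 10              /* same constant nodeHash.c targets */

int
main(void)
{
    double  cpu_operator_cost = 0.0025; /* hypothetical per-comparison cost */
    double  outer_rows = 100000.0;      /* hypothetical outer relation size */
    double  inner_rows = 50000.0;       /* hypothetical inner relation size */
    double  inner_disbursion = 0.0001;  /* hypothetical inner disbursion */

    double  old_cost = cpu_operator_cost * outer_rows *
        ceil(inner_rows * inner_disbursion);
    double  new_cost = cpu_operator_cost * outer_rows *
        NTUP_PER_BUCKET * ceil(inner_rows * inner_disbursion);

    /* With these inputs the old estimate is low by a factor of NTUP_PER_BUCKET. */
    printf("old comparison cost: %g\n", old_cost);
    printf("new comparison cost: %g\n", new_cost);
    return 0;
}

With these made-up inputs the old formula charges 1250 cost units for the comparisons while the new one charges 12500, i.e. the planner had been underestimating that term by roughly the target bucket load.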
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index bee4a10f35d..1d841576fea 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
*
- * $Id: nodeHash.c,v 1.44 2000/01/26 05:56:22 momjian Exp $
+ * $Id: nodeHash.c,v 1.45 2000/04/18 05:43:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -221,7 +221,6 @@ ExecEndHash(Hash *node)
* create a hashtable in shared memory for hashjoin.
* ----------------------------------------------------------------
*/
-#define NTUP_PER_BUCKET 10
#define FUDGE_FAC 2.0
HashJoinTable
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 6ecfb2a4713..df3c6d5c429 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.57 2000/04/12 17:15:19 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.58 2000/04/18 05:43:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -51,6 +51,7 @@
#include <math.h>
+#include "executor/nodeHash.h"
#include "miscadmin.h"
#include "nodes/plannodes.h"
#include "optimizer/clauses.h"
@@ -604,12 +605,17 @@ cost_hashjoin(Path *path,
run_cost += cpu_operator_cost * outer_path->parent->rows;
/*
- * the number of tuple comparisons needed is the number of outer
- * tuples times the typical hash bucket size, which we estimate
- * conservatively as the inner disbursion times the inner tuple count.
+ * The number of tuple comparisons needed is the number of outer
+ * tuples times the typical hash bucket size. nodeHash.c tries for
+ * average bucket loading of NTUP_PER_BUCKET, but that goal will
+ * be reached only if data values are uniformly distributed among
+ * the buckets. To be conservative, we scale up the target bucket
+ * size by the number of inner rows times inner disbursion, giving
+ * an estimate of the typical number of duplicates of each value.
+ * We then charge one cpu_operator_cost per tuple comparison.
*/
run_cost += cpu_operator_cost * outer_path->parent->rows *
- ceil(inner_path->parent->rows * innerdisbursion);
+ NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdisbursion);
/*
* Estimate the number of tuples that get through the hashing filter
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index 0460368d8b1..b61ced7cdc1 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: nodeHash.h,v 1.15 2000/01/26 05:58:05 momjian Exp $
+ * $Id: nodeHash.h,v 1.16 2000/04/18 05:43:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,6 +16,9 @@
#include "nodes/plannodes.h"
+/* NTUP_PER_BUCKET is exported because planner wants to see it */
+#define NTUP_PER_BUCKET 10
+
extern TupleTableSlot *ExecHash(Hash *node);
extern bool ExecInitHash(Hash *node, EState *estate, Plan *parent);
extern int ExecCountSlotsHash(Hash *node);
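For context, a rough sketch of why the constant is now exported: the executor sizes its hash table for about NTUP_PER_BUCKET tuples per bucket, so the planner must use the same constant when charging for a bucket scan. The formula and FUDGE_FAC usage below only approximate the real ExecHashTableCreate() sizing logic and the inner row count is hypothetical.

/*
 * Sketch only, not the actual nodeHash.c code: one bucket per
 * NTUP_PER_BUCKET expected inner tuples, with some slop.
 */
#include <math.h>
#include <stdio.h>

#define NTUP_PER_BUCKET 10
#define FUDGE_FAC 2.0

int
main(void)
{
    double  inner_rows = 50000.0;   /* hypothetical inner relation size */
    int     nbuckets;

    nbuckets = (int) ceil(inner_rows * FUDGE_FAC / NTUP_PER_BUCKET);
    printf("buckets for %.0f inner rows: %d\n", inner_rows, nbuckets);
    return 0;
}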