summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Robert Haas, 2017-11-10 21:50:50 +0000
committer: Robert Haas, 2017-11-10 21:50:50 +0000
commit: 5edc63bda68a77c4d38f0cbeae1c4271f9ef4100 (patch)
tree: 214cd7c2d8fe70017061fb3e4fc803437d78f19a
parent: 0c98d0dd5c85ce0c8485ae1a8351a26b83c4338b (diff)
Account for the effect of lossy pages when costing bitmap scans.
Dilip Kumar, reviewed by Alexander Kumenkov, Amul Sul, and me. Some final adjustments by me. Discussion: http://postgr.es/m/CAFiTN-sYtqUOXQ4SpuhTv0Z9gD0si3YxZGv_PQAAMX8qbOotcg@mail.gmail.com
-rw-r--r-- src/backend/nodes/tidbitmap.c | 37
-rw-r--r-- src/backend/optimizer/path/costsize.c | 59
-rw-r--r-- src/include/nodes/tidbitmap.h | 1
3 files changed, 75 insertions, 22 deletions
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index c47d5849ef..acfe6b263c 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -265,7 +265,6 @@ TIDBitmap *
tbm_create(long maxbytes, dsa_area *dsa)
{
TIDBitmap *tbm;
- long nbuckets;
/* Create the TIDBitmap struct and zero all its fields */
tbm = makeNode(TIDBitmap);
@@ -273,17 +272,7 @@ tbm_create(long maxbytes, dsa_area *dsa)
tbm->mcxt = CurrentMemoryContext;
tbm->status = TBM_EMPTY;
- /*
- * Estimate number of hashtable entries we can have within maxbytes. This
- * estimates the hash cost as sizeof(PagetableEntry), which is good enough
- * for our purpose. Also count an extra Pointer per entry for the arrays
- * created during iteration readout.
- */
- nbuckets = maxbytes /
- (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
- nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
- nbuckets = Max(nbuckets, 16); /* sanity limit */
- tbm->maxentries = (int) nbuckets;
+ tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
tbm->lossify_start = 0;
tbm->dsa = dsa;
tbm->dsapagetable = InvalidDsaPointer;
@@ -1546,3 +1535,27 @@ pagetable_free(pagetable_hash *pagetable, void *pointer)
tbm->dsapagetableold = InvalidDsaPointer;
}
}
+
+/*
+ * tbm_calculate_entries
+ *
+ * Estimate number of hashtable entries we can have within maxbytes.
+ */
+long
+tbm_calculate_entries(double maxbytes)
+{
+ long nbuckets;
+
+ /*
+ * Estimate number of hashtable entries we can have within maxbytes. This
+ * estimates the hash cost as sizeof(PagetableEntry), which is good enough
+ * for our purpose. Also count an extra Pointer per entry for the arrays
+ * created during iteration readout.
+ */
+ nbuckets = maxbytes /
+ (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
+ nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
+ nbuckets = Max(nbuckets, 16); /* sanity limit */
+
+ return nbuckets;
+}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 98fb16e85a..2d2df60886 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
double T;
double pages_fetched;
double tuples_fetched;
+ double heap_pages;
+ long maxentries;
/*
* Fetch total cost of obtaining the bitmap, as well as its total
@@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+ /*
+ * For a single scan, the number of heap pages that need to be fetched is
+ * the same as the Mackert and Lohman formula for the case T <= b (ie, no
+ * re-reads needed).
+ */
+ pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+
+ /*
+ * Calculate the number of pages fetched from the heap. Then based on
+ * current work_mem estimate get the estimated maxentries in the bitmap.
+ * (Note that we always do this calculation based on the number of pages
+ * that would be fetched in a single iteration, even if loop_count > 1.
+ * That's correct, because only that number of entries will be stored in
+ * the bitmap at one time.)
+ */
+ heap_pages = Min(pages_fetched, baserel->pages);
+ maxentries = tbm_calculate_entries(work_mem * 1024L);
+
if (loop_count > 1)
{
/*
@@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
root);
pages_fetched /= loop_count;
}
- else
- {
- /*
- * For a single scan, the number of heap pages that need to be fetched
- * is the same as the Mackert and Lohman formula for the case T <= b
- * (ie, no re-reads needed).
- */
- pages_fetched =
- (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
- }
if (pages_fetched >= T)
pages_fetched = T;
else
pages_fetched = ceil(pages_fetched);
+ if (maxentries < heap_pages)
+ {
+ double exact_pages;
+ double lossy_pages;
+
+ /*
+ * Crude approximation of the number of lossy pages. Because of the
+ * way tbm_lossify() is coded, the number of lossy pages increases
+ * very sharply as soon as we run short of memory; this formula has
+ * that property and seems to perform adequately in testing, but it's
+ * possible we could do better somehow.
+ */
+ lossy_pages = Max(0, heap_pages - maxentries / 2);
+ exact_pages = heap_pages - lossy_pages;
+
+ /*
+ * If there are lossy pages then recompute the number of tuples
+ * processed by the bitmap heap node. We assume here that the chance
+ * of a given tuple coming from an exact page is the same as the
+ * chance that a given page is exact. This might not be true, but
+ * it's not clear how we can do any better.
+ */
+ if (lossy_pages > 0)
+ tuples_fetched =
+ clamp_row_est(indexSelectivity *
+ (exact_pages / heap_pages) * baserel->tuples +
+ (lossy_pages / heap_pages) * baserel->tuples);
+ }
+
if (cost)
*cost = indexTotalCost;
if (tuple)
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index f9a1902da8..d3ad0a5566 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -70,5 +70,6 @@ extern void tbm_end_iterate(TBMIterator *iterator);
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
dsa_pointer dp);
+extern long tbm_calculate_entries(double maxbytes);
#endif /* TIDBITMAP_H */