diff --git i/src/include/access/heapam.h w/src/include/access/heapam.h
index 9dab35551e1..dff7616abeb 100644
--- i/src/include/access/heapam.h
+++ w/src/include/access/heapam.h
@@ -74,7 +74,8 @@ typedef struct HeapScanDescData
 	/* these fields only used in page-at-a-time mode and for bitmap scans */
 	int			rs_cindex;		/* current tuple's index in vistuples */
 	int			rs_ntuples;		/* number of visible tuples on page */
-	OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];	/* their offsets */
+	OffsetNumber *rs_vistuples;	/* points into rs_vistuples_d */
+	OffsetNumber rs_vistuples_d[MaxHeapTuplesPerPage];	/* their offsets */
 }			HeapScanDescData;
 typedef struct HeapScanDescData *HeapScanDesc;
 
diff --git i/src/backend/access/heap/heapam.c w/src/backend/access/heap/heapam.c
index 12be87efed4..632f315f4e1 100644
--- i/src/backend/access/heap/heapam.c
+++ w/src/backend/access/heap/heapam.c
@@ -448,30 +448,95 @@ heapgetpage(TableScanDesc sscan, BlockNumber page)
 	 */
 	all_visible = PageIsAllVisible(dp) && !snapshot->takenDuringRecovery;
 
-	for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(dp, lineoff);
-		 lineoff <= lines;
-		 lineoff++, lpp++)
+	if (all_visible)
 	{
-		if (ItemIdIsNormal(lpp))
-		{
-			HeapTupleData loctup;
-			bool		valid;
+		HeapTupleData loctup;
+
+		loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
+
+		scan->rs_vistuples = scan->rs_vistuples_d;
+
+		for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(dp, lineoff);
+			 lineoff <= lines;
+			 lineoff++, lpp++)
+		{
+			if (!ItemIdIsNormal(lpp))
+				continue;
 
-			loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
 			loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
 			loctup.t_len = ItemIdGetLength(lpp);
 			ItemPointerSet(&(loctup.t_self), page, lineoff);
 
-			if (all_visible)
-				valid = true;
-			else
-				valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
+			HeapCheckForSerializableConflictOut(true, scan->rs_base.rs_rd,
+												&loctup, buffer, snapshot);
+			scan->rs_vistuples[ntup++] = lineoff;
+		}
+	}
+	else
+	{
+		HeapTupleData loctup;
+		int			normcount = 0;
+		OffsetNumber normoffsets[MaxHeapTuplesPerPage];
+
+		loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
+
+		/*
+		 * Iterate forward over line items, they're laid out in increasing
+		 * order in memory. Doing this separately allows us to benefit from
+		 * out-of-order capabilities of the CPU and simplifies the next loop.
+		 *
+		 * FIXME: Worth unrolling so that we don't fetch the same cacheline
+		 * over and over, due to line items being smaller than a cacheline?
+		 */
+		for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(dp, lineoff);
+			 lineoff <= lines;
+			 lineoff++, lpp++)
+		{
+			pg_prefetch_mem(PageGetItemId(dp, lineoff+5));
+			if (!ItemIdIsNormal(lpp))
+				continue;
+			normoffsets[normcount++] = lineoff;
+		}
+
+		/*
+		 * Process tuples in reverse order. That'll most often lead to memory
+		 * accesses in increasing order, which typically is more efficient for
+		 * the CPU's prefetcher. To avoid affecting sort order, we store the
+		 * visible tuples in decreasing order in rs_vistuples_d and then set
+		 * rs_vistuples to the last tuple found.
+		 *
+		 * FIXME: We should likely compute rs_cindex in a smarter way, rather
+		 * than changing rs_vistuples.
+		 */
+		scan->rs_vistuples = scan->rs_vistuples_d + (MaxHeapTuplesPerPage);
+		for (int i = normcount - 1; i >= 0; i--)
+		{
+			bool valid;
+
+			/* doesn't appear to be beneficial */
+#if 0
+			if (i > 0)
+				pg_prefetch_mem(PageGetItem(dp, PageGetItemId(dp, normoffsets[i - 1])));
+#endif
+
+			lineoff = normoffsets[i];
+			lpp = PageGetItemId(dp, lineoff);
+
+			loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
+			loctup.t_len = ItemIdGetLength(lpp);
+			ItemPointerSet(&(loctup.t_self), page, lineoff);
+			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
 
 			HeapCheckForSerializableConflictOut(valid, scan->rs_base.rs_rd,
 												&loctup, buffer, snapshot);
 
 			if (valid)
-				scan->rs_vistuples[ntup++] = lineoff;
+			{
+				scan->rs_vistuples--;
+				*scan->rs_vistuples = lineoff;
+				ntup++;
+			}
+
 		}
 	}
 
diff --git i/src/backend/access/heap/heapam_handler.c w/src/backend/access/heap/heapam_handler.c
index 41f1ca65d01..f2876ecbc60 100644
--- i/src/backend/access/heap/heapam_handler.c
+++ w/src/backend/access/heap/heapam_handler.c
@@ -2162,6 +2162,8 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 		 */
 		int			curslot;
 
+		hscan->rs_vistuples = hscan->rs_vistuples_d;
+
 		for (curslot = 0; curslot < tbmres->ntuples; curslot++)
 		{
 			OffsetNumber offnum = tbmres->offsets[curslot];
@@ -2184,6 +2186,8 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 		OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
 		OffsetNumber offnum;
 
+		hscan->rs_vistuples = hscan->rs_vistuples_d;
+
 		for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
 		{
 			ItemId		lp;
