Add defenses against integer overflow in dynahash numbuckets calculations.

author Tom Lane <[email protected]>
Wed, 12 Dec 2012 03:09:39 +0000 (22:09 -0500)

committer Tom Lane <[email protected]>
Wed, 12 Dec 2012 03:09:39 +0000 (22:09 -0500)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c

index b50cb231d198137547cbdd51707c87f1ccf4e724..f46a3fde9669af66c35891c157e2eb970b5888ad 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -431,7 +431,9 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
      * Both nbuckets and nbatch must be powers of 2 to make
      * ExecHashGetBucketAndBatch fast.  We already fixed nbatch; now inflate
      * nbuckets to the next larger power of 2.  We also force nbuckets to not
-    * be real small, by starting the search at 2^10.
+    * be real small, by starting the search at 2^10.  (Note: above we made
+    * sure that nbuckets is not more than INT_MAX / 2, so this loop cannot
+    * overflow, nor can the final shift to recalculate nbuckets.)
      */
     i = 10;
     while ((1 << i) < nbuckets)
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c

index 3bcfcbdd761aada4827fddeaf68117d732e5b730..cac8bf01d2942ae4c13f4f70fdb709c2236d8586 100644 (file)
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -63,6 +63,8 @@
  
  #include "postgres.h"
  
+#include <limits.h>
+
  #include "access/xact.h"
  #include "storage/shmem.h"
  #include "storage/spin.h"
@@ -199,6 +201,8 @@ static void hdefault(HTAB *hashp);
  static int choose_nelem_alloc(Size entrysize);
  static bool init_htab(HTAB *hashp, long nelem);
  static void hash_corrupted(HTAB *hashp);
+static long next_pow2_long(long num);
+static int next_pow2_int(long num);
  static void register_seq_scan(HTAB *hashp);
  static void deregister_seq_scan(HTAB *hashp);
  static bool has_seq_scans(HTAB *hashp);
@@ -373,8 +377,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
     {
         /* Doesn't make sense to partition a local hash table */
         Assert(flags & HASH_SHARED_MEM);
-       /* # of partitions had better be a power of 2 */
-       Assert(info->num_partitions == (1L << my_log2(info->num_partitions)));
+
+       /*
+        * The number of partitions had better be a power of 2. Also, it must
+        * be less than INT_MAX (see init_htab()), so call the int version of
+        * next_pow2.
+        */
+       Assert(info->num_partitions == next_pow2_int(info->num_partitions));
  
         hctl->num_partitions = info->num_partitions;
     }
@@ -515,7 +524,6 @@ init_htab(HTAB *hashp, long nelem)
  {
     HASHHDR    *hctl = hashp->hctl;
     HASHSEGMENT *segp;
-   long        lnbuckets;
     int         nbuckets;
     int         nsegs;
  
@@ -530,9 +538,7 @@ init_htab(HTAB *hashp, long nelem)
      * number of buckets.  Allocate space for the next greater power of two
      * number of buckets
      */
-   lnbuckets = (nelem - 1) / hctl->ffactor + 1;
-
-   nbuckets = 1 << my_log2(lnbuckets);
+   nbuckets = next_pow2_int((nelem - 1) / hctl->ffactor + 1);
  
     /*
      * In a partitioned table, nbuckets must be at least equal to
@@ -550,7 +556,7 @@ init_htab(HTAB *hashp, long nelem)
      * Figure number of directory segments needed, round up to a power of 2
      */
     nsegs = (nbuckets - 1) / hctl->ssize + 1;
-   nsegs = 1 << my_log2(nsegs);
+   nsegs = next_pow2_int(nsegs);
  
     /*
      * Make sure directory is big enough. If pre-allocated directory is too
@@ -620,9 +626,9 @@ hash_estimate_size(long num_entries, Size entrysize)
                 elementAllocCnt;
  
     /* estimate number of buckets wanted */
-   nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
+   nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1);
     /* # of segments needed for nBuckets */
-   nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
+   nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
     /* directory entries */
     nDirEntries = DEF_DIRSIZE;
     while (nDirEntries < nSegments)
@@ -663,9 +669,9 @@ hash_select_dirsize(long num_entries)
                 nDirEntries;
  
     /* estimate number of buckets wanted */
-   nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
+   nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1);
     /* # of segments needed for nBuckets */
-   nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
+   nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
     /* directory entries */
     nDirEntries = DEF_DIRSIZE;
     while (nDirEntries < nSegments)
@@ -1397,11 +1403,32 @@ my_log2(long num)
     int         i;
     long        limit;
  
+   /* guard against too-large input, which would put us into infinite loop */
+   if (num > LONG_MAX / 2)
+       num = LONG_MAX / 2;
+
     for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
         ;
     return i;
  }
  
+/* calculate first power of 2 >= num, bounded to what will fit in a long */
+static long
+next_pow2_long(long num)
+{
+   /* my_log2's internal range check is sufficient */
+   return 1L << my_log2(num);
+}
+
+/* calculate first power of 2 >= num, bounded to what will fit in an int */
+static int
+next_pow2_int(long num)
+{
+   if (num > INT_MAX / 2)
+       num = INT_MAX / 2;
+   return 1 << my_log2(num);
+}
+
  
  /************************* SEQ SCAN TRACKING ************************/
author	Tom Lane <[email protected]>
	Wed, 12 Dec 2012 03:09:39 +0000 (22:09 -0500)
committer	Tom Lane <[email protected]>
	Wed, 12 Dec 2012 03:09:39 +0000 (22:09 -0500)
Files changed:
src/backend/executor/nodeHash.c
src/backend/utils/hash/dynahash.c