@@ -773,7 +773,7 @@ intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
773
773
*
774
774
* Simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
775
775
* called "codewords". The number of integers packed into a single codeword
776
- * depends on the integers being packed: small integers are encoded using
776
+ * depends on the integers being packed; small integers are encoded using
777
777
* fewer bits than large integers. A single codeword can store a single
778
778
* 60-bit integer, or two 30-bit integers, for example.
779
779
*
@@ -783,11 +783,11 @@ intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
783
783
* of the absolute values.
784
784
*
785
785
* In Simple-8b, each codeword consists of a 4-bit selector, which indicates
786
- * how many integers are encoded in the codeword, and the encoded integers
786
+ * how many integers are encoded in the codeword, and the encoded integers are
787
787
* packed into the remaining 60 bits. The selector allows for 16 different
788
- * ways of using the remaining 60 bits, "modes". The number of integers
789
- * packed into a single codeword is listed in the simple8b_modes table below.
790
- * For example, consider the following codeword:
788
+ * ways of using the remaining 60 bits, called "modes". The number of integers
789
+ * packed into a single codeword in each mode is listed in the simple8b_modes
790
+ * table below. For example, consider the following codeword:
791
791
*
792
792
* 20-bit integer 20-bit integer 20-bit integer
793
793
* 1101 00000000000000010010 01111010000100100000 00000000000000010100
@@ -835,22 +835,28 @@ static const struct
835
835
{20 , 3 }, /* mode 13: three 20-bit integers */
836
836
{30 , 2 }, /* mode 14: two 30-bit integers */
837
837
{60 , 1 }, /* mode 15: one 60-bit integer */
838
+
838
839
{0 , 0 } /* sentinel value */
839
840
};
840
841
841
842
/*
842
843
* EMPTY_CODEWORD is a special value, used to indicate "no values".
843
844
* It is used if the next value is too large to be encoded with Simple-8b.
844
845
*
845
- * This value looks like a 0-mode codeword, but we check for it
846
+ * This value looks like a 0-mode codeword, but we check for it
846
847
* specifically. (In a real 0-mode codeword, all the unused bits are zero.)
847
848
*/
848
- #define EMPTY_CODEWORD UINT64CONST(0xFFFFFFFFFFFFFFF0 )
849
+ #define EMPTY_CODEWORD UINT64CONST(0x0FFFFFFFFFFFFFFF )
849
850
850
851
/*
851
852
* Encode a number of integers into a Simple-8b codeword.
852
853
*
853
- * Returns the number of integers that were encoded.
854
+ * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
855
+ * elements.
856
+ *
857
+ * Returns the encoded codeword, and sets *num_encoded to the number
858
+ * of input integers that were encoded. It can be zero, if the first input is
859
+ * too large to be encoded.
854
860
*/
855
861
static uint64
856
862
simple8b_encode (uint64 * ints , int * num_encoded , uint64 base )
@@ -861,7 +867,6 @@ simple8b_encode(uint64 *ints, int *num_encoded, uint64 base)
861
867
uint64 diff ;
862
868
uint64 last_val ;
863
869
uint64 codeword ;
864
- uint64 diffs [60 ];
865
870
int i ;
866
871
867
872
Assert (ints [0 ] > base );
@@ -891,13 +896,12 @@ simple8b_encode(uint64 *ints, int *num_encoded, uint64 base)
891
896
selector ++ ;
892
897
nints = simple8b_modes [selector ].num_ints ;
893
898
bits = simple8b_modes [selector ].bits_per_int ;
899
+
894
900
if (i >= nints )
895
901
break ;
896
902
}
897
903
else
898
904
{
899
- if (i < 60 )
900
- diffs [i ] = diff ;
901
905
i ++ ;
902
906
if (i >= nints )
903
907
break ;
@@ -910,7 +914,13 @@ simple8b_encode(uint64 *ints, int *num_encoded, uint64 base)
910
914
911
915
if (nints == 0 )
912
916
{
913
- /* The next value is too large and be encoded with Simple-8b */
917
+ /*
918
+ * The first value is too large to be encoded with Simple-8b.
919
+ *
920
+ * If there is at least one not-too-large integer in the input, we
921
+ * will encode it using mode 15 (or a more compact mode). Hence, we
922
+ * only get here if the *first* input integer is >= 2^60.
923
+ */
914
924
Assert (i == 0 );
915
925
* num_encoded = 0 ;
916
926
return EMPTY_CODEWORD ;
@@ -924,16 +934,18 @@ simple8b_encode(uint64 *ints, int *num_encoded, uint64 base)
924
934
codeword = 0 ;
925
935
if (bits > 0 )
926
936
{
927
- for (i = nints - 1 ; i >= 0 ; i -- )
937
+ for (i = nints - 1 ; i > 0 ; i -- )
928
938
{
939
+ diff = ints [i ] - ints [i - 1 ] - 1 ;
940
+ codeword |= diff ;
929
941
codeword <<= bits ;
930
- codeword |= diffs [i ];
931
942
}
943
+ diff = ints [0 ] - base - 1 ;
944
+ codeword |= diff ;
932
945
}
933
946
934
947
/* add selector to the codeword, and return */
935
- codeword <<= 4 ;
936
- codeword |= selector ;
948
+ codeword |= (uint64 ) selector << 60 ;
937
949
938
950
* num_encoded = nints ;
939
951
return codeword ;
@@ -945,7 +957,7 @@ simple8b_encode(uint64 *ints, int *num_encoded, uint64 base)
945
957
static int
946
958
simple8b_decode (uint64 codeword , uint64 * decoded , uint64 base )
947
959
{
948
- int selector = codeword & 0x0f ;
960
+ int selector = ( codeword >> 60 ) ;
949
961
int nints = simple8b_modes [selector ].num_ints ;
950
962
uint64 bits = simple8b_modes [selector ].bits_per_int ;
951
963
uint64 mask = (UINT64CONST (1 ) << bits ) - 1 ;
@@ -954,8 +966,6 @@ simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
954
966
if (codeword == EMPTY_CODEWORD )
955
967
return 0 ;
956
968
957
- codeword >>= 4 ; /* shift out the selector */
958
-
959
969
prev_value = base ;
960
970
for (int i = 0 ; i < nints ; i ++ )
961
971
{
@@ -976,15 +986,13 @@ simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
976
986
static bool
977
987
simple8b_contains (uint64 codeword , uint64 key , uint64 base )
978
988
{
979
- int selector = codeword & 0x0f ;
989
+ int selector = ( codeword >> 60 ) ;
980
990
int nints = simple8b_modes [selector ].num_ints ;
981
991
int bits = simple8b_modes [selector ].bits_per_int ;
982
992
983
993
if (codeword == EMPTY_CODEWORD )
984
994
return false;
985
995
986
- codeword >>= 4 ; /* shift out the selector */
987
-
988
996
if (bits == 0 )
989
997
{
990
998
/* Special handling for 0-bit cases. */
0 commit comments