@@ -106,19 +106,23 @@ const uint8 pg_number_of_ones[256] = {
106106static inline int pg_popcount32_slow (uint32 word );
107107static inline int pg_popcount64_slow (uint64 word );
108108static uint64 pg_popcount_slow (const char * buf , int bytes );
109+ static uint64 pg_popcount_masked_slow (const char * buf , int bytes , bits8 mask );
109110
#ifdef TRY_POPCNT_FAST
static bool pg_popcount_available(void);

/* "fast" implementations, one per entry point */
static inline int pg_popcount32_fast(uint32 word);
static inline int pg_popcount64_fast(uint64 word);
static uint64 pg_popcount_fast(const char *buf, int bytes);
static uint64 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask);

/* "choose" stubs; these are the initial targets of the pointers below */
static int	pg_popcount32_choose(uint32 word);
static int	pg_popcount64_choose(uint64 word);
static uint64 pg_popcount_choose(const char *buf, int bytes);
static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);

/*
 * Dispatch pointers.  Each one starts out aimed at its _choose stub and is
 * reassigned to a concrete implementation by choose_popcount_functions().
 */
int			(*pg_popcount32) (uint32 word) = pg_popcount32_choose;
int			(*pg_popcount64) (uint64 word) = pg_popcount64_choose;
uint64		(*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
uint64		(*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
#endif							/* TRY_POPCNT_FAST */
123127
124128#ifdef TRY_POPCNT_FAST
@@ -156,17 +160,22 @@ choose_popcount_functions(void)
156160 pg_popcount32 = pg_popcount32_fast ;
157161 pg_popcount64 = pg_popcount64_fast ;
158162 pg_popcount_optimized = pg_popcount_fast ;
163+ pg_popcount_masked_optimized = pg_popcount_masked_fast ;
159164 }
160165 else
161166 {
162167 pg_popcount32 = pg_popcount32_slow ;
163168 pg_popcount64 = pg_popcount64_slow ;
164169 pg_popcount_optimized = pg_popcount_slow ;
170+ pg_popcount_masked_optimized = pg_popcount_masked_slow ;
165171 }
166172
167173#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
168174 if (pg_popcount_avx512_available ())
175+ {
169176 pg_popcount_optimized = pg_popcount_avx512 ;
177+ pg_popcount_masked_optimized = pg_popcount_masked_avx512 ;
178+ }
170179#endif
171180}
172181
@@ -191,6 +200,13 @@ pg_popcount_choose(const char *buf, int bytes)
191200 return pg_popcount_optimized (buf , bytes );
192201}
193202
203+ static uint64
204+ pg_popcount_masked_choose (const char * buf , int bytes , bits8 mask )
205+ {
206+ choose_popcount_functions ();
207+ return pg_popcount_masked (buf , bytes , mask );
208+ }
209+
194210/*
195211 * pg_popcount32_fast
196212 * Return the number of 1 bits set in word
@@ -271,6 +287,56 @@ pg_popcount_fast(const char *buf, int bytes)
271287 return popcnt ;
272288}
273289
290+ /*
291+ * pg_popcount_masked_fast
292+ * Returns the number of 1-bits in buf after applying the mask to each byte
293+ */
294+ static uint64
295+ pg_popcount_masked_fast (const char * buf , int bytes , bits8 mask )
296+ {
297+ uint64 popcnt = 0 ;
298+
299+ #if SIZEOF_VOID_P >= 8
300+ /* Process in 64-bit chunks if the buffer is aligned */
301+ uint64 maskv = ~UINT64CONST (0 ) / 0xFF * mask ;
302+
303+ if (buf == (const char * ) TYPEALIGN (8 , buf ))
304+ {
305+ const uint64 * words = (const uint64 * ) buf ;
306+
307+ while (bytes >= 8 )
308+ {
309+ popcnt += pg_popcount64_fast (* words ++ & maskv );
310+ bytes -= 8 ;
311+ }
312+
313+ buf = (const char * ) words ;
314+ }
315+ #else
316+ /* Process in 32-bit chunks if the buffer is aligned. */
317+ uint32 maskv = ~((uint32 ) 0 ) / 0xFF * mask ;
318+
319+ if (buf == (const char * ) TYPEALIGN (4 , buf ))
320+ {
321+ const uint32 * words = (const uint32 * ) buf ;
322+
323+ while (bytes >= 4 )
324+ {
325+ popcnt += pg_popcount32_fast (* words ++ & maskv );
326+ bytes -= 4 ;
327+ }
328+
329+ buf = (const char * ) words ;
330+ }
331+ #endif
332+
333+ /* Process any remaining bytes */
334+ while (bytes -- )
335+ popcnt += pg_number_of_ones [(unsigned char ) * buf ++ & mask ];
336+
337+ return popcnt ;
338+ }
339+
274340#endif /* TRY_POPCNT_FAST */
275341
276342
@@ -370,6 +436,56 @@ pg_popcount_slow(const char *buf, int bytes)
370436 return popcnt ;
371437}
372438
439+ /*
440+ * pg_popcount_masked_slow
441+ * Returns the number of 1-bits in buf after applying the mask to each byte
442+ */
443+ static uint64
444+ pg_popcount_masked_slow (const char * buf , int bytes , bits8 mask )
445+ {
446+ uint64 popcnt = 0 ;
447+
448+ #if SIZEOF_VOID_P >= 8
449+ /* Process in 64-bit chunks if the buffer is aligned */
450+ uint64 maskv = ~UINT64CONST (0 ) / 0xFF * mask ;
451+
452+ if (buf == (const char * ) TYPEALIGN (8 , buf ))
453+ {
454+ const uint64 * words = (const uint64 * ) buf ;
455+
456+ while (bytes >= 8 )
457+ {
458+ popcnt += pg_popcount64_slow (* words ++ & maskv );
459+ bytes -= 8 ;
460+ }
461+
462+ buf = (const char * ) words ;
463+ }
464+ #else
465+ /* Process in 32-bit chunks if the buffer is aligned. */
466+ uint32 maskv = ~((uint32 ) 0 ) / 0xFF * mask ;
467+
468+ if (buf == (const char * ) TYPEALIGN (4 , buf ))
469+ {
470+ const uint32 * words = (const uint32 * ) buf ;
471+
472+ while (bytes >= 4 )
473+ {
474+ popcnt += pg_popcount32_slow (* words ++ & maskv );
475+ bytes -= 4 ;
476+ }
477+
478+ buf = (const char * ) words ;
479+ }
480+ #endif
481+
482+ /* Process any remaining bytes */
483+ while (bytes -- )
484+ popcnt += pg_number_of_ones [(unsigned char ) * buf ++ & mask ];
485+
486+ return popcnt ;
487+ }
488+
373489#ifndef TRY_POPCNT_FAST
374490
375491/*
@@ -401,4 +517,14 @@ pg_popcount_optimized(const char *buf, int bytes)
401517 return pg_popcount_slow (buf , bytes );
402518}
403519
520+ /*
521+ * pg_popcount_masked_optimized
522+ * Returns the number of 1-bits in buf after applying the mask to each byte
523+ */
524+ uint64
525+ pg_popcount_masked_optimized (const char * buf , int bytes , bits8 mask )
526+ {
527+ return pg_popcount_masked_slow (buf , bytes , mask );
528+ }
529+
404530#endif /* !TRY_POPCNT_FAST */
0 commit comments