Skip to content

Commit 3e4fbc1

Browse files
author
Nikita Malakhov
committed
Introducing de-TOAST iterator for Generic Toaster
Detoasting is performed by an iterator, part by part, for cases where a full-value detoast is not required, such as the textpos function. The iterator is introduced as a new structure, DetoastIterator, and the iterator creation and next functions are available through the TOAST API virtual function table. The detoast iterator is not a part of the TOAST API but is a revival of the patch in [2]. Iterators are very useful for cases when full detoasting is not required, such as searching in TOASTed data. The detoast iterator implementation is hidden inside the Generic Toaster and provides two additional functions to the user: detoast_iterator_create, which returns a DetoastIterator as a Datum to hide internal details from the user; and detoast_iterate_next, which receives a DetoastIterator and a buffer to fill as parameters. Author: Teodor Sigaev <[email protected]> Author: Oleg Bartunov <[email protected]> Author: Nikita Glukhov <[email protected]> Author: Nikita Malakhov <[email protected]> [1] Discussion: https://www.postgresql.org/message-id/flat/[email protected] [2] Discussion: https://www.postgresql.org/message-id/flat/CAL-OGktjn9wHLHhEem58qJXMBUAVWa8-VrDoCmceAxXGTUM_BA%40mail.gmail.com#17cd4f49591c2ea3cba364c13e129147 Detoast Iterator code cleanup and fix
1 parent 18245cb commit 3e4fbc1

File tree

7 files changed

+1113
-24
lines changed

7 files changed

+1113
-24
lines changed

src/backend/access/common/toast_compression.c

+357
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "access/toasterapi.h"
2121
#include "access/toast_compression.h"
22+
#include "access/generic_toaster.h"
2223
#include "common/pg_lzcompress.h"
2324
#include "fmgr.h"
2425
#include "utils/builtins.h"
@@ -247,6 +248,362 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
247248
#endif
248249
}
249250

251+
/* ----------
252+
* pglz_decompress_state -
253+
*
254+
* Decompresses source into dest. Returns the number of bytes
255+
* decompressed into the destination buffer, or -1 if the
256+
* compressed data is corrupted.
257+
*
258+
* If check_complete is true, the data is considered corrupted
259+
* if we don't exactly fill the destination buffer. Callers that
260+
* are extracting a slice typically can't apply this check.
*
* Unlike a one-shot decompressor, this variant can stop and be resumed:
*
* source, slen: input bytes and a pointer to their count. When pstate
* is not NULL, *slen is overwritten with the number of source bytes
* consumed by this call. It is set to 0 when the call returned right
* after finishing a pending match copy because dest became full.
* When pstate is NULL, *slen is left untouched.
*
* dest, dlen: output buffer and its size. The return value counts only
* the bytes written by this call. Match copies read from negative
* offsets relative to the write pointer.
* NOTE(review): on a resumed call this presumably requires dest to
* directly follow the previously produced output in memory; confirm
* against the callers.
*
* check_complete: besides the destination being exactly filled, the
* check also requires the source to be exactly consumed.
*
* last_cource_chunk: when true, running past the end of source while
* reading a match tag is reported as corruption; when false it is
* tolerated.
*
* pstate: optional in/out decoder state. If pstate is not NULL and
* *pstate is NULL, a pglz_state is allocated with palloc() and stored
* in *pstate. The state keeps the current control byte, the number of
* its bits already processed, and the length and offset of a match copy
* that did not fit into dest. Returns of -1 from the main decode path
* leave the state unmodified.
261+
* ----------
262+
*/
263+
int32
264+
pglz_decompress_state(const char *source, int32 *slen, char *dest,
265+
int32 dlen, bool check_complete, bool last_cource_chunk,
266+
void **pstate)
267+
{
268+
pglz_state *state = pstate ? *pstate : NULL;
269+
const unsigned char *sp = (const unsigned char *) source;
270+
const unsigned char *srcend = sp + *slen;
271+
unsigned char *dp = (unsigned char *) dest;
272+
unsigned char *destend = dp + dlen;
273+
unsigned char ctrl;
274+
int ctrlc;
275+
int32 len;
276+
int32 remlen;
277+
int32 off;
278+
279+
if (state)
280+
{
281+
/* Resuming: restore the control byte and its processed-bit counter. */
ctrl = state->ctrl;
282+
ctrlc = state->ctrlc;
283+
284+
/* A match copy was cut short by the previous call; finish it first. */
if (state->len)
285+
{
286+
int32 copylen;
287+
288+
len = state->len;
289+
off = state->off;
290+
291+
copylen = Min(len, destend - dp);
292+
remlen = len - copylen;
/* Byte-wise copy, since the source and target ranges may overlap. */
293+
while (copylen--)
294+
{
295+
*dp = dp[-off];
296+
dp++;
297+
}
298+
299+
/*
* The pending copy alone filled dest: remember what is still owed,
* report that no source bytes were consumed, and return.
*/
if (dp >= destend)
300+
{
301+
state->len = remlen;
302+
*slen = 0;
303+
return (char *) dp - dest;
304+
}
305+
306+
Assert(remlen == 0);
307+
}
308+
309+
remlen = 0;
310+
off = 0;
311+
312+
/*
* The saved control byte still has unprocessed bits: jump straight into
* the item loop instead of reading a new control byte.
*/
if (ctrlc < 8 && sp < srcend && dp < destend)
313+
goto ctrl_loop;
314+
}
315+
else
316+
{
317+
/* No saved state: ctrlc = 8 marks the control byte as fully consumed. */
ctrl = 0;
318+
ctrlc = 8;
319+
remlen = 0;
320+
off = 0;
321+
}
322+
323+
while (sp < srcend && dp < destend)
324+
{
325+
/*
326+
* Read one control byte and process the next 8 items (or as many as
327+
* remain in the compressed input).
328+
*/
329+
ctrl = *sp++;
330+
331+
for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++)
332+
{
333+
ctrl_loop:
334+
if (ctrl & 1)
335+
{
336+
int32 copylen;
337+
338+
/*
339+
* Set control bit means we must read a match tag. The match
340+
* is coded with two bytes. First byte uses lower nibble to
341+
* code length - 3. Higher nibble contains upper 4 bits of the
342+
* offset. The next following byte contains the lower 8 bits
343+
* of the offset. If the length is coded as 18, another
344+
* extension tag byte tells how much longer the match really
345+
* was (0-255).
346+
*/
347+
len = (sp[0] & 0x0f) + 3;
348+
off = ((sp[0] & 0xf0) << 4) | sp[1];
349+
sp += 2;
350+
if (len == 18)
351+
len += *sp++;
352+
353+
/*
354+
* Check for corrupt data: if we fell off the end of the
355+
* source, or if we obtained off = 0, we have problems. (We
356+
* must check this, else we risk an infinite loop below in the
357+
* face of corrupt data.)
*
* Falling off the end is only treated as corruption when
* last_cource_chunk is set.
* NOTE(review): the tag bytes above are read before this check,
* so for a non-final chunk the caller presumably guarantees
* readable bytes beyond srcend; confirm against the callers.
358+
*/
359+
if (unlikely((sp > srcend && last_cource_chunk) || off == 0))
360+
return -1;
361+
362+
/*
363+
* Don't emit more data than requested.
*
* Whatever does not fit is kept in remlen, which is stored in
* the state at the end of the function.
364+
*/
365+
copylen = Min(len, destend - dp);
366+
remlen = len - copylen;
367+
368+
/*
369+
* Now we copy the bytes specified by the tag from OUTPUT to
370+
* OUTPUT (copy len bytes from dp - off to dp). The copied
371+
* areas could overlap; to prevent possible uncertainty, we
372+
* copy only non-overlapping regions.
373+
*/
374+
while (off < copylen)
375+
{
376+
/*
377+
* We can safely copy "off" bytes since that clearly
378+
* results in non-overlapping source and destination.
379+
*/
380+
memcpy(dp, dp - off, off);
381+
copylen -= off;
382+
dp += off;
383+
384+
/*----------
385+
* This bit is less obvious: we can double "off" after
386+
* each such step. Consider this raw input:
387+
* 112341234123412341234
388+
* This will be encoded as 5 literal bytes "11234" and
389+
* then a match tag with length 16 and offset 4. After
390+
* memcpy'ing the first 4 bytes, we will have emitted
391+
* 112341234
392+
* so we can double "off" to 8, then after the next step
393+
* we have emitted
394+
* 11234123412341234
395+
* Then we can double "off" again, after which it is more
396+
* than the remaining "len" so we fall out of this loop
397+
* and finish with a non-overlapping copy of the
398+
* remainder. In general, a match tag with off < len
399+
* implies that the decoded data has a repeat length of
400+
* "off". We can handle 1, 2, 4, etc repetitions of the
401+
* repeated string per memcpy until we get to a situation
402+
* where the final copy step is non-overlapping.
403+
*
404+
* (Another way to understand this is that we are keeping
405+
* the copy source point dp - off the same throughout.)
406+
*----------
407+
*/
408+
off += off;
409+
}
410+
memcpy(dp, dp - off, copylen);
411+
dp += copylen;
412+
}
413+
else
414+
{
415+
/*
416+
* An unset control bit means LITERAL BYTE. So we just copy
417+
* one from INPUT to OUTPUT.
418+
*/
419+
*dp++ = *sp++;
420+
}
421+
422+
/*
423+
* Advance the control bit
424+
*/
425+
ctrl >>= 1;
426+
}
427+
}
428+
429+
/*
430+
* If requested, check we decompressed the right amount.
431+
*/
432+
if (check_complete && (dp != destend || sp != srcend))
433+
return -1;
434+
435+
/*
* Save the decoder position so that a later call can resume, allocating
* the state on first use.
*/
if (pstate)
436+
{
437+
if (!state)
438+
*pstate = state = palloc(sizeof(*state));
439+
440+
state->ctrl = ctrl;
441+
state->ctrlc = ctrlc;
442+
state->len = remlen;
443+
state->off = off;
444+
445+
/* Tell the caller how many source bytes were consumed. */
*slen = (const char *) sp - source;
446+
}
447+
448+
/*
449+
* That's it.
450+
*/
451+
return (char *) dp - dest;
452+
}
453+
454+
#if 0
/*
 * NOTE(review): everything up to the matching #endif is excluded from
 * compilation by the "#if 0" above. Presumably it is superseded by
 * pglz_decompress_state(); confirm before removing it.
 */
455+
/* ----------
456+
* pglz_decompress_iterate -
457+
*
458+
* This function is based on pglz_decompress(), with these additional
459+
* requirements:
460+
*
461+
* 1. We need to save the current control byte and byte position for the
462+
* caller's next iteration.
463+
*
464+
* 2. In pglz_decompress(), we can assume we have all the source bytes
465+
* available. This is not the case when we decompress one chunk at a
466+
* time, so we have to make sure that we only read bytes available in the
467+
* current chunk.
*
* source: compressed input; reading starts at source->position and
* source->position is advanced past the consumed bytes on normal return.
*
* dest: output buffer; writing starts at dest->limit, which is advanced
* past the produced bytes on return.
*
* iter: carries the saved control byte (ctrl), its processed-bit counter
* (ctrlc) and the length/offset of a match copy that did not fit into
* the output (len, off).
*
* destend: upper bound for output produced by this call; it is clamped to
* dest->capacity.
468+
* ----------
469+
*/
470+
void
471+
pglz_decompress_iterate(ToastBuffer *source, ToastBuffer *dest,
472+
DetoastIterator iter, const char *destend)
473+
{
474+
const unsigned char *sp;
475+
const unsigned char *srcend;
476+
unsigned char *dp;
477+
478+
/*
479+
* In the while loop, sp may be incremented such that it points beyond
480+
* srcend. To guard against reading beyond the end of the current chunk,
481+
* we set srcend such that we exit the loop when we are within four bytes
482+
* of the end of the current chunk. When source->limit reaches
483+
* source->capacity, we are decompressing the last chunk, so we can (and
484+
* need to) read every byte.
485+
*/
486+
srcend = (const unsigned char *)
487+
(source->limit == source->capacity ? source->limit : (source->limit - 4));
488+
sp = (const unsigned char *) source->position;
489+
dp = (unsigned char *) dest->limit;
/* Never write past the end of the destination buffer. */
490+
if (destend > (unsigned char *) dest->capacity)
491+
destend = (unsigned char *) dest->capacity;
492+
493+
/* A match copy was cut short by the previous call; finish it first. */
if (iter->len)
494+
{
495+
int32 len = iter->len;
496+
int32 off = iter->off;
497+
int32 copylen = Min(len, destend - dp);
498+
int32 remlen = len - copylen;
499+
/* Byte-wise copy, since the source and target ranges may overlap. */
500+
while (copylen--)
501+
{
502+
*dp = dp[-off];
503+
dp++;
504+
}
505+
506+
iter->len = remlen;
507+
508+
/*
* Output limit reached by the pending copy alone: publish the new write
* position and return; source->position is left unchanged.
*/
if (dp >= destend)
509+
{
510+
dest->limit = (char *) dp;
511+
return;
512+
}
513+
514+
Assert(remlen == 0);
515+
}
516+
517+
while (sp < srcend && dp < destend)
518+
{
519+
/*
520+
* Read one control byte and process the next 8 items (or as many as
521+
* remain in the compressed input).
522+
*/
523+
unsigned char ctrl;
524+
int ctrlc;
525+
526+
/*
* A saved counter other than INVALID_CTRLC means the control byte from
* a previous pass is only partly processed, so continue with it;
* otherwise fetch a new control byte.
*/
if (iter->ctrlc != INVALID_CTRLC)
527+
{
528+
ctrl = iter->ctrl;
529+
ctrlc = iter->ctrlc;
530+
}
531+
else
532+
{
533+
ctrl = *sp++;
534+
ctrlc = 0;
535+
}
536+
537+
for (; ctrlc < INVALID_CTRLC && sp < srcend && dp < destend; ctrlc++)
538+
{
539+
540+
if (ctrl & 1)
541+
{
542+
/*
543+
* Set control bit means we must read a match tag. The match
544+
* is coded with two bytes. First byte uses lower nibble to
545+
* code length - 3. Higher nibble contains upper 4 bits of the
546+
* offset. The next following byte contains the lower 8 bits
547+
* of the offset. If the length is coded as 18, another
548+
* extension tag byte tells how much longer the match really
549+
* was (0-255).
550+
*/
551+
int32 len;
552+
int32 off;
553+
int32 copylen;
554+
555+
len = (sp[0] & 0x0f) + 3;
556+
off = ((sp[0] & 0xf0) << 4) | sp[1];
557+
sp += 2;
558+
if (len == 18)
559+
len += *sp++;
560+
561+
/*
562+
* Now we copy the bytes specified by the tag from OUTPUT to
563+
* OUTPUT (copy len bytes from dp - off to dp). The copied
564+
* areas could overlap; to prevent possible uncertainty, we
565+
* copy only non-overlapping regions.
*
* Only as much as fits before destend is copied now; the rest
* is recorded in iter->len for the next call.
566+
*/
567+
copylen = Min(len, destend - dp);
568+
iter->len = len - copylen;
569+
570+
while (off < copylen)
571+
{
572+
/* see comments in common/pg_lzcompress.c */
573+
memcpy(dp, dp - off, off);
574+
copylen -= off;
575+
dp += off;
576+
off += off;
577+
}
578+
memcpy(dp, dp - off, copylen);
579+
dp += copylen;
580+
581+
/* Remember the (possibly doubled) offset for a resumed copy. */
iter->off = off;
582+
}
583+
else
584+
{
585+
/*
586+
* An unset control bit means LITERAL BYTE. So we just copy
587+
* one from INPUT to OUTPUT.
588+
*/
589+
*dp++ = *sp++;
590+
}
591+
592+
/*
593+
* Advance the control bit
594+
*/
595+
ctrl >>= 1;
596+
}
597+
598+
/* Save the control byte state for the next pass or call. */
iter->ctrlc = ctrlc;
599+
iter->ctrl = ctrl;
600+
}
601+
602+
/* Publish how far input was consumed and output was produced. */
source->position = (char *) sp;
603+
dest->limit = (char *) dp;
604+
}
605+
#endif
606+
250607
/*
251608
* Extract compression ID from a varlena.
252609
*

0 commit comments

Comments
 (0)