Skip to content

Commit d85e0f3

Browse files
committed
Fix memory alignment in pg_mcv_list serialization
Blind attempt at fixing ia64, hppa and sparc builds. The serialized representation of MCV lists did not enforce proper memory alignment for internal fields, resulting in deserialization issues on platforms that are more sensitive to this (ia64, sparc and hppa). This forces a catalog version bump, because the layout of serialized pg_mcv_list changes. Broken since 7300a69.
1 parent d3a5fc1 commit d85e0f3

File tree

2 files changed

+77
-39
lines changed

2 files changed

+77
-39
lines changed

src/backend/statistics/mcv.c

+76-38
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,9 @@ statext_mcv_load(Oid mvoid)
451451
*
452452
* The overall structure of the serialized representation looks like this:
453453
*
454-
* +--------+----------------+---------------------+-------+
455-
* | header | dimension info | deduplicated values | items |
456-
* +--------+----------------+---------------------+-------+
454+
* +---------------+----------------+---------------------+-------+
455+
* | header fields | dimension info | deduplicated values | items |
456+
* +---------------+----------------+---------------------+-------+
457457
*
458458
* Where dimension info stores information about type of K-th attribute (e.g.
459459
* typlen, typbyval and length of deduplicated values). Deduplicated values
@@ -492,6 +492,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
492492

493493
/* serialized items (indexes into arrays, etc.) */
494494
bytea *output;
495+
char *raw;
495496
char *ptr;
496497

497498
/* values per dimension (and number of non-NULL values) */
@@ -593,18 +594,26 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
593594
info[dim].nbytes = 0;
594595
for (i = 0; i < info[dim].nvalues; i++)
595596
{
597+
Size len;
598+
596599
values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
597-
info[dim].nbytes += VARSIZE_ANY(values[dim][i]);
600+
601+
len = VARSIZE_ANY(values[dim][i]);
602+
info[dim].nbytes += MAXALIGN(len);
598603
}
599604
}
600605
else if (info[dim].typlen == -2) /* cstring */
601606
{
602607
info[dim].nbytes = 0;
603608
for (i = 0; i < info[dim].nvalues; i++)
604609
{
610+
Size len;
611+
605612
/* c-strings include terminator, so +1 byte */
606613
values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
607-
info[dim].nbytes += strlen(DatumGetCString(values[dim][i])) + 1;
614+
615+
len = strlen(DatumGetCString(values[dim][i])) + 1;
616+
info[dim].nbytes += MAXALIGN(len);
608617
}
609618
}
610619

@@ -617,28 +626,30 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
617626
* whole serialized MCV list (varlena header, MCV header, dimension info
618627
* for each attribute, deduplicated values and items).
619628
*/
620-
total_length = VARHDRSZ + offsetof(MCVList, items)
621-
+ (ndims * sizeof(DimensionInfo))
622-
+ (mcvlist->nitems * itemsize);
629+
total_length = offsetof(MCVList, items)
630+
+ MAXALIGN(ndims * sizeof(DimensionInfo));
623631

624632
/* add space for the arrays of deduplicated values */
625633
for (i = 0; i < ndims; i++)
626-
total_length += info[i].nbytes;
634+
total_length += MAXALIGN(info[i].nbytes);
627635

628-
/* allocate space for the whole serialized MCV list */
629-
output = (bytea *) palloc(total_length);
630-
SET_VARSIZE(output, total_length);
636+
/* and finally the items (no additional alignment needed) */
637+
total_length += mcvlist->nitems * itemsize;
631638

632-
/* 'ptr' points to the current position in the output buffer */
633-
ptr = VARDATA(output);
639+
/*
640+
* Allocate space for the whole serialized MCV list (we'll skip bytes,
641+
* so we set them to zero to make the result more compressible).
642+
*/
643+
raw = palloc0(total_length);
644+
ptr = raw;
634645

635646
/* copy the MCV list header */
636647
memcpy(ptr, mcvlist, offsetof(MCVList, items));
637648
ptr += offsetof(MCVList, items);
638649

639650
/* store information about the attributes */
640651
memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
641-
ptr += sizeof(DimensionInfo) * ndims;
652+
ptr += MAXALIGN(sizeof(DimensionInfo) * ndims);
642653

643654
/* Copy the deduplicated values for all attributes to the output. */
644655
for (dim = 0; dim < ndims; dim++)
@@ -670,6 +681,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
670681
}
671682
else if (info[dim].typlen > 0) /* passed by reference */
672683
{
684+
/* no special alignment needed, treated as char array */
673685
memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
674686
ptr += info[dim].typlen;
675687
}
@@ -678,14 +690,14 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
678690
int len = VARSIZE_ANY(value);
679691

680692
memcpy(ptr, DatumGetPointer(value), len);
681-
ptr += len;
693+
ptr += MAXALIGN(len);
682694
}
683695
else if (info[dim].typlen == -2) /* cstring */
684696
{
685697
Size len = strlen(DatumGetCString(value)) + 1; /* terminator */
686698

687699
memcpy(ptr, DatumGetCString(value), len);
688-
ptr += len;
700+
ptr += MAXALIGN(len);
689701
}
690702

691703
/* no underflows or overflows */
@@ -694,6 +706,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
694706

695707
/* we should get exactly nbytes of data for this dimension */
696708
Assert((ptr - start) == info[dim].nbytes);
709+
710+
/* make sure the pointer is aligned correctly after each dimension */
711+
ptr = raw + MAXALIGN(ptr - raw);
697712
}
698713

699714
/* Serialize the items, with uint16 indexes instead of the values. */
@@ -702,7 +717,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
702717
MCVItem *mcvitem = &mcvlist->items[i];
703718

704719
/* don't write beyond the allocated space */
705-
Assert(ptr <= (char *) output + total_length - itemsize);
720+
Assert(ptr <= raw + total_length - itemsize);
706721

707722
/* reset the item (we only allocate it once and reuse it) */
708723
memset(item, 0, itemsize);
@@ -741,12 +756,19 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
741756
}
742757

743758
/* at this point we expect to match the total_length exactly */
744-
Assert((ptr - (char *) output) == total_length);
759+
Assert((ptr - raw) == total_length);
745760

746761
pfree(item);
747762
pfree(values);
748763
pfree(counts);
749764

765+
output = (bytea *) palloc(VARHDRSZ + total_length);
766+
SET_VARSIZE(output, VARHDRSZ + total_length);
767+
768+
memcpy(VARDATA_ANY(output), raw, total_length);
769+
770+
pfree(raw);
771+
750772
return output;
751773
}
752774

@@ -764,6 +786,7 @@ statext_mcv_deserialize(bytea *data)
764786
i;
765787
Size expected_size;
766788
MCVList *mcvlist;
789+
char *raw;
767790
char *ptr;
768791

769792
int ndims,
@@ -781,6 +804,7 @@ statext_mcv_deserialize(bytea *data)
781804
Size datalen;
782805
char *dataptr;
783806
char *valuesptr;
807+
char *isnullptr;
784808

785809
if (data == NULL)
786810
return NULL;
@@ -797,7 +821,10 @@ statext_mcv_deserialize(bytea *data)
797821
mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
798822

799823
/* initialize pointer to the data part (skip the varlena header) */
800-
ptr = VARDATA_ANY(data);
824+
raw = palloc(VARSIZE_ANY_EXHDR(data));
825+
ptr = raw;
826+
827+
memcpy(raw, VARDATA_ANY(data), VARSIZE_ANY_EXHDR(data));
801828

802829
/* get the header and perform further sanity checks */
803830
memcpy(mcvlist, ptr, offsetof(MCVList, items));
@@ -848,7 +875,7 @@ statext_mcv_deserialize(bytea *data)
848875

849876
/* Now it's safe to access the dimension info. */
850877
info = (DimensionInfo *) ptr;
851-
ptr += ndims * sizeof(DimensionInfo);
878+
ptr += MAXALIGN(ndims * sizeof(DimensionInfo));
852879

853880
/* account for the value arrays */
854881
for (dim = 0; dim < ndims; dim++)
@@ -860,7 +887,7 @@ statext_mcv_deserialize(bytea *data)
860887
Assert(info[dim].nvalues >= 0);
861888
Assert(info[dim].nbytes >= 0);
862889

863-
expected_size += info[dim].nbytes;
890+
expected_size += MAXALIGN(info[dim].nbytes);
864891
}
865892

866893
/*
@@ -890,7 +917,7 @@ statext_mcv_deserialize(bytea *data)
890917

891918
/* space needed for a copy of data for by-ref types */
892919
if (!info[dim].typbyval)
893-
datalen += info[dim].nbytes;
920+
datalen += MAXALIGN(info[dim].nbytes);
894921
}
895922

896923
/*
@@ -899,19 +926,25 @@ statext_mcv_deserialize(bytea *data)
899926
* original data - it may disappear while we're still using the MCV list,
900927
* e.g. due to catcache release. Only needed for by-ref types.
901928
*/
902-
mcvlen = offsetof(MCVList, items) +
903-
+(sizeof(MCVItem) * nitems) /* array of MCVItem */
904-
+ ((sizeof(Datum) + sizeof(bool)) * ndims * nitems) +
905-
+datalen; /* by-ref data */
929+
mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
930+
931+
/* arrays of values and isnull flags for all MCV items */
932+
mcvlen += MAXALIGN(sizeof(Datum) * ndims * nitems);
933+
mcvlen += MAXALIGN(sizeof(bool) * ndims * nitems);
906934

935+
/* we don't quite need to align this, but it makes some asserts easier */
936+
mcvlen += MAXALIGN(datalen);
937+
938+
/* now resize the deserialized MCV list, and compute pointers to parts */
907939
mcvlist = repalloc(mcvlist, mcvlen);
908940

909-
/* pointer to the beginning of values/isnull space */
910-
valuesptr = (char *) mcvlist + offsetof(MCVList, items)
911-
+ (sizeof(MCVItem) * nitems);
941+
/* pointer to the beginning of values/isnull arrays */
942+
valuesptr = (char *) mcvlist
943+
+ MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
944+
945+
isnullptr = valuesptr + (MAXALIGN(sizeof(Datum) * ndims * nitems));
912946

913-
/* get pointer where to store the data */
914-
dataptr = (char *) mcvlist + (mcvlen - datalen);
947+
dataptr = isnullptr + (MAXALIGN(sizeof(bool) * ndims * nitems));
915948

916949
/*
917950
* Build mapping (index => value) for translating the serialized data into
@@ -963,11 +996,11 @@ statext_mcv_deserialize(bytea *data)
963996
Size len = VARSIZE_ANY(ptr);
964997

965998
memcpy(dataptr, ptr, len);
966-
ptr += len;
999+
ptr += MAXALIGN(len);
9671000

9681001
/* just point into the array */
9691002
map[dim][i] = PointerGetDatum(dataptr);
970-
dataptr += len;
1003+
dataptr += MAXALIGN(len);
9711004
}
9721005
}
9731006
else if (info[dim].typlen == -2)
@@ -978,11 +1011,11 @@ statext_mcv_deserialize(bytea *data)
9781011
Size len = (strlen(ptr) + 1); /* don't forget the \0 */
9791012

9801013
memcpy(dataptr, ptr, len);
981-
ptr += len;
1014+
ptr += MAXALIGN(len);
9821015

9831016
/* just point into the array */
9841017
map[dim][i] = PointerGetDatum(dataptr);
985-
dataptr += len;
1018+
dataptr += MAXALIGN(len);
9861019
}
9871020
}
9881021

@@ -995,6 +1028,9 @@ statext_mcv_deserialize(bytea *data)
9951028

9961029
/* check we consumed input data for this dimension exactly */
9971030
Assert(ptr == (start + info[dim].nbytes));
1031+
1032+
/* ensure proper alignment of the data */
1033+
ptr = raw + MAXALIGN(ptr - raw);
9981034
}
9991035

10001036
/* we should have also filled the MCV list exactly */
@@ -1027,16 +1063,18 @@ statext_mcv_deserialize(bytea *data)
10271063
ptr += ITEM_SIZE(ndims);
10281064

10291065
/* check we're not overflowing the input */
1030-
Assert(ptr <= (char *) data + VARSIZE_ANY(data));
1066+
Assert(ptr <= (char *) raw + VARSIZE_ANY_EXHDR(data));
10311067
}
10321068

10331069
/* check that we processed all the data */
1034-
Assert(ptr == (char *) data + VARSIZE_ANY(data));
1070+
Assert(ptr == raw + VARSIZE_ANY_EXHDR(data));
10351071

10361072
/* release the buffers used for mapping */
10371073
for (dim = 0; dim < ndims; dim++)
10381074
pfree(map[dim]);
1075+
10391076
pfree(map);
1077+
pfree(raw);
10401078

10411079
return mcvlist;
10421080
}

src/include/catalog/catversion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 201903271
56+
#define CATALOG_VERSION_NO 201903291
5757

5858
#endif

0 commit comments

Comments
 (0)