@@ -451,9 +451,9 @@ statext_mcv_load(Oid mvoid)
451
451
*
452
452
* The overall structure of the serialized representation looks like this:
453
453
*
454
- * +--------+----------------+---------------------+-------+
455
- * | header | dimension info | deduplicated values | items |
456
- * +--------+----------------+---------------------+-------+
454
+ * +---------------+----------------+---------------------+-------+
455
+ * | header fields | dimension info | deduplicated values | items |
456
+ * +---------------+----------------+---------------------+-------+
457
457
*
458
458
* Where dimension info stores information about type of K-th attribute (e.g.
459
459
* typlen, typbyval and length of deduplicated values). Deduplicated values
@@ -492,6 +492,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
492
492
493
493
/* serialized items (indexes into arrays, etc.) */
494
494
bytea * output ;
495
+ char * raw ;
495
496
char * ptr ;
496
497
497
498
/* values per dimension (and number of non-NULL values) */
@@ -593,18 +594,26 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
593
594
info [dim ].nbytes = 0 ;
594
595
for (i = 0 ; i < info [dim ].nvalues ; i ++ )
595
596
{
597
+ Size len ;
598
+
596
599
values [dim ][i ] = PointerGetDatum (PG_DETOAST_DATUM (values [dim ][i ]));
597
- info [dim ].nbytes += VARSIZE_ANY (values [dim ][i ]);
600
+
601
+ len = VARSIZE_ANY (values [dim ][i ]);
602
+ info [dim ].nbytes += MAXALIGN (len );
598
603
}
599
604
}
600
605
else if (info [dim ].typlen == -2 ) /* cstring */
601
606
{
602
607
info [dim ].nbytes = 0 ;
603
608
for (i = 0 ; i < info [dim ].nvalues ; i ++ )
604
609
{
610
+ Size len ;
611
+
605
612
/* c-strings include terminator, so +1 byte */
606
613
values [dim ][i ] = PointerGetDatum (PG_DETOAST_DATUM (values [dim ][i ]));
607
- info [dim ].nbytes += strlen (DatumGetCString (values [dim ][i ])) + 1 ;
614
+
615
+ len = strlen (DatumGetCString (values [dim ][i ])) + 1 ;
616
+ info [dim ].nbytes += MAXALIGN (len );
608
617
}
609
618
}
610
619
@@ -617,28 +626,30 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
617
626
* whole serialized MCV list (varlena header, MCV header, dimension info
618
627
* for each attribute, deduplicated values and items).
619
628
*/
620
- total_length = VARHDRSZ + offsetof(MCVList , items )
621
- + (ndims * sizeof (DimensionInfo ))
622
- + (mcvlist -> nitems * itemsize );
629
+ total_length = offsetof(MCVList , items )
630
+ + MAXALIGN (ndims * sizeof (DimensionInfo ));
623
631
624
632
/* add space for the arrays of deduplicated values */
625
633
for (i = 0 ; i < ndims ; i ++ )
626
- total_length += info [i ].nbytes ;
634
+ total_length += MAXALIGN ( info [i ].nbytes ) ;
627
635
628
- /* allocate space for the whole serialized MCV list */
629
- output = (bytea * ) palloc (total_length );
630
- SET_VARSIZE (output , total_length );
636
+ /* and finally the items (no additional alignment needed) */
637
+ total_length += mcvlist -> nitems * itemsize ;
631
638
632
- /* 'ptr' points to the current position in the output buffer */
633
- ptr = VARDATA (output );
639
+ /*
640
+ * Allocate space for the whole serialized MCV list (we'll skip bytes,
641
+ * so we set them to zero to make the result more compressible).
642
+ */
643
+ raw = palloc0 (total_length );
644
+ ptr = raw ;
634
645
635
646
/* copy the MCV list header */
636
647
memcpy (ptr , mcvlist , offsetof(MCVList , items ));
637
648
ptr += offsetof(MCVList , items );
638
649
639
650
/* store information about the attributes */
640
651
memcpy (ptr , info , sizeof (DimensionInfo ) * ndims );
641
- ptr += sizeof (DimensionInfo ) * ndims ;
652
+ ptr += MAXALIGN ( sizeof (DimensionInfo ) * ndims ) ;
642
653
643
654
/* Copy the deduplicated values for all attributes to the output. */
644
655
for (dim = 0 ; dim < ndims ; dim ++ )
@@ -670,6 +681,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
670
681
}
671
682
else if (info [dim ].typlen > 0 ) /* passed by reference */
672
683
{
684
+ /* no special alignment needed, treated as char array */
673
685
memcpy (ptr , DatumGetPointer (value ), info [dim ].typlen );
674
686
ptr += info [dim ].typlen ;
675
687
}
@@ -678,14 +690,14 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
678
690
int len = VARSIZE_ANY (value );
679
691
680
692
memcpy (ptr , DatumGetPointer (value ), len );
681
- ptr += len ;
693
+ ptr += MAXALIGN ( len ) ;
682
694
}
683
695
else if (info [dim ].typlen == -2 ) /* cstring */
684
696
{
685
697
Size len = strlen (DatumGetCString (value )) + 1 ; /* terminator */
686
698
687
699
memcpy (ptr , DatumGetCString (value ), len );
688
- ptr += len ;
700
+ ptr += MAXALIGN ( len ) ;
689
701
}
690
702
691
703
/* no underflows or overflows */
@@ -694,6 +706,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
694
706
695
707
/* we should get exactly nbytes of data for this dimension */
696
708
Assert ((ptr - start ) == info [dim ].nbytes );
709
+
710
+ /* make sure the pointer is aligned correctly after each dimension */
711
+ ptr = raw + MAXALIGN (ptr - raw );
697
712
}
698
713
699
714
/* Serialize the items, with uint16 indexes instead of the values. */
@@ -702,7 +717,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
702
717
MCVItem * mcvitem = & mcvlist -> items [i ];
703
718
704
719
/* don't write beyond the allocated space */
705
- Assert (ptr <= ( char * ) output + total_length - itemsize );
720
+ Assert (ptr <= raw + total_length - itemsize );
706
721
707
722
/* reset the item (we only allocate it once and reuse it) */
708
723
memset (item , 0 , itemsize );
@@ -741,12 +756,19 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
741
756
}
742
757
743
758
/* at this point we expect to match the total_length exactly */
744
- Assert ((ptr - ( char * ) output ) == total_length );
759
+ Assert ((ptr - raw ) == total_length );
745
760
746
761
pfree (item );
747
762
pfree (values );
748
763
pfree (counts );
749
764
765
+ output = (bytea * ) palloc (VARHDRSZ + total_length );
766
+ SET_VARSIZE (output , VARHDRSZ + total_length );
767
+
768
+ memcpy (VARDATA_ANY (output ), raw , total_length );
769
+
770
+ pfree (raw );
771
+
750
772
return output ;
751
773
}
752
774
@@ -764,6 +786,7 @@ statext_mcv_deserialize(bytea *data)
764
786
i ;
765
787
Size expected_size ;
766
788
MCVList * mcvlist ;
789
+ char * raw ;
767
790
char * ptr ;
768
791
769
792
int ndims ,
@@ -781,6 +804,7 @@ statext_mcv_deserialize(bytea *data)
781
804
Size datalen ;
782
805
char * dataptr ;
783
806
char * valuesptr ;
807
+ char * isnullptr ;
784
808
785
809
if (data == NULL )
786
810
return NULL ;
@@ -797,7 +821,10 @@ statext_mcv_deserialize(bytea *data)
797
821
mcvlist = (MCVList * ) palloc0 (offsetof(MCVList , items ));
798
822
799
823
/* initialize pointer to the data part (skip the varlena header) */
800
- ptr = VARDATA_ANY (data );
824
+ raw = palloc (VARSIZE_ANY_EXHDR (data ));
825
+ ptr = raw ;
826
+
827
+ memcpy (raw , VARDATA_ANY (data ), VARSIZE_ANY_EXHDR (data ));
801
828
802
829
/* get the header and perform further sanity checks */
803
830
memcpy (mcvlist , ptr , offsetof(MCVList , items ));
@@ -848,7 +875,7 @@ statext_mcv_deserialize(bytea *data)
848
875
849
876
/* Now it's safe to access the dimension info. */
850
877
info = (DimensionInfo * ) ptr ;
851
- ptr += ndims * sizeof (DimensionInfo );
878
+ ptr += MAXALIGN ( ndims * sizeof (DimensionInfo ) );
852
879
853
880
/* account for the value arrays */
854
881
for (dim = 0 ; dim < ndims ; dim ++ )
@@ -860,7 +887,7 @@ statext_mcv_deserialize(bytea *data)
860
887
Assert (info [dim ].nvalues >= 0 );
861
888
Assert (info [dim ].nbytes >= 0 );
862
889
863
- expected_size += info [dim ].nbytes ;
890
+ expected_size += MAXALIGN ( info [dim ].nbytes ) ;
864
891
}
865
892
866
893
/*
@@ -890,7 +917,7 @@ statext_mcv_deserialize(bytea *data)
890
917
891
918
/* space needed for a copy of data for by-ref types */
892
919
if (!info [dim ].typbyval )
893
- datalen += info [dim ].nbytes ;
920
+ datalen += MAXALIGN ( info [dim ].nbytes ) ;
894
921
}
895
922
896
923
/*
@@ -899,19 +926,25 @@ statext_mcv_deserialize(bytea *data)
899
926
* original data - it may disappear while we're still using the MCV list,
900
927
* e.g. due to catcache release. Only needed for by-ref types.
901
928
*/
902
- mcvlen = offsetof(MCVList , items ) +
903
- + (sizeof (MCVItem ) * nitems ) /* array of MCVItem */
904
- + ((sizeof (Datum ) + sizeof (bool )) * ndims * nitems ) +
905
- + datalen ; /* by-ref data */
929
+ mcvlen = MAXALIGN (offsetof(MCVList , items ) + (sizeof (MCVItem ) * nitems ));
930
+
931
+ /* arrays of values and isnull flags for all MCV items */
932
+ mcvlen += MAXALIGN (sizeof (Datum ) * ndims * nitems );
933
+ mcvlen += MAXALIGN (sizeof (bool ) * ndims * nitems );
906
934
935
+ /* we don't quite need to align this, but it makes some asserts easier */
936
+ mcvlen += MAXALIGN (datalen );
937
+
938
+ /* now resize the deserialized MCV list, and compute pointers to parts */
907
939
mcvlist = repalloc (mcvlist , mcvlen );
908
940
909
- /* pointer to the beginning of values/isnull space */
910
- valuesptr = (char * ) mcvlist + offsetof(MCVList , items )
911
- + (sizeof (MCVItem ) * nitems );
941
+ /* pointer to the beginning of values/isnull arrays */
942
+ valuesptr = (char * ) mcvlist
943
+ + MAXALIGN (offsetof(MCVList , items ) + (sizeof (MCVItem ) * nitems ));
944
+
945
+ isnullptr = valuesptr + (MAXALIGN (sizeof (Datum ) * ndims * nitems ));
912
946
913
- /* get pointer where to store the data */
914
- dataptr = (char * ) mcvlist + (mcvlen - datalen );
947
+ dataptr = isnullptr + (MAXALIGN (sizeof (bool ) * ndims * nitems ));
915
948
916
949
/*
917
950
* Build mapping (index => value) for translating the serialized data into
@@ -963,11 +996,11 @@ statext_mcv_deserialize(bytea *data)
963
996
Size len = VARSIZE_ANY (ptr );
964
997
965
998
memcpy (dataptr , ptr , len );
966
- ptr += len ;
999
+ ptr += MAXALIGN ( len ) ;
967
1000
968
1001
/* just point into the array */
969
1002
map [dim ][i ] = PointerGetDatum (dataptr );
970
- dataptr += len ;
1003
+ dataptr += MAXALIGN ( len ) ;
971
1004
}
972
1005
}
973
1006
else if (info [dim ].typlen == -2 )
@@ -978,11 +1011,11 @@ statext_mcv_deserialize(bytea *data)
978
1011
Size len = (strlen (ptr ) + 1 ); /* don't forget the \0 */
979
1012
980
1013
memcpy (dataptr , ptr , len );
981
- ptr += len ;
1014
+ ptr += MAXALIGN ( len ) ;
982
1015
983
1016
/* just point into the array */
984
1017
map [dim ][i ] = PointerGetDatum (dataptr );
985
- dataptr += len ;
1018
+ dataptr += MAXALIGN ( len ) ;
986
1019
}
987
1020
}
988
1021
@@ -995,6 +1028,9 @@ statext_mcv_deserialize(bytea *data)
995
1028
996
1029
/* check we consumed input data for this dimension exactly */
997
1030
Assert (ptr == (start + info [dim ].nbytes ));
1031
+
1032
+ /* ensure proper alignment of the data */
1033
+ ptr = raw + MAXALIGN (ptr - raw );
998
1034
}
999
1035
1000
1036
/* we should have also filled the MCV list exactly */
@@ -1027,16 +1063,18 @@ statext_mcv_deserialize(bytea *data)
1027
1063
ptr += ITEM_SIZE (ndims );
1028
1064
1029
1065
/* check we're not overflowing the input */
1030
- Assert (ptr <= (char * ) data + VARSIZE_ANY (data ));
1066
+ Assert (ptr <= (char * ) raw + VARSIZE_ANY_EXHDR (data ));
1031
1067
}
1032
1068
1033
1069
/* check that we processed all the data */
1034
- Assert (ptr == ( char * ) data + VARSIZE_ANY (data ));
1070
+ Assert (ptr == raw + VARSIZE_ANY_EXHDR (data ));
1035
1071
1036
1072
/* release the buffers used for mapping */
1037
1073
for (dim = 0 ; dim < ndims ; dim ++ )
1038
1074
pfree (map [dim ]);
1075
+
1039
1076
pfree (map );
1077
+ pfree (raw );
1040
1078
1041
1079
return mcvlist ;
1042
1080
}
0 commit comments