Skip to content

Commit 489247b

Browse files
committed
Improve pruning of a default partition
When querying a partitioned table containing a default partition, we were wrongly deciding to include it in the scan too early in the process, failing to exclude it in some cases. If we reinterpret the PruneStepResult.scan_default flag slightly, we can do a better job at detecting that it can be excluded. The change is that we avoid setting the flag for that pruning step unless the step absolutely requires the default partition to be scanned (in contrast with the previous arrangement, which was to set it unless the step was able to prune it). So get_matching_partitions() must explicitly check the partition that each returned bound value corresponds to in order to determine whether the default one needs to be included, rather than relying on the flag from the final step result. Author: Yuzuko Hosoya <[email protected]> Reviewed-by: Amit Langote <[email protected]> Discussion: https://postgr.es/m/[email protected]
1 parent 69edf4f commit 489247b

File tree

4 files changed

+111
-130
lines changed

4 files changed

+111
-130
lines changed

src/backend/partitioning/partprune.c

+98-121
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,7 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
735735
PruneStepResult **results,
736736
*final_result;
737737
ListCell *lc;
738+
bool scan_default;
738739

739740
/* If there are no pruning steps then all partitions match. */
740741
if (num_steps == 0)
@@ -786,30 +787,39 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
786787
Assert(final_result != NULL);
787788
i = -1;
788789
result = NULL;
790+
scan_default = final_result->scan_default;
789791
while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0)
790792
{
791793
int partindex = context->boundinfo->indexes[i];
792794

793-
/*
794-
* In range and hash partitioning cases, some slots may contain -1,
795-
* indicating that no partition has been defined to accept a given
796-
* range of data or for a given remainder, respectively. The default
797-
* partition, if any, in case of range partitioning, will be added to
798-
* the result, because the specified range still satisfies the query's
799-
* conditions.
800-
*/
801-
if (partindex >= 0)
802-
result = bms_add_member(result, partindex);
795+
if (partindex < 0)
796+
{
797+
/*
798+
* In range partitioning cases, if a partition index is -1 it
799+
* means that the bound at the offset is the upper bound for a
800+
* range not covered by any partition (other than a possible
801+
* default partition). In hash partitioning, the same means no
802+
* partition has been defined for the corresponding remainder
803+
* value.
804+
*
805+
* In either case, the value is still part of the queried range of
806+
* values, so mark to scan the default partition if one exists.
807+
*/
808+
scan_default |= partition_bound_has_default(context->boundinfo);
809+
continue;
810+
}
811+
812+
result = bms_add_member(result, partindex);
803813
}
804814

805-
/* Add the null and/or default partition if needed and if present. */
815+
/* Add the null and/or default partition if needed and present. */
806816
if (final_result->scan_null)
807817
{
808818
Assert(context->strategy == PARTITION_STRATEGY_LIST);
809819
Assert(partition_bound_accepts_nulls(context->boundinfo));
810820
result = bms_add_member(result, context->boundinfo->null_index);
811821
}
812-
if (final_result->scan_default)
822+
if (scan_default)
813823
{
814824
Assert(context->strategy == PARTITION_STRATEGY_LIST ||
815825
context->strategy == PARTITION_STRATEGY_RANGE);
@@ -2438,6 +2448,11 @@ get_matching_hash_bounds(PartitionPruneContext *context,
24382448
* get_matching_list_bounds
24392449
* Determine the offsets of list bounds matching the specified value,
24402450
* according to the semantics of the given operator strategy
2451+
*
2452+
* scan_default will be set in the returned struct, if the default partition
2453+
* needs to be scanned, provided one exists at all. scan_null will be set if
2454+
* the special null-accepting partition needs to be scanned.
2455+
*
24412456
* 'opstrategy' if non-zero must be a btree strategy number.
24422457
*
24432458
* 'value' contains the value to use for pruning.
@@ -2640,8 +2655,13 @@ get_matching_list_bounds(PartitionPruneContext *context,
26402655
* Each datum whose offset is in result is to be treated as the upper bound of
26412656
* the partition that will contain the desired values.
26422657
*
2643-
* If default partition needs to be scanned for given values, set scan_default
2644-
* in result if present.
2658+
* scan_default is set in the returned struct if a default partition exists
2659+
* and we're absolutely certain that it needs to be scanned. We do *not* set
2660+
* it just because values match portions of the key space uncovered by
2661+
* partitions other than default (space which we normally assume to belong to
2662+
* the default partition): the final set of bounds obtained after combining
2663+
* multiple pruning steps might exclude it, so we infer its inclusion
2664+
* elsewhere.
26452665
*
26462666
* 'opstrategy' if non-zero must be a btree strategy number.
26472667
*
@@ -2667,8 +2687,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
26672687
int *partindices = boundinfo->indexes;
26682688
int off,
26692689
minoff,
2670-
maxoff,
2671-
i;
2690+
maxoff;
26722691
bool is_equal;
26732692
bool inclusive = false;
26742693

@@ -2698,13 +2717,15 @@ get_matching_range_bounds(PartitionPruneContext *context,
26982717
*/
26992718
if (nvalues == 0)
27002719
{
2720+
/* ignore key space not covered by any partitions */
27012721
if (partindices[minoff] < 0)
27022722
minoff++;
27032723
if (partindices[maxoff] < 0)
27042724
maxoff--;
27052725

27062726
result->scan_default = partition_bound_has_default(boundinfo);
2707-
Assert(minoff >= 0 && maxoff >= 0);
2727+
Assert(partindices[minoff] >= 0 &&
2728+
partindices[maxoff] >= 0);
27082729
result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
27092730

27102731
return result;
@@ -2732,11 +2753,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
27322753
if (nvalues == partnatts)
27332754
{
27342755
/* There can only be zero or one matching partition. */
2735-
if (partindices[off + 1] >= 0)
2736-
result->bound_offsets = bms_make_singleton(off + 1);
2737-
else
2738-
result->scan_default =
2739-
partition_bound_has_default(boundinfo);
2756+
result->bound_offsets = bms_make_singleton(off + 1);
27402757
return result;
27412758
}
27422759
else
@@ -2824,57 +2841,21 @@ get_matching_range_bounds(PartitionPruneContext *context,
28242841
maxoff = off + 1;
28252842
}
28262843

2827-
/*
2828-
* Skip if minoff/maxoff are actually the upper bound of a
2829-
* un-assigned portion of values.
2830-
*/
2831-
if (partindices[minoff] < 0 && minoff < boundinfo->ndatums)
2832-
minoff++;
2833-
if (partindices[maxoff] < 0 && maxoff >= 1)
2834-
maxoff--;
2835-
2836-
/*
2837-
* There may exist a range of values unassigned to any
2838-
* non-default partition between the datums at minoff and
2839-
* maxoff. Add the default partition in that case.
2840-
*/
2841-
if (partition_bound_has_default(boundinfo))
2842-
{
2843-
for (i = minoff; i <= maxoff; i++)
2844-
{
2845-
if (partindices[i] < 0)
2846-
{
2847-
result->scan_default = true;
2848-
break;
2849-
}
2850-
}
2851-
}
2852-
28532844
Assert(minoff >= 0 && maxoff >= 0);
28542845
result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
28552846
}
2856-
else if (off >= 0) /* !is_equal */
2847+
else
28572848
{
28582849
/*
28592850
* The lookup value falls in the range between some bounds in
28602851
* boundinfo. 'off' would be the offset of the greatest bound
28612852
* that is <= lookup value, so add off + 1 to the result
28622853
* instead as the offset of the upper bound of the only
2863-
* partition that may contain the lookup value.
2864-
*/
2865-
if (partindices[off + 1] >= 0)
2866-
result->bound_offsets = bms_make_singleton(off + 1);
2867-
else
2868-
result->scan_default =
2869-
partition_bound_has_default(boundinfo);
2870-
}
2871-
else
2872-
{
2873-
/*
2874-
* off < 0: the lookup value is smaller than all bounds, so
2875-
* only the default partition qualifies, if there is one.
2854+
* partition that may contain the lookup value. If 'off' is
2855+
* -1 indicating that all bounds are greater, then we simply
2856+
* end up adding the first bound's offset, that is, 0.
28762857
*/
2877-
result->scan_default = partition_bound_has_default(boundinfo);
2858+
result->bound_offsets = bms_make_singleton(off + 1);
28782859
}
28792860

28802861
return result;
@@ -2945,16 +2926,18 @@ get_matching_range_bounds(PartitionPruneContext *context,
29452926

29462927
minoff = inclusive ? off : off + 1;
29472928
}
2948-
2949-
/*
2950-
* lookup value falls in the range between some bounds in
2951-
* boundinfo. off would be the offset of the greatest bound
2952-
* that is <= lookup value, so add off + 1 to the result
2953-
* instead as the offset of the upper bound of the smallest
2954-
* partition that may contain the lookup value.
2955-
*/
29562929
else
2930+
{
2931+
2932+
/*
2933+
* lookup value falls in the range between some bounds in
2934+
* boundinfo. off would be the offset of the greatest
2935+
* bound that is <= lookup value, so add off + 1 to the
2936+
* result instead as the offset of the upper bound of the
2937+
* smallest partition that may contain the lookup value.
2938+
*/
29572939
minoff = off + 1;
2940+
}
29582941
}
29592942
break;
29602943

@@ -2972,16 +2955,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
29722955
boundinfo,
29732956
nvalues, values,
29742957
&is_equal);
2975-
if (off < 0)
2976-
{
2977-
/*
2978-
* All bounds are greater than the key, so we could only
2979-
* expect to find the lookup key in the default partition.
2980-
*/
2981-
result->scan_default = partition_bound_has_default(boundinfo);
2982-
return result;
2983-
}
2984-
else
2958+
if (off >= 0)
29852959
{
29862960
/*
29872961
* See the comment above.
@@ -3029,65 +3003,58 @@ get_matching_range_bounds(PartitionPruneContext *context,
30293003
else
30303004
maxoff = off;
30313005
}
3006+
else
3007+
{
3008+
/*
3009+
* 'off' is -1 indicating that all bounds are greater, so just
3010+
* set the first bound's offset as maxoff.
3011+
*/
3012+
maxoff = off + 1;
3013+
}
30323014
break;
30333015

30343016
default:
30353017
elog(ERROR, "invalid strategy number %d", opstrategy);
30363018
break;
30373019
}
30383020

3021+
Assert(minoff >= 0 && minoff <= boundinfo->ndatums);
3022+
Assert(maxoff >= 0 && maxoff <= boundinfo->ndatums);
3023+
30393024
/*
3040-
* Skip a gap and when doing so, check if the bound contains a finite
3041-
* value to decide if we need to add the default partition. If it's an
3042-
* infinite bound, we need not add the default partition, as having an
3043-
* infinite bound means the partition in question catches any values that
3044-
* would otherwise be in the default partition.
3025+
* If the smallest partition to return has MINVALUE (negative infinity) as
3026+
* its lower bound, increment it to point to the next finite bound
3027+
* (supposedly its upper bound), so that we don't inadvertently end up
3028+
* scanning the default partition.
30453029
*/
3046-
if (partindices[minoff] < 0)
3030+
if (minoff < boundinfo->ndatums && partindices[minoff] < 0)
30473031
{
30483032
int lastkey = nvalues - 1;
30493033

3050-
if (minoff >= 0 &&
3051-
minoff < boundinfo->ndatums &&
3052-
boundinfo->kind[minoff][lastkey] ==
3053-
PARTITION_RANGE_DATUM_VALUE)
3054-
result->scan_default = partition_bound_has_default(boundinfo);
3055-
3056-
minoff++;
3034+
if (boundinfo->kind[minoff][lastkey] ==
3035+
PARTITION_RANGE_DATUM_MINVALUE)
3036+
{
3037+
minoff++;
3038+
Assert(boundinfo->indexes[minoff] >= 0);
3039+
}
30573040
}
30583041

30593042
/*
3060-
* Skip a gap. See the above comment about how we decide whether or not
3061-
* to scan the default partition based whether the datum that will become
3062-
* the maximum datum is finite or not.
3043+
* If the previous greatest partition has MAXVALUE (positive infinity) as
3044+
* its upper bound (something only possible to do with multi-column range
3045+
* partitioning), we can switch to it as the greatest partition to
3046+
* return. Again, so that we don't inadvertently end up scanning the
3047+
* default partition.
30633048
*/
30643049
if (maxoff >= 1 && partindices[maxoff] < 0)
30653050
{
30663051
int lastkey = nvalues - 1;
30673052

3068-
if (maxoff >= 0 &&
3069-
maxoff <= boundinfo->ndatums &&
3070-
boundinfo->kind[maxoff - 1][lastkey] ==
3071-
PARTITION_RANGE_DATUM_VALUE)
3072-
result->scan_default = partition_bound_has_default(boundinfo);
3073-
3074-
maxoff--;
3075-
}
3076-
3077-
if (partition_bound_has_default(boundinfo))
3078-
{
3079-
/*
3080-
* There may exist a range of values unassigned to any non-default
3081-
* partition between the datums at minoff and maxoff. Add the default
3082-
* partition in that case.
3083-
*/
3084-
for (i = minoff; i <= maxoff; i++)
3053+
if (boundinfo->kind[maxoff - 1][lastkey] ==
3054+
PARTITION_RANGE_DATUM_MAXVALUE)
30853055
{
3086-
if (partindices[i] < 0)
3087-
{
3088-
result->scan_default = true;
3089-
break;
3090-
}
3056+
maxoff--;
3057+
Assert(boundinfo->indexes[maxoff] >= 0);
30913058
}
30923059
}
30933060

@@ -3332,14 +3299,24 @@ perform_pruning_combine_step(PartitionPruneContext *context,
33323299

33333300
/*
33343301
* A combine step without any source steps is an indication to not perform
3335-
* any partition pruning, we just return all partitions.
3302+
* any partition pruning. Return all datum indexes in that case.
33363303
*/
33373304
result = (PruneStepResult *) palloc0(sizeof(PruneStepResult));
33383305
if (list_length(cstep->source_stepids) == 0)
33393306
{
33403307
PartitionBoundInfo boundinfo = context->boundinfo;
3308+
int rangemax;
3309+
3310+
/*
3311+
* Add all valid offsets into the boundinfo->indexes array. For range
3312+
* partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1)
3313+
* valid entries; otherwise there are boundinfo->ndatums.
3314+
*/
3315+
rangemax = context->strategy == PARTITION_STRATEGY_RANGE ?
3316+
boundinfo->ndatums : boundinfo->ndatums - 1;
33413317

3342-
result->bound_offsets = bms_add_range(NULL, 0, boundinfo->ndatums - 1);
3318+
result->bound_offsets =
3319+
bms_add_range(result->bound_offsets, 0, rangemax);
33433320
result->scan_default = partition_bound_has_default(boundinfo);
33443321
result->scan_null = partition_bound_accepts_nulls(boundinfo);
33453322
return result;

src/include/partitioning/partbounds.h

-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
* pointed by remainder produced when hash value of the datum-tuple is divided
5757
* by the greatest modulus.
5858
*/
59-
6059
typedef struct PartitionBoundInfoData
6160
{
6261
char strategy; /* hash, list or range? */

0 commit comments

Comments
 (0)