@@ -189,6 +189,12 @@ static void create_one_window_path(PlannerInfo *root,
189
189
List * activeWindows );
190
190
static RelOptInfo * create_distinct_paths (PlannerInfo * root ,
191
191
RelOptInfo * input_rel );
192
+ static void create_partial_distinct_paths (PlannerInfo * root ,
193
+ RelOptInfo * input_rel ,
194
+ RelOptInfo * final_distinct_rel );
195
+ static RelOptInfo * create_final_distinct_paths (PlannerInfo * root ,
196
+ RelOptInfo * input_rel ,
197
+ RelOptInfo * distinct_rel );
192
198
static RelOptInfo * create_ordered_paths (PlannerInfo * root ,
193
199
RelOptInfo * input_rel ,
194
200
PathTarget * target ,
@@ -1570,6 +1576,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
1570
1576
*/
1571
1577
root -> upper_targets [UPPERREL_FINAL ] = final_target ;
1572
1578
root -> upper_targets [UPPERREL_ORDERED ] = final_target ;
1579
+ root -> upper_targets [UPPERREL_PARTIAL_DISTINCT ] = sort_input_target ;
1573
1580
root -> upper_targets [UPPERREL_DISTINCT ] = sort_input_target ;
1574
1581
root -> upper_targets [UPPERREL_WINDOW ] = sort_input_target ;
1575
1582
root -> upper_targets [UPPERREL_GROUP_AGG ] = grouping_target ;
@@ -4227,16 +4234,9 @@ create_one_window_path(PlannerInfo *root,
4227
4234
* Sort/Unique won't project anything.
4228
4235
*/
4229
4236
static RelOptInfo *
4230
- create_distinct_paths (PlannerInfo * root ,
4231
- RelOptInfo * input_rel )
4237
+ create_distinct_paths (PlannerInfo * root , RelOptInfo * input_rel )
4232
4238
{
4233
- Query * parse = root -> parse ;
4234
- Path * cheapest_input_path = input_rel -> cheapest_total_path ;
4235
4239
RelOptInfo * distinct_rel ;
4236
- double numDistinctRows ;
4237
- bool allow_hash ;
4238
- Path * path ;
4239
- ListCell * lc ;
4240
4240
4241
4241
/* For now, do all work in the (DISTINCT, NULL) upperrel */
4242
4242
distinct_rel = fetch_upper_rel (root , UPPERREL_DISTINCT , NULL );
@@ -4258,6 +4258,184 @@ create_distinct_paths(PlannerInfo *root,
4258
4258
distinct_rel -> useridiscurrent = input_rel -> useridiscurrent ;
4259
4259
distinct_rel -> fdwroutine = input_rel -> fdwroutine ;
4260
4260
4261
+ /* build distinct paths based on input_rel's pathlist */
4262
+ create_final_distinct_paths (root , input_rel , distinct_rel );
4263
+
4264
+ /* now build distinct paths based on input_rel's partial_pathlist */
4265
+ create_partial_distinct_paths (root , input_rel , distinct_rel );
4266
+
4267
+ /* Give a helpful error if we failed to create any paths */
4268
+ if (distinct_rel -> pathlist == NIL )
4269
+ ereport (ERROR ,
4270
+ (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
4271
+ errmsg ("could not implement DISTINCT" ),
4272
+ errdetail ("Some of the datatypes only support hashing, while others only support sorting." )));
4273
+
4274
+ /*
4275
+ * If there is an FDW that's responsible for all baserels of the query,
4276
+ * let it consider adding ForeignPaths.
4277
+ */
4278
+ if (distinct_rel -> fdwroutine &&
4279
+ distinct_rel -> fdwroutine -> GetForeignUpperPaths )
4280
+ distinct_rel -> fdwroutine -> GetForeignUpperPaths (root ,
4281
+ UPPERREL_DISTINCT ,
4282
+ input_rel ,
4283
+ distinct_rel ,
4284
+ NULL );
4285
+
4286
+ /* Let extensions possibly add some more paths */
4287
+ if (create_upper_paths_hook )
4288
+ (* create_upper_paths_hook ) (root , UPPERREL_DISTINCT , input_rel ,
4289
+ distinct_rel , NULL );
4290
+
4291
+ /* Now choose the best path(s) */
4292
+ set_cheapest (distinct_rel );
4293
+
4294
+ return distinct_rel ;
4295
+ }
4296
+
4297
+ /*
4298
+ * create_partial_distinct_paths
4299
+ *
4300
+ * Process 'input_rel' partial paths and add unique/aggregate paths to the
4301
+ * UPPERREL_PARTIAL_DISTINCT rel. For paths created, add Gather/GatherMerge
4302
+ * paths on top and add a final unique/aggregate path to remove any duplicates
4303
+ * produced from combining rows from parallel workers.
+ *
+ * input_rel: relation whose partial_pathlist supplies the source paths
+ * final_distinct_rel: rel passed on to create_final_distinct_paths to receive
+ * the de-duplicating paths built atop the gathered partial paths
+ *
+ * Returns without creating anything when input_rel is not flagged
+ * consider_parallel, when it has no partial paths, or when the query uses
+ * DISTINCT ON.
4304
+ */
4305
+ static void
4306
+ create_partial_distinct_paths (PlannerInfo * root , RelOptInfo * input_rel ,
4307
+ RelOptInfo * final_distinct_rel )
4308
+ {
4309
+ RelOptInfo * partial_distinct_rel ;
4310
+ Query * parse ;
4311
+ List * distinctExprs ;
4312
+ double numDistinctRows ;
4313
+ Path * cheapest_partial_path ;
4314
+ ListCell * lc ;
4315
+
4316
+ /* nothing to do when parallelism isn't being considered for the input rel, or it has no partial paths */
4317
+ if (!input_rel -> consider_parallel || input_rel -> partial_pathlist == NIL )
4318
+ return ;
4319
+
4320
+ parse = root -> parse ;
4321
+
4322
+ /* can't do parallel DISTINCT ON */
4323
+ if (parse -> hasDistinctOn )
4324
+ return ;
4325
+
4326
+ partial_distinct_rel = fetch_upper_rel (root , UPPERREL_PARTIAL_DISTINCT ,
4327
+ NULL );
4328
+ partial_distinct_rel -> reltarget = root -> upper_targets [UPPERREL_PARTIAL_DISTINCT ];
4329
+ partial_distinct_rel -> consider_parallel = input_rel -> consider_parallel ;
4330
+
4331
+ /*
4332
+ * If input_rel belongs to a single FDW, so does the partial_distinct_rel.
4333
+ */
4334
+ partial_distinct_rel -> serverid = input_rel -> serverid ;
4335
+ partial_distinct_rel -> userid = input_rel -> userid ;
4336
+ partial_distinct_rel -> useridiscurrent = input_rel -> useridiscurrent ;
4337
+ partial_distinct_rel -> fdwroutine = input_rel -> fdwroutine ;
4338
+
4339
+ cheapest_partial_path = linitial (input_rel -> partial_pathlist ); /* NOTE(review): relies on the first partial path being the cheapest -- confirm */
4340
+
4341
+ distinctExprs = get_sortgrouplist_exprs (parse -> distinctClause ,
4342
+ parse -> targetList );
4343
+
4344
+ /* estimate how many distinct rows we'll get from each worker */
4345
+ numDistinctRows = estimate_num_groups (root , distinctExprs ,
4346
+ cheapest_partial_path -> rows ,
4347
+ NULL , NULL );
4348
+
4349
+ /* first try adding unique paths atop partial paths already sorted by the distinct pathkeys */
4350
+ if (grouping_is_sortable (parse -> distinctClause ))
4351
+ {
4352
+ foreach (lc , input_rel -> partial_pathlist )
4353
+ {
4354
+ Path * path = (Path * ) lfirst (lc );
4355
+
4356
+ if (pathkeys_contained_in (root -> distinct_pathkeys , path -> pathkeys ))
4357
+ {
4358
+ add_partial_path (partial_distinct_rel , (Path * )
4359
+ create_upper_unique_path (root ,
4360
+ partial_distinct_rel ,
4361
+ path ,
4362
+ list_length (root -> distinct_pathkeys ),
4363
+ numDistinctRows ));
4364
+ }
4365
+ }
4366
+ }
4367
+
4368
+ /*
4369
+ * Now try hash aggregate paths, if enabled and hashing is possible. Since
4370
+ * we're not on the hook to ensure we do our best to create at least one
4371
+ * path here, we treat enable_hashagg as a hard off-switch rather than the
4372
+ * slightly softer variant in create_final_distinct_paths.
4373
+ */
4374
+ if (enable_hashagg && grouping_is_hashable (parse -> distinctClause ))
4375
+ {
4376
+ add_partial_path (partial_distinct_rel , (Path * )
4377
+ create_agg_path (root ,
4378
+ partial_distinct_rel ,
4379
+ cheapest_partial_path ,
4380
+ cheapest_partial_path -> pathtarget ,
4381
+ AGG_HASHED ,
4382
+ AGGSPLIT_SIMPLE ,
4383
+ parse -> distinctClause ,
4384
+ NIL ,
4385
+ NULL ,
4386
+ numDistinctRows ));
4387
+ }
4388
+
4389
+ /*
4390
+ * If there is an FDW that's responsible for all baserels of the query,
4391
+ * let it consider adding ForeignPaths.
4392
+ */
4393
+ if (partial_distinct_rel -> fdwroutine &&
4394
+ partial_distinct_rel -> fdwroutine -> GetForeignUpperPaths )
4395
+ partial_distinct_rel -> fdwroutine -> GetForeignUpperPaths (root ,
4396
+ UPPERREL_PARTIAL_DISTINCT ,
4397
+ input_rel ,
4398
+ partial_distinct_rel ,
4399
+ NULL );
4400
+
4401
+ /* Let extensions possibly add some more partial paths */
4402
+ if (create_upper_paths_hook )
4403
+ (* create_upper_paths_hook ) (root , UPPERREL_PARTIAL_DISTINCT ,
4404
+ input_rel , partial_distinct_rel , NULL );
4405
+
4406
+ if (partial_distinct_rel -> partial_pathlist != NIL )
4407
+ {
4408
+ generate_gather_paths (root , partial_distinct_rel , true);
4409
+ set_cheapest (partial_distinct_rel );
4410
+
4411
+ /*
4412
+ * Finally, create paths to distinctify the final result. This step
4413
+ * is needed to remove any duplicates due to combining rows from
4414
+ * parallel workers. The resulting paths are stored in
+ * final_distinct_rel, with partial_distinct_rel acting as the input.
4415
+ */
4416
+ create_final_distinct_paths (root , partial_distinct_rel ,
4417
+ final_distinct_rel );
4418
+ }
4419
+ }
4420
+
4421
+ /*
4422
+ * create_final_distinct_paths
4423
+ * Create distinct paths in 'distinct_rel' based on 'input_rel' pathlist
4424
+ *
4425
+ * input_rel: contains the source-data paths
4426
+ * distinct_rel: destination relation for storing created paths
4427
+ */
4428
+ static RelOptInfo *
4429
+ create_final_distinct_paths (PlannerInfo * root , RelOptInfo * input_rel ,
4430
+ RelOptInfo * distinct_rel )
4431
+ {
4432
+ Query * parse = root -> parse ;
4433
+ Path * cheapest_input_path = input_rel -> cheapest_total_path ;
4434
+ double numDistinctRows ;
4435
+ bool allow_hash ;
4436
+ Path * path ;
4437
+ ListCell * lc ;
4438
+
4261
4439
/* Estimate number of distinct rows there will be */
4262
4440
if (parse -> groupClause || parse -> groupingSets || parse -> hasAggs ||
4263
4441
root -> hasHavingQual )
@@ -4384,31 +4562,6 @@ create_distinct_paths(PlannerInfo *root,
4384
4562
numDistinctRows ));
4385
4563
}
4386
4564
4387
- /* Give a helpful error if we failed to find any implementation */
4388
- if (distinct_rel -> pathlist == NIL )
4389
- ereport (ERROR ,
4390
- (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
4391
- errmsg ("could not implement DISTINCT" ),
4392
- errdetail ("Some of the datatypes only support hashing, while others only support sorting." )));
4393
-
4394
- /*
4395
- * If there is an FDW that's responsible for all baserels of the query,
4396
- * let it consider adding ForeignPaths.
4397
- */
4398
- if (distinct_rel -> fdwroutine &&
4399
- distinct_rel -> fdwroutine -> GetForeignUpperPaths )
4400
- distinct_rel -> fdwroutine -> GetForeignUpperPaths (root , UPPERREL_DISTINCT ,
4401
- input_rel , distinct_rel ,
4402
- NULL );
4403
-
4404
- /* Let extensions possibly add some more paths */
4405
- if (create_upper_paths_hook )
4406
- (* create_upper_paths_hook ) (root , UPPERREL_DISTINCT ,
4407
- input_rel , distinct_rel , NULL );
4408
-
4409
- /* Now choose the best path(s) */
4410
- set_cheapest (distinct_rel );
4411
-
4412
4565
return distinct_rel ;
4413
4566
}
4414
4567
0 commit comments