diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 73d78617009d..256568d05a29 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -4339,9 +4339,19 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 	}
 	else
 	{
+		List	   *otherclauses;
+
 		innerbucketsize = 1.0;
 		innermcvfreq = 1.0;
-		foreach(hcl, hashclauses)
+
+		/* At first, try to estimate bucket size using extended statistics. */
+		otherclauses = estimate_multivariate_bucketsize(root,
+														inner_path->parent,
+														hashclauses,
+														&innerbucketsize);
+
+		/* Pass through the remaining clauses */
+		foreach(hcl, otherclauses)
 		{
 			RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl);
 			Selectivity thisbucketsize;
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c2918c9c8313..fb75b1ec46da 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3765,6 +3765,183 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	return numdistinct;
 }
 
+/*
+ * Try to estimate the bucket size of the hash join inner side when the join
+ * condition contains two or more clauses, by employing extended statistics.
+ *
+ * The main idea of this approach is that the ndistinct value obtained by a
+ * multivariate estimation over two or more columns yields a smaller bucket
+ * size than an estimation based on any single column.
+ *
+ * IMPORTANT: The way different estimates are combined here must stay in sync
+ * with the caller's method of combining them.
+ *
+ * Returns the list of clauses for which no extended statistics could be used.
+ */
+List *
+estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner,
+								 List *hashclauses,
+								 Selectivity *innerbucketsize)
+{
+	List	   *clauses = list_copy(hashclauses);
+	List	   *otherclauses = NIL;
+	double		ndistinct = 1.0;
+
+	if (list_length(hashclauses) <= 1)
+
+		/*
+		 * Nothing to do for a single clause.  Could we employ univariate
+		 * extended statistics here?
+		 */
+		return hashclauses;
+
+	while (clauses != NIL)
+	{
+		ListCell   *lc;
+		int			relid = -1;
+		List	   *varinfos = NIL;
+		List	   *origin_rinfos = NIL;
+		double		mvndistinct;
+		List	   *origin_varinfos;
+		int			group_relid = -1;
+		RelOptInfo *group_rel = NULL;
+		ListCell   *lc1,
+				   *lc2;
+
+		/*
+		 * Find clauses referencing the same single base relation and try to
+		 * estimate such a group with extended statistics.  Create a varinfo
+		 * for each approved clause; push a clause to otherclauses if it can't
+		 * be estimated here, or leave it to be processed in a later iteration.
+		 */
+		foreach(lc, clauses)
+		{
+			RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+			Node	   *expr;
+			Relids		relids;
+			GroupVarInfo *varinfo;
+
+			/*
+			 * Find the inner side of the join, for which we need to estimate
+			 * the number of buckets.  Use outer_is_left because
+			 * clause_sides_match_join has already been called on hash clauses.
+			 */
+			relids = rinfo->outer_is_left ?
+				rinfo->right_relids : rinfo->left_relids;
+			expr = rinfo->outer_is_left ?
+				get_rightop(rinfo->clause) : get_leftop(rinfo->clause);
+
+			if (bms_get_singleton_member(relids, &relid) &&
+				root->simple_rel_array[relid]->statlist != NIL)
+			{
+				/*
+				 * This inner-side expression references only one relation,
+				 * so extended statistics for this clause may exist.
+				 */
+				if (group_relid < 0)
+				{
+					RangeTblEntry *rte = root->simple_rte_array[relid];
+
+					if (!rte || (rte->relkind != RELKIND_RELATION &&
+								 rte->relkind != RELKIND_MATVIEW &&
+								 rte->relkind != RELKIND_FOREIGN_TABLE &&
+								 rte->relkind != RELKIND_PARTITIONED_TABLE))
+					{
+						/* Extended statistics can't exist in principle */
+						otherclauses = lappend(otherclauses, rinfo);
+						clauses = foreach_delete_current(clauses, lc);
+						continue;
+					}
+
+					group_relid = relid;
+					group_rel = root->simple_rel_array[relid];
+				}
+				else if (group_relid != relid)
+
+					/*
+					 * While forming a group, we don't need clauses from
+					 * other relations.
+					 */
+					continue;
+
+				varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
+				varinfo->var = expr;
+				varinfo->rel = root->simple_rel_array[relid];
+				varinfo->ndistinct = 0.0;
+				varinfo->isdefault = false;
+				varinfos = lappend(varinfos, varinfo);
+
+				/*
+				 * Remember the link to the RestrictInfo in case the clause
+				 * fails to be estimated.
+				 */
+				origin_rinfos = lappend(origin_rinfos, rinfo);
+			}
+			else
+				/* This clause can't be estimated with extended statistics */
+				otherclauses = lappend(otherclauses, rinfo);
+
+			clauses = foreach_delete_current(clauses, lc);
+		}
+
+		if (list_length(varinfos) < 2)
+		{
+			/*
+			 * Multivariate statistics don't apply to a single column, except
+			 * for expressions, but that has not been implemented yet.
+			 */
+			otherclauses = list_concat(otherclauses, origin_rinfos);
+			list_free_deep(varinfos);
+			list_free(origin_rinfos);
+			continue;
+		}
+
+		Assert(group_rel != NULL);
+
+		/* Let's employ extended statistics. */
+		origin_varinfos = varinfos;
+		for (;;)
+		{
+			bool		estimated = estimate_multivariate_ndistinct(root,
+																	 group_rel,
+																	 &varinfos,
+																	 &mvndistinct);
+
+			if (!estimated)
+				break;
+
+			/*
+			 * We've got an estimate.  Use the ndistinct value in a way
+			 * consistent with the caller's logic (see
+			 * final_cost_hashjoin).
+			 */
+			if (ndistinct < mvndistinct)
+				ndistinct = mvndistinct;
+			Assert(ndistinct >= 1.0);
+		}
+
+		Assert(list_length(origin_varinfos) == list_length(origin_rinfos));
+
+		/*
+		 * Pass through the clauses that couldn't be estimated here.
+		 */
+		forboth(lc1, origin_varinfos, lc2, origin_rinfos)
+		{
+			GroupVarInfo *vinfo = lfirst(lc1);
+
+			if (!list_member_ptr(varinfos, vinfo))
+				/* Already estimated */
+				continue;
+
+			/* Can't be estimated here - push it to the returned list */
+			otherclauses = lappend(otherclauses, lfirst(lc2));
+		}
+	}
+
+	*innerbucketsize = 1.0 / ndistinct;
+	return otherclauses;
+}
+
 /*
  * Estimate hash bucket statistics when the specified expression is used
  * as a hash key for the given number of buckets.
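
As a rough illustration of the estimate this function aims for, consider the sb_2 table
from the regression test added below (the figures are only indicative, and this query is
an illustration rather than part of the patch):

    SELECT count(DISTINCT x) AS nd_x,            -- about 49
           count(DISTINCT y) AS nd_y,            -- about 51
           count(DISTINCT z) AS nd_z,            -- about 73
           count(DISTINCT (x, y, z)) AS nd_xyz   -- about 10000
    FROM sb_2;

Estimating each hash clause separately leaves the inner bucket size estimate at about
1/49 (the largest of the per-column values), whereas a multivariate ndistinct over
(x, y, z) lets estimate_multivariate_bucketsize report a bucket size of roughly 1/10000.
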
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index d35939651f30..82ac8c6d9dad 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -218,6 +218,10 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
 								  double input_rows,
 								  List **pgset, EstimationInfo *estinfo);
+extern List *estimate_multivariate_bucketsize(PlannerInfo *root,
+											  RelOptInfo *inner,
+											  List *hashclauses,
+											  Selectivity *innerbucketsize);
 extern void estimate_hash_bucket_stats(PlannerInfo *root,
 									   Node *hashkey, double nbuckets,
 									   Selectivity *mcv_freq,
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 904d3e623f5f..686d8c93aa87 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -3383,3 +3383,48 @@ SELECT * FROM check_estimated_rows('
 (1 row)
 
 DROP TABLE grouping_unique;
+--
+-- Extended statistics on sb_2 (x, y, z) improve the bucket size estimation,
+-- so the optimizer may choose a hash join.
+--
+CREATE TABLE sb_1 AS
+  SELECT gs % 10 AS x, gs % 10 AS y, gs % 10 AS z
+  FROM generate_series(1, 1e4) AS gs;
+CREATE TABLE sb_2 AS
+  SELECT gs % 49 AS x, gs % 51 AS y, gs % 73 AS z, 'abc' || gs AS payload
+  FROM generate_series(1, 1e4) AS gs;
+ANALYZE sb_1, sb_2;
+-- During hash join estimation, the number of distinct values is calculated
+-- for each column separately.  The optimizer picks the smallest ndistinct,
+-- and therefore the largest hash bucket size, since the columns may be
+-- strongly correlated.
+EXPLAIN (COSTS OFF) -- Choose merge join
+SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Merge Join
+   Merge Cond: ((a.z = b.z) AND (a.x = b.x) AND (a.y = b.y))
+   ->  Sort
+         Sort Key: a.z, a.x, a.y
+         ->  Seq Scan on sb_1 a
+   ->  Sort
+         Sort Key: b.z, b.x, b.y
+         ->  Seq Scan on sb_2 b
+(8 rows)
+
+-- The ndistinct extended statistics on (x, y, z) provide a more reliable
+-- estimate of the bucket size.
+CREATE STATISTICS extstat_sb_2 (ndistinct) ON x, y, z FROM sb_2;
+ANALYZE sb_2;
+EXPLAIN (COSTS OFF) -- Choose hash join
+SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((a.x = b.x) AND (a.y = b.y) AND (a.z = b.z))
+   ->  Seq Scan on sb_1 a
+   ->  Hash
+         ->  Seq Scan on sb_2 b
+(5 rows)
+
+DROP TABLE sb_1, sb_2 CASCADE;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 88b33ccaef82..b71a6cd089fc 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1719,3 +1719,32 @@ SELECT * FROM check_estimated_rows('
     ON t1.t1 = q1.x;
 ');
 DROP TABLE grouping_unique;
+
+--
+-- Extended statistics on sb_2 (x, y, z) improve the bucket size estimation,
+-- so the optimizer may choose a hash join.
+--
+CREATE TABLE sb_1 AS
+  SELECT gs % 10 AS x, gs % 10 AS y, gs % 10 AS z
+  FROM generate_series(1, 1e4) AS gs;
+CREATE TABLE sb_2 AS
+  SELECT gs % 49 AS x, gs % 51 AS y, gs % 73 AS z, 'abc' || gs AS payload
+  FROM generate_series(1, 1e4) AS gs;
+ANALYZE sb_1, sb_2;
+
+-- During hash join estimation, the number of distinct values is calculated
+-- for each column separately.  The optimizer picks the smallest ndistinct,
+-- and therefore the largest hash bucket size, since the columns may be
+-- strongly correlated.
+EXPLAIN (COSTS OFF) -- Choose merge join
+SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
+
+-- The ndistinct extended statistics on (x, y, z) provide a more reliable
+-- estimate of the bucket size.
+CREATE STATISTICS extstat_sb_2 (ndistinct) ON x, y, z FROM sb_2;
+ANALYZE sb_2;
+
+EXPLAIN (COSTS OFF) -- Choose hash join
+SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
+
+DROP TABLE sb_1, sb_2 CASCADE;
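
After ANALYZE, the multivariate ndistinct values that feed this estimate can be inspected
through the pg_stats_ext view; for example (output shape is approximate):

    SELECT statistics_name, attnames, n_distinct
    FROM pg_stats_ext
    WHERE statistics_name = 'extstat_sb_2';

For sb_2 the entry covering the whole (x, y, z) group is close to 10000, i.e. nearly one
row per hash bucket, which is why the planner in the test above switches from the merge
join to the hash join once the statistics object exists.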