Spark: Spark SQL Extensions for create branch #6617
Grammar changes (Iceberg Spark SQL extensions, ANTLR):

```
@@ -73,6 +73,13 @@ statement
     | ALTER TABLE multipartIdentifier WRITE writeSpec #setWriteDistributionAndOrdering
     | ALTER TABLE multipartIdentifier SET IDENTIFIER_KW FIELDS fieldList #setIdentifierFields
     | ALTER TABLE multipartIdentifier DROP IDENTIFIER_KW FIELDS fieldList #dropIdentifierFields
+    | ALTER TABLE multipartIdentifier CREATE BRANCH identifier (AS OF VERSION snapshotId)? (RETAIN snapshotRefRetain snapshotRefRetainTimeUnit)? (snapshotRetentionClause)? #createBranch
     ;

+snapshotRetentionClause
+    : WITH SNAPSHOT RETENTION numSnapshots SNAPSHOTS
+    | WITH SNAPSHOT RETENTION snapshotRetain snapshotRetainTimeUnit
+    | WITH SNAPSHOT RETENTION numSnapshots SNAPSHOTS snapshotRetain snapshotRetainTimeUnit
+    ;
+
 writeSpec
```
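Putting the new rule together, the statement forms this grammar accepts look like the following. A minimal sketch run through `spark.sql`, assuming a SparkSession with the Iceberg Spark SQL extensions enabled; the table name `db.tbl`, branch name `b1`, and snapshot ID are placeholders:

```scala
// Branch at the table's current snapshot, with no retention settings.
spark.sql("ALTER TABLE db.tbl CREATE BRANCH b1")

// Branch at an explicit snapshot. RETAIN sets how long the branch ref itself is kept
// (mapped to setMaxRefAgeMs by CreateBranchExec later in this diff); WITH SNAPSHOT RETENTION
// sets the minimum snapshots to keep and the maximum snapshot age on the branch.
spark.sql("""
  ALTER TABLE db.tbl CREATE BRANCH b1
  AS OF VERSION 1234567890
  RETAIN 7 DAYS
  WITH SNAPSHOT RETENTION 2 SNAPSHOTS 30 DAYS
""")
```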
The same file also extends nonReserved and adds the helper parser rules backing the new clause:

```
@@ -168,34 +175,76 @@ fieldList
     ;

 nonReserved
-    : ADD | ALTER | AS | ASC | BY | CALL | DESC | DROP | FIELD | FIRST | LAST | NULLS | ORDERED | PARTITION | TABLE | WRITE
-    | DISTRIBUTED | LOCALLY | UNORDERED | REPLACE | WITH | IDENTIFIER_KW | FIELDS | SET
+    : ADD | ALTER | AS | ASC | BRANCH | BY | CALL | CREATE | DAYS | DESC | DROP | FIELD | FIRST | HOURS | LAST | NULLS | OF | ORDERED | PARTITION | TABLE | WRITE
+    | DISTRIBUTED | LOCALLY | MINUTES | MONTHS | UNORDERED | REPLACE | RETAIN | VERSION | WITH | IDENTIFIER_KW | FIELDS | SET | SNAPSHOT | SNAPSHOTS
     | TRUE | FALSE
     | MAP
     ;

+snapshotId
+    : number
+    ;
+
+numSnapshots
+    : number
+    ;
+
+snapshotRetain
+    : number
+    ;
+
+snapshotRefRetain
+    : number
+    ;
```

Review thread on the snapshotRefRetain rule:

Contributor: Why are there so many aliases for number? Are these rules useful?

Author: @jackye1995 asked the same question. I originally added snapshotRefRetain and snapshotRetain to make the statement-parsing code more readable. Removing them is technically feasible; in the new version I have removed them (including for create branch).
```
+snapshotRefRetainTimeUnit
+    : timeUnit
+    ;
+
+snapshotRetainTimeUnit
+    : timeUnit
+    ;
+
+timeUnit
+    : DAYS
+    | HOURS
+    | MINUTES
+    ;
```

Review thread on the timeUnit rule:

Contributor: Missing SECONDS?

Author: Should we support it? I prefer at least minute-level granularity.

Contributor: Sure, I am fine with minute level. We can always add more if needed.
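The grammar exposes only DAYS, HOURS, and MINUTES, while the execution side later in this diff calls setMaxSnapshotAgeMs / setMaxRefAgeMs, which take milliseconds, so the parsed value and unit presumably get normalized before reaching the logical plan. A minimal sketch of such a conversion using java.util.concurrent.TimeUnit (the helper name `retentionToMillis` is hypothetical, not part of this PR):

```scala
import java.util.concurrent.TimeUnit

// Hypothetical helper: normalize a parsed retention value and time-unit token
// (DAYS | HOURS | MINUTES) into the milliseconds the snapshot APIs expect.
def retentionToMillis(value: Long, unit: String): Long = unit.toUpperCase match {
  case "DAYS"    => TimeUnit.DAYS.toMillis(value)
  case "HOURS"   => TimeUnit.HOURS.toMillis(value)
  case "MINUTES" => TimeUnit.MINUTES.toMillis(value)
  case other     => throw new IllegalArgumentException(s"Unsupported time unit: $other")
}

retentionToMillis(7L, "DAYS") // RETAIN 7 DAYS -> 604800000 ms
```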
The lexer gains tokens for the new keywords:

```
 ADD: 'ADD';
 ALTER: 'ALTER';
 AS: 'AS';
 ASC: 'ASC';
+BRANCH: 'BRANCH';
 BY: 'BY';
 CALL: 'CALL';
+DAYS: 'DAYS';
 DESC: 'DESC';
 DISTRIBUTED: 'DISTRIBUTED';
 DROP: 'DROP';
 FIELD: 'FIELD';
 FIELDS: 'FIELDS';
 FIRST: 'FIRST';
+HOURS: 'HOURS';
 LAST: 'LAST';
 LOCALLY: 'LOCALLY';
+MINUTES: 'MINUTES';
+MONTHS: 'MONTHS';
+CREATE: 'CREATE';
 NULLS: 'NULLS';
+OF: 'OF';
 ORDERED: 'ORDERED';
 PARTITION: 'PARTITION';
 REPLACE: 'REPLACE';
+RETAIN: 'RETAIN';
+RETENTION: 'RETENTION';
 IDENTIFIER_KW: 'IDENTIFIER';
 SET: 'SET';
+SNAPSHOT: 'SNAPSHOT';
+SNAPSHOTS: 'SNAPSHOTS';
 TABLE: 'TABLE';
 UNORDERED: 'UNORDERED';
+VERSION: 'VERSION';
 WITH: 'WITH';
 WRITE: 'WRITE';
```
New logical plan node, CreateBranch.scala (new file):

```scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.catalyst.expressions.Attribute

case class CreateBranch(table: Seq[String], branch: String, snapshotId: Option[Long], numSnapshots: Option[Long],
    snapshotRetain: Option[Long], snapshotRefRetain: Option[Long]) extends LeafCommand {

  import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._

  override lazy val output: Seq[Attribute] = Nil

  override def simpleString(maxFields: Int): String = {
    s"Create branch: ${branch} for table: ${table.quoted}"
  }
}
```
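For orientation, a hedged sketch of the node a parsed statement would map to. The table, branch, and snapshot ID are placeholders, and the retention values are assumed to already be in milliseconds, because CreateBranchExec (below) forwards them to setMaxSnapshotAgeMs and setMaxRefAgeMs unchanged:

```scala
import java.util.concurrent.TimeUnit

// ALTER TABLE db.tbl CREATE BRANCH b1
//   AS OF VERSION 123
//   RETAIN 7 DAYS
//   WITH SNAPSHOT RETENTION 2 SNAPSHOTS 30 DAYS
val node = CreateBranch(
  table = Seq("db", "tbl"),
  branch = "b1",
  snapshotId = Some(123L),                              // AS OF VERSION 123
  numSnapshots = Some(2L),                              // keep at least 2 snapshots
  snapshotRetain = Some(TimeUnit.DAYS.toMillis(30)),    // max snapshot age on the branch
  snapshotRefRetain = Some(TimeUnit.DAYS.toMillis(7)))  // max age of the branch ref itself
```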
New physical exec node, CreateBranchExec.scala (new file; Apache license header as above):

```scala
package org.apache.spark.sql.execution.datasources.v2

import org.apache.iceberg.spark.source.SparkTable
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.CreateBranch
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.catalog.TableCatalog

case class CreateBranchExec(
    catalog: TableCatalog,
    ident: Identifier,
    createBranch: CreateBranch) extends LeafV2CommandExec {

  import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._

  override lazy val output: Seq[Attribute] = Nil

  override protected def run(): Seq[InternalRow] = {
    catalog.loadTable(ident) match {
      case iceberg: SparkTable =>
        val snapshotId = createBranch.snapshotId.getOrElse(iceberg.table.currentSnapshot().snapshotId())
        val manageSnapshot = iceberg.table.manageSnapshots()
          .createBranch(createBranch.branch, snapshotId)

        if (createBranch.numSnapshots.nonEmpty) {
          manageSnapshot.setMinSnapshotsToKeep(createBranch.branch, createBranch.numSnapshots.get.toInt)
        }

        if (createBranch.snapshotRetain.nonEmpty) {
          manageSnapshot.setMaxSnapshotAgeMs(createBranch.branch, createBranch.snapshotRetain.get)
        }

        if (createBranch.snapshotRefRetain.nonEmpty) {
          manageSnapshot.setMaxRefAgeMs(createBranch.branch, createBranch.snapshotRefRetain.get)
        }

        manageSnapshot.commit()

      case table =>
        throw new UnsupportedOperationException(s"Cannot add branch to non-Iceberg table: $table")
    }

    Nil
  }

  override def simpleString(maxFields: Int): String = {
    s"Create branch: ${createBranch.branch} operation for table: ${ident.quoted}"
  }
}
```

Review thread on the non-Iceberg fallback (lines +60 to +61):

Contributor: I think this case will be common for all the reference based operations. We may want to see about extracting to a common parent. Not needed at this point, but we may revisit in later DDL implementations.

Contributor: This seems like an existing pattern for all extensions, so I think it is probably fine to leave it like this.
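For reference, the DDL handled here reduces to the same Iceberg snapshot-management calls the exec issues. A standalone sketch against an already loaded org.apache.iceberg.Table, with a hypothetical branch name and retention values:

```scala
import java.util.concurrent.TimeUnit

import org.apache.iceberg.Table

// Same calls as CreateBranchExec, driven directly through the Iceberg API:
// branch the current snapshot, keep at least 2 snapshots, expire snapshots
// older than 30 days on the branch, and expire the branch ref after 7 days.
def createBranchDirectly(table: Table): Unit = {
  table.manageSnapshots()
    .createBranch("b1", table.currentSnapshot().snapshotId())
    .setMinSnapshotsToKeep("b1", 2)
    .setMaxSnapshotAgeMs("b1", TimeUnit.DAYS.toMillis(30))
    .setMaxRefAgeMs("b1", TimeUnit.DAYS.toMillis(7))
    .commit()
}
```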
Changes to ExtendedDataSourceV2Strategy to plan the new node:

```
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal
 import org.apache.spark.sql.catalyst.expressions.PredicateHelper
 import org.apache.spark.sql.catalyst.plans.logical.AddPartitionField
 import org.apache.spark.sql.catalyst.plans.logical.Call
+import org.apache.spark.sql.catalyst.plans.logical.CreateBranch
 import org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable
 import org.apache.spark.sql.catalyst.plans.logical.DropIdentifierFields
 import org.apache.spark.sql.catalyst.plans.logical.DropPartitionField

@@ -61,6 +62,9 @@ case class ExtendedDataSourceV2Strategy(spark: SparkSession) extends Strategy wi
     case AddPartitionField(IcebergCatalogAndIdentifier(catalog, ident), transform, name) =>
       AddPartitionFieldExec(catalog, ident, transform, name) :: Nil

+    case CreateBranch(IcebergCatalogAndIdentifier(catalog, ident), _, _, _, _, _) =>
+      CreateBranchExec(catalog, ident, plan.asInstanceOf[CreateBranch]) :: Nil
```
Review thread on the CreateBranch case:

Contributor: Why does this pass the logical plan rather than passing the necessary information? Is it just to avoid a longer line?

Author: I passed in the …
The existing cases follow unchanged:

```
     case DropPartitionField(IcebergCatalogAndIdentifier(catalog, ident), transform) =>
       DropPartitionFieldExec(catalog, ident, transform) :: Nil
```
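On the question above about passing the whole logical plan: one alternative that keeps the line short without the asInstanceOf cast is to bind the matched node with a pattern binder and hand it to the exec. A hedged sketch of that variant, written as a drop-in for the case clause shown above (not the code in this PR):

```scala
// Hypothetical variant: `c @ ...` binds the matched CreateBranch node, so the
// exec receives it directly instead of re-casting `plan`.
case c @ CreateBranch(IcebergCatalogAndIdentifier(catalog, ident), _, _, _, _, _) =>
  CreateBranchExec(catalog, ident, c) :: Nil
```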
Review thread on the snapshotRetentionClause alternatives:

- Can we simplify these 3 cases to `WITH SNAPSHOT RETENTION (numSnapshots SNAPSHOTS)? (snapshotRetain snapshotRetainTimeUnit)?`?
- That form also accepts the illegal statement `WITH SNAPSHOT RETENTION` with neither part. Alternatively we can use `WITH SNAPSHOT RETENTION ((numSnapshots SNAPSHOTS)? (snapshotRetain snapshotRetainTimeUnit)? | numSnapshots SNAPSHOTS snapshotRetain snapshotRetainTimeUnit)?`, but it's not intuitive.
- I see, I will leave it here to see if anyone has better suggestions. I am not an Antlr expert 😝