@@ -22,6 +22,7 @@
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;
import org.apache.iceberg.io.SupportsBulkOperations;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;

/**
@@ -59,7 +60,7 @@ public interface DeleteOrphanFiles extends Action<DeleteOrphanFiles, DeleteOrpha
/**
* Passes an alternative delete implementation that will be used for orphan files.
*
* <p>This method allows users to customize the delete func. For example, one may set a custom
* <p>This method allows users to customize the delete function. For example, one may set a custom
* delete func and collect all orphan files into a set instead of physically removing them.
*
* <p>If not set, defaults to using the table's {@link org.apache.iceberg.io.FileIO io}
@@ -71,12 +72,14 @@ public interface DeleteOrphanFiles extends Action<DeleteOrphanFiles, DeleteOrpha
DeleteOrphanFiles deleteWith(Consumer<String> deleteFunc);
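
As a usage sketch of the collect-instead-of-delete pattern described above (the SparkSession spark and the loaded table are assumptions, and SparkActions is the Spark entry point for this action):

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.iceberg.Table;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

// Collect orphan file paths instead of physically removing them.
Set<String> orphanedPaths = ConcurrentHashMap.newKeySet();
SparkActions.get(spark)
    .deleteOrphanFiles(table)
    .deleteWith(orphanedPaths::add) // custom delete func: record the path, delete nothing
    .execute();

A concurrent set is used because the action may invoke the delete function from multiple threads.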

/**
* Passes an alternative executor service that will be used for removing orphaned files.
*
* <p>If this method is not called, orphaned manifests and data files will still be deleted in the
* current thread.
*
* <p>
* Passes an alternative executor service that will be used for removing orphaned files. This
* service will only be used if a custom delete function is provided by {@link
* #deleteWith(Consumer)} or if the FileIO does not {@link SupportsBulkOperations support bulk
* deletes}. Otherwise, parallelism should be controlled by the IO-specific {@link
* SupportsBulkOperations#deleteFiles(Iterable) deleteFiles} method.
*
* <p>If this method is not called and bulk deletes are not supported, orphaned manifests and data
* files will still be deleted in the current thread.
*
* @param executorService the service to use
* @return this for method chaining
@@ -21,6 +21,7 @@
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.SupportsBulkOperations;

/**
* An action that deletes all files referenced by a table metadata file.
@@ -44,9 +45,11 @@ public interface DeleteReachableFiles
DeleteReachableFiles deleteWith(Consumer<String> deleteFunc);

/**
* Passes an alternative executor service that will be used for files removal.
*
* <p>If this method is not called, files will be deleted in the current thread.
* Passes an alternative executor service that will be used for file removal. This service will
* only be used if a custom delete function is provided by {@link #deleteWith(Consumer)} or if the
* FileIO does not {@link SupportsBulkOperations support bulk deletes}. Otherwise, parallelism
* should be controlled by the IO-specific {@link SupportsBulkOperations#deleteFiles(Iterable)
* deleteFiles} method.
*
* @param executorService the service to use
* @return this for method chaining
@@ -21,6 +21,7 @@
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.io.SupportsBulkOperations;

/**
* An action that expires snapshots in a table.
@@ -80,11 +81,14 @@ public interface ExpireSnapshots extends Action<ExpireSnapshots, ExpireSnapshots
ExpireSnapshots deleteWith(Consumer<String> deleteFunc);

/**
* Passes an alternative executor service that will be used for manifests, data and delete files
* deletion.
* Passes an alternative executor service that will be used for file removal. This service will
* only be used if a custom delete function is provided by {@link #deleteWith(Consumer)} or if the
* FileIO does not {@link SupportsBulkOperations support bulk deletes}. Otherwise, parallelism
* should be controlled by the IO-specific {@link SupportsBulkOperations#deleteFiles(Iterable)
* deleteFiles} method.
*
* <p>If this method is not called, unnecessary manifests and content files will still be deleted
* in the current thread.
* <p>If this method is not called and bulk deletes are not supported, unnecessary manifests and
* content files will still be deleted in the current thread.
*
* <p>Identical to {@link org.apache.iceberg.ExpireSnapshots#executeDeleteWith(ExecutorService)}
*
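A hedged sketch of the executor/bulk-delete interplay described in these javadocs, using ExpireSnapshots as the example; the SparkSession, table, cutoff, pool size, and collecting delete function are illustrative assumptions. With a bulk-capable FileIO and no custom delete func the pool would be ignored; here the custom func forces per-file deletes, so the pool is used:

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.iceberg.spark.actions.SparkActions;

Set<String> removedPaths = ConcurrentHashMap.newKeySet();
ExecutorService deletePool = Executors.newFixedThreadPool(4);
try {
  SparkActions.get(spark)
      .expireSnapshots(table)
      .expireOlderThan(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(7))
      .deleteWith(removedPaths::add) // custom delete func, so the pool below is actually used
      .executeDeleteWith(deletePool)
      .execute();
} finally {
  deletePool.shutdown();
}
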
56 changes: 55 additions & 1 deletion core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java
@@ -18,27 +18,44 @@
*/
package org.apache.iceberg.hadoop;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.iceberg.exceptions.RuntimeIOException;
import org.apache.iceberg.io.BulkDeletionFailureException;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.FileInfo;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.io.SupportsBulkOperations;
import org.apache.iceberg.io.SupportsPrefixOperations;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Streams;
import org.apache.iceberg.util.SerializableMap;
import org.apache.iceberg.util.SerializableSupplier;
import org.apache.iceberg.util.Tasks;
import org.apache.iceberg.util.ThreadPools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class HadoopFileIO implements FileIO, HadoopConfigurable, SupportsPrefixOperations {
public class HadoopFileIO
implements FileIO, HadoopConfigurable, SupportsPrefixOperations, SupportsBulkOperations {

private static final Logger LOG = LoggerFactory.getLogger(HadoopFileIO.class);
private static final String DELETE_FILE_PARALLELISM = "iceberg.hadoop.delete-file-parallelism";
private static final String DELETE_FILE_POOL_NAME = "iceberg-hadoopfileio-delete";
private static final int DELETE_RETRY_ATTEMPTS = 3;
private static final int DEFAULT_DELETE_CORE_MULTIPLE = 4;
private static volatile ExecutorService executorService;

private SerializableSupplier<Configuration> hadoopConf;
private SerializableMap<String, String> properties = SerializableMap.copyOf(ImmutableMap.of());
@@ -149,6 +166,43 @@ public void deletePrefix(String prefix) {
}
}

@Override
public void deleteFiles(Iterable<String> pathsToDelete) throws BulkDeletionFailureException {
AtomicInteger failureCount = new AtomicInteger(0);
Tasks.foreach(pathsToDelete)
.executeWith(executorService())
.retry(DELETE_RETRY_ATTEMPTS)
.stopRetryOn(FileNotFoundException.class)
.suppressFailureWhenFinished()
.onFailure(
(f, e) -> {
LOG.error("Failure during bulk delete on file: {} ", f, e);
failureCount.incrementAndGet();
Contributor:

This is going to increment the count on each failed attempt and won't be accurate. We could count the number of successfully deleted files instead and then use Iterables.size(pathsToDelete) to find how many we were supposed to delete.

Member Author:

Ah I thought it was once per element

Member Author:

I'm not sure we want to go over the iterable more than once ... let me think about this

Member Author:

I double checked this, it only fires off when all retries are exhausted so it is correct as is.

scala> def testFailure() = {
  var failureCount = 0
  Tasks.foreach("value")
    .retry(3)
    .onFailure((y, x: Throwable) => failureCount += 1)
    .suppressFailureWhenFinished()
    .run(x => throw new Exception("ohNO"))
  failureCount
}
 
 scala> testFailure()
23/03/01 10:16:22 WARN Tasks: Retrying task after failure: ohNO
java.lang.Exception: ohNO
...	
23/03/01 10:16:23 WARN Tasks: Retrying task after failure: ohNO
java.lang.Exception: ohNO
...
23/03/01 10:16:25 WARN Tasks: Retrying task after failure: ohNO
java.lang.Exception: ohNO
...
res21: Int = 1

Member Author:

          runTaskWithRetry(task, item);
          succeeded.add(item);
        } catch (Exception e) {
          exceptions.add(e);
          if (onFailure != null) {
            tryRunOnFailure(item, e);
Code in question (RunWithRetry) does all retries before hitting "onFailure"

Contributor:

You are right, we overlooked it while reviewing another PR. I like it more. I'll update SparkCleanupUtil to follow this pattern as well.
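
For reference, a sketch of the alternative the reviewer first floated: count successes and derive the failure count from the input size. It assumes the surrounding HadoopFileIO fields and Guava's Iterables, and, as noted above, it walks pathsToDelete a second time; the thread concludes the committed failure-counting version is already correct:

AtomicInteger successCount = new AtomicInteger(0);
Tasks.foreach(pathsToDelete)
    .executeWith(executorService())
    .retry(DELETE_RETRY_ATTEMPTS)
    .stopRetryOn(FileNotFoundException.class)
    .suppressFailureWhenFinished()
    .run(
        path -> {
          deleteFile(path);
          successCount.incrementAndGet(); // only reached once the delete ultimately succeeds
        });

int expected = Iterables.size(pathsToDelete); // second pass over the input iterable
if (successCount.get() != expected) {
  throw new BulkDeletionFailureException(expected - successCount.get());
}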

})
.run(this::deleteFile);

if (failureCount.get() != 0) {
throw new BulkDeletionFailureException(failureCount.get());
}
}

private int deleteThreads() {
int defaultValue = Runtime.getRuntime().availableProcessors() * DEFAULT_DELETE_CORE_MULTIPLE;
return conf().getInt(DELETE_FILE_PARALLELISM, defaultValue);
}

private ExecutorService executorService() {
if (executorService == null) {
synchronized (HadoopFileIO.class) {
if (executorService == null) {
executorService = ThreadPools.newWorkerPool(DELETE_FILE_POOL_NAME, deleteThreads());
}
}
}

return executorService;
}
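
An end-to-end usage sketch for the new bulk path; the HDFS paths are made up, and the property below is the one read by deleteThreads() above:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.hadoop.HadoopFileIO;
import org.apache.iceberg.io.BulkDeletionFailureException;

Configuration conf = new Configuration();
conf.setInt("iceberg.hadoop.delete-file-parallelism", 16); // overrides the 4 x cores default

HadoopFileIO fileIO = new HadoopFileIO(conf);
List<String> staleFiles =
    Arrays.asList(
        "hdfs://nn:8020/warehouse/db/tbl/data/stale-0.parquet",
        "hdfs://nn:8020/warehouse/db/tbl/data/stale-1.parquet");

try {
  fileIO.deleteFiles(staleFiles); // each path retried per the constants above; failures counted, not rethrown per file
} catch (BulkDeletionFailureException e) {
  System.err.println("Files not deleted: " + e.numberFailedObjects());
}

Note that the worker pool is a lazily created static, so the parallelism from the first Configuration that triggers a bulk delete applies for the rest of the JVM.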

/**
* This class is a simple adaptor to allow for using Hadoop's RemoteIterator as an Iterator.
*
30 changes: 28 additions & 2 deletions core/src/test/java/org/apache/iceberg/hadoop/HadoopFileIOTest.java
@@ -27,10 +27,12 @@
import java.util.List;
import java.util.Random;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.io.BulkDeletionFailureException;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
@@ -120,6 +122,26 @@ public void testDeletePrefix() {
() -> hadoopFileIO.listPrefix(parent.toUri().toString()).iterator());
}

@Test
public void testDeleteFiles() {
Path parent = new Path(tempDir.toURI());
List<Path> filesCreated = createRandomFiles(parent, 10);
hadoopFileIO.deleteFiles(
filesCreated.stream().map(Path::toString).collect(Collectors.toList()));
filesCreated.forEach(
file -> Assert.assertFalse(hadoopFileIO.newInputFile(file.toString()).exists()));
}

@Test
public void testDeleteFilesErrorHandling() {
List<String> filesCreated =
random.ints(2).mapToObj(x -> "fakefsnotreal://file-" + x).collect(Collectors.toList());
Assert.assertThrows(
"Should throw a BulkDeletionFailure Exceptions when files can't be deleted",
BulkDeletionFailureException.class,
() -> hadoopFileIO.deleteFiles(filesCreated));
}

@Test
public void testHadoopFileIOKryoSerialization() throws IOException {
FileIO testHadoopFileIO = new HadoopFileIO();
@@ -142,17 +164,21 @@ public void testHadoopFileIOJavaSerialization() throws IOException, ClassNotFoun
Assert.assertEquals(testHadoopFileIO.properties(), roundTripSerializedFileIO.properties());
}

private void createRandomFiles(Path parent, int count) {
private List<Path> createRandomFiles(Path parent, int count) {
// Thread-safe list (java.util.Collections): the parallel stream below adds paths from multiple threads.
List<Path> paths = Collections.synchronizedList(Lists.newArrayList());
random
.ints(count)
.parallel()
.forEach(
i -> {
try {
fs.createNewFile(new Path(parent, "file-" + i));
Path path = new Path(parent, "file-" + i);
paths.add(path);
fs.createNewFile(path);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
return paths;
}
}
@@ -22,6 +22,7 @@
import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.lit;

import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -46,14 +47,19 @@
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.io.BulkDeletionFailureException;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.io.ClosingIterator;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.SupportsBulkOperations;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Splitter;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
import org.apache.iceberg.relocated.com.google.common.collect.ListMultimap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Multimaps;
import org.apache.iceberg.spark.JobGroupInfo;
import org.apache.iceberg.spark.JobGroupUtils;
import org.apache.iceberg.spark.SparkTableUtil;
@@ -85,6 +91,7 @@ abstract class BaseSparkAction<ThisT> {
private static final Logger LOG = LoggerFactory.getLogger(BaseSparkAction.class);
private static final AtomicInteger JOB_COUNTER = new AtomicInteger();
private static final int DELETE_NUM_RETRIES = 3;
private static final int DELETE_GROUP_SIZE = 100000;

private final SparkSession spark;
private final JavaSparkContext sparkContext;
@@ -253,6 +260,37 @@ protected DeleteSummary deleteFiles(
return summary;
}

protected DeleteSummary deleteFiles(SupportsBulkOperations io, Iterator<FileInfo> files) {
DeleteSummary summary = new DeleteSummary();
Iterator<List<FileInfo>> fileGroups = Iterators.partition(files, DELETE_GROUP_SIZE);

Tasks.foreach(fileGroups)
.suppressFailureWhenFinished()
.run(fileGroup -> deleteFileGroup(fileGroup, io, summary));

return summary;
}

private static void deleteFileGroup(
List<FileInfo> fileGroup, SupportsBulkOperations io, DeleteSummary summary) {

ListMultimap<String, FileInfo> filesByType = Multimaps.index(fileGroup, FileInfo::getType);
ListMultimap<String, String> pathsByType =
Multimaps.transformValues(filesByType, FileInfo::getPath);

for (Map.Entry<String, Collection<String>> entry : pathsByType.asMap().entrySet()) {
String type = entry.getKey();
Collection<String> paths = entry.getValue();
int failures = 0;
try {
io.deleteFiles(paths);
} catch (BulkDeletionFailureException e) {
failures = e.numberFailedObjects();
}
summary.deletedFiles(type, paths.size() - failures);
}
}
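
A standalone sketch of the partition-then-group steps used above, with made-up FileInfo rows; it assumes FileInfo's (path, type) constructor and only prints the grouping rather than issuing deletes:

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
import org.apache.iceberg.relocated.com.google.common.collect.ListMultimap;
import org.apache.iceberg.relocated.com.google.common.collect.Multimaps;

Iterator<FileInfo> files =
    Arrays.asList(
            new FileInfo("s3://bucket/tbl/data/a.parquet", "data"),
            new FileInfo("s3://bucket/tbl/metadata/m0.avro", "manifest"),
            new FileInfo("s3://bucket/tbl/data/b.parquet", "data"))
        .iterator();

// Step 1: chunk the (possibly huge) iterator into bounded groups.
Iterator<List<FileInfo>> fileGroups = Iterators.partition(files, 2);

while (fileGroups.hasNext()) {
  List<FileInfo> fileGroup = fileGroups.next();
  // Step 2: bucket each group's paths by file type, one bulk delete call per bucket.
  ListMultimap<String, String> pathsByType =
      Multimaps.transformValues(Multimaps.index(fileGroup, FileInfo::getType), FileInfo::getPath);
  System.out.println(pathsByType.asMap());
}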

static class DeleteSummary {
private final AtomicLong dataFilesCount = new AtomicLong(0L);
private final AtomicLong positionDeleteFilesCount = new AtomicLong(0L);
@@ -261,6 +299,30 @@ static class DeleteSummary {
private final AtomicLong manifestListsCount = new AtomicLong(0L);
private final AtomicLong otherFilesCount = new AtomicLong(0L);

public void deletedFiles(String type, int numFiles) {
if (FileContent.DATA.name().equalsIgnoreCase(type)) {
dataFilesCount.addAndGet(numFiles);

} else if (FileContent.POSITION_DELETES.name().equalsIgnoreCase(type)) {
positionDeleteFilesCount.addAndGet(numFiles);

} else if (FileContent.EQUALITY_DELETES.name().equalsIgnoreCase(type)) {
equalityDeleteFilesCount.addAndGet(numFiles);

} else if (MANIFEST.equalsIgnoreCase(type)) {
manifestsCount.addAndGet(numFiles);

} else if (MANIFEST_LIST.equalsIgnoreCase(type)) {
manifestListsCount.addAndGet(numFiles);

} else if (OTHERS.equalsIgnoreCase(type)) {
otherFilesCount.addAndGet(numFiles);

} else {
throw new ValidationException("Illegal file type: %s", type);
}
}

public void deletedFile(String path, String type) {
if (FileContent.DATA.name().equalsIgnoreCase(type)) {
dataFilesCount.incrementAndGet();