Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aws.lakeformation;

import java.util.Map;
import java.util.UUID;
import org.apache.iceberg.aws.AwsIntegTestUtil;
import org.apache.iceberg.aws.AwsProperties;
import org.apache.iceberg.aws.glue.GlueCatalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.glue.model.AccessDeniedException;
import software.amazon.awssdk.services.glue.model.GlueException;
import software.amazon.awssdk.services.iam.IamClient;
import software.amazon.awssdk.services.iam.model.CreateRoleRequest;
import software.amazon.awssdk.services.iam.model.CreateRoleResponse;
import software.amazon.awssdk.services.iam.model.DeleteRolePolicyRequest;
import software.amazon.awssdk.services.iam.model.DeleteRoleRequest;
import software.amazon.awssdk.services.iam.model.PutRolePolicyRequest;

/**
 * Integration test that initializes a {@link GlueCatalog} with Lake Formation enabled and an assumed IAM role,
 * then checks that Glue access is limited to what the role's inline policy allows.
 *
 * <p>Each test creates a uniquely named IAM role in {@link #before()} and removes it in {@link #after()}.
 * The test talks to real AWS services (IAM, STS, Glue), so it needs valid credentials for the account returned by
 * {@link AwsIntegTestUtil#testAccountId()}.
 */
public class TestLakeFormationAwsClientFactory {

  private static final Logger LOG = LoggerFactory.getLogger(TestLakeFormationAwsClientFactory.class);

  // Milliseconds to sleep after changing IAM so that the new policy is visible to later calls.
  private static final int IAM_PROPAGATION_DELAY = 10000;

  // Passed as the role's max session duration (IAM expresses this value in seconds).
  private static final int ASSUME_ROLE_SESSION_DURATION = 3600;

  private IamClient iam;
  private String roleName;
  private Map<String, String> assumeRoleProperties;
  private String policyName;

  /**
   * Creates a fresh IAM role that the test account root may assume (and tag sessions on), and prepares the
   * catalog properties that make the catalog assume that role with Lake Formation enabled.
   */
  @Before
  public void before() {
    roleName = UUID.randomUUID().toString();
    iam = IamClient.builder()
        .region(Region.AWS_GLOBAL)
        .httpClientBuilder(UrlConnectionHttpClient.builder())
        .build();
    CreateRoleResponse response = iam.createRole(CreateRoleRequest.builder()
        .roleName(roleName)
        .assumeRolePolicyDocument("{" +
            "\"Version\":\"2012-10-17\"," +
            "\"Statement\":[{" +
            "\"Effect\":\"Allow\"," +
            "\"Principal\":{" +
            "\"AWS\":\"arn:aws:iam::" + AwsIntegTestUtil.testAccountId() + ":root\"}," +
            "\"Action\": [\"sts:AssumeRole\"," +
            "\"sts:TagSession\"]}]}")
        .maxSessionDuration(ASSUME_ROLE_SESSION_DURATION)
        .build());
    assumeRoleProperties = Maps.newHashMap();
    assumeRoleProperties.put(AwsProperties.CLIENT_ASSUME_ROLE_REGION, "us-east-1");
    assumeRoleProperties.put(AwsProperties.GLUE_LAKEFORMATION_ENABLED, "true");
    assumeRoleProperties.put(AwsProperties.HTTP_CLIENT_TYPE, AwsProperties.HTTP_CLIENT_TYPE_APACHE);
    assumeRoleProperties.put(AwsProperties.CLIENT_ASSUME_ROLE_ARN, response.role().arn());
    assumeRoleProperties.put(AwsProperties.CLIENT_ASSUME_ROLE_TAGS_PREFIX +
        LakeFormationAwsClientFactory.LF_AUTHORIZED_CALLER, "emr");
    policyName = UUID.randomUUID().toString();
  }

  /**
   * Removes the inline policy and the role created for the test, then releases the IAM client.
   *
   * <p>The steps are chained with {@code finally} so that a failure while deleting the policy still attempts to
   * delete the role, and the client is always closed.
   */
  @After
  public void after() {
    try {
      iam.deleteRolePolicy(DeleteRolePolicyRequest.builder().roleName(roleName).policyName(policyName).build());
    } finally {
      try {
        iam.deleteRole(DeleteRoleRequest.builder().roleName(roleName).build());
      } finally {
        iam.close();
      }
    }
  }

  /**
   * Attaches a policy that only allows Glue databases/tables prefixed with {@code allowed_}, then verifies that
   * creating a {@code denied_*} namespace is rejected with {@link AccessDeniedException} while creating an
   * {@code allowed_*} namespace succeeds.
   */
  @Test
  public void testLakeFormationEnabledGlueCatalog() throws Exception {
    String glueArnPrefix = "arn:aws:glue:*:" + AwsIntegTestUtil.testAccountId();
    iam.putRolePolicy(PutRolePolicyRequest.builder()
        .roleName(roleName)
        .policyName(policyName)
        .policyDocument("{" +
            "\"Version\":\"2012-10-17\"," +
            "\"Statement\":[{" +
            "\"Sid\":\"policy1\"," +
            "\"Effect\":\"Allow\"," +
            "\"Action\":[\"glue:CreateDatabase\",\"glue:DeleteDatabase\"," +
            "\"glue:Get*\",\"lakeformation:GetDataAccess\"]," +
            "\"Resource\":[\"" + glueArnPrefix + ":catalog\"," +
            "\"" + glueArnPrefix + ":database/allowed_*\"," +
            "\"" + glueArnPrefix + ":table/allowed_*/*\"," +
            "\"" + glueArnPrefix + ":userDefinedFunction/allowed_*/*\"]}]}")
        .build());
    waitForIamConsistency();

    GlueCatalog glueCatalog = new GlueCatalog();
    assumeRoleProperties.put("warehouse", "s3://path");
    glueCatalog.initialize("test", assumeRoleProperties);
    try {
      Namespace deniedNamespace = Namespace.of("denied_" + UUID.randomUUID().toString().replace("-", ""));
      boolean deniedNamespaceCreated = false;
      try {
        glueCatalog.createNamespace(deniedNamespace);
        deniedNamespaceCreated = true;
      } catch (GlueException e) {
        Assert.assertEquals(AccessDeniedException.class, e.getClass());
      }

      if (deniedNamespaceCreated) {
        // The call was expected to be rejected; clean up the stray database before failing the test.
        glueCatalog.dropNamespace(deniedNamespace);
        Assert.fail("Access to Glue should be denied");
      }

      Namespace allowedNamespace = Namespace.of("allowed_" + UUID.randomUUID().toString().replace("-", ""));
      boolean allowedNamespaceCreated = false;
      try {
        glueCatalog.createNamespace(allowedNamespace);
        allowedNamespaceCreated = true;
      } catch (GlueException e) {
        LOG.error("fail to create Glue database", e);
        Assert.fail("create namespace should succeed");
      } finally {
        // Only drop what was actually created; dropping a missing namespace would throw from this finally
        // block and hide the assertion failure raised above.
        if (allowedNamespaceCreated) {
          glueCatalog.dropNamespace(allowedNamespace);
        }
      }
    } finally {
      try {
        glueCatalog.close();
      } catch (Exception e) {
        // swallow exception during closing
        LOG.error("Error closing GlueCatalog", e);
      }
    }
  }

  /**
   * Blocks for {@link #IAM_PROPAGATION_DELAY} milliseconds to give IAM changes time to propagate.
   */
  private void waitForIamConsistency() throws Exception {
    Thread.sleep(IAM_PROPAGATION_DELAY); // sleep to make sure IAM up to date
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public void initialize(Map<String, String> properties) {
AwsProperties.HTTP_CLIENT_TYPE, AwsProperties.HTTP_CLIENT_TYPE_DEFAULT);
}

private <T extends AwsClientBuilder & AwsSyncClientBuilder> T configure(T clientBuilder) {
protected <T extends AwsClientBuilder & AwsSyncClientBuilder> T configure(T clientBuilder) {
AssumeRoleRequest request = AssumeRoleRequest.builder()
.roleArn(roleArn)
.roleSessionName(genSessionName())
Expand All @@ -109,6 +109,22 @@ private <T extends AwsClientBuilder & AwsSyncClientBuilder> T configure(T client
return clientBuilder;
}

/**
 * Exposes this factory's {@code tags} field to subclasses.
 *
 * @return the current value of {@code tags}
 */
protected Set<Tag> tags() {
  return this.tags;
}

/**
 * Exposes this factory's {@code region} field to subclasses.
 *
 * @return the current value of {@code region}
 */
protected String region() {
  return this.region;
}

/**
 * Exposes this factory's {@code s3Endpoint} field to subclasses.
 *
 * @return the current value of {@code s3Endpoint}
 */
protected String s3Endpoint() {
  return this.s3Endpoint;
}

/**
 * Exposes this factory's {@code httpClientType} field to subclasses.
 *
 * @return the current value of {@code httpClientType}
 */
protected String httpClientType() {
  return this.httpClientType;
}

private StsClient sts() {
return StsClient.builder()
.httpClientBuilder(AwsClientFactories.configureHttpClientBuilder(httpClientType))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ public void initialize(Map<String, String> properties) {
}
}

static SdkHttpClient.Builder configureHttpClientBuilder(String httpClientType) {
public static SdkHttpClient.Builder configureHttpClientBuilder(String httpClientType) {
String clientType = httpClientType;
if (Strings.isNullOrEmpty(clientType)) {
clientType = AwsProperties.HTTP_CLIENT_TYPE_DEFAULT;
Expand All @@ -142,7 +142,7 @@ static SdkHttpClient.Builder configureHttpClientBuilder(String httpClientType) {
}
}

static <T extends SdkClientBuilder> void configureEndpoint(T builder, String endpoint) {
public static <T extends SdkClientBuilder> void configureEndpoint(T builder, String endpoint) {
if (endpoint != null) {
builder.endpointOverride(URI.create(endpoint));
}
Expand Down
40 changes: 40 additions & 0 deletions aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.aws.dynamodb.DynamoDbCatalog;
import org.apache.iceberg.aws.lakeformation.LakeFormationAwsClientFactory;
import org.apache.iceberg.aws.s3.S3FileIO;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
Expand Down Expand Up @@ -87,6 +88,11 @@ public class AwsProperties implements Serializable {
*/
public static final String GLUE_CATALOG_ID = "glue.id";

/**
* The 12-digit AWS account ID used in a Glue resource ARN, e.g. arn:aws:glue:us-east-1:123456789012:table/db1/table1
*/
public static final String GLUE_ACCOUNT_ID = "glue.account-id";

/**
* If Glue should skip archiving an old table version when creating a new version in a commit.
* By default Glue archives all old table versions after an UpdateTable call,
Expand All @@ -96,6 +102,15 @@ public class AwsProperties implements Serializable {
public static final String GLUE_CATALOG_SKIP_ARCHIVE = "glue.skip-archive";
public static final boolean GLUE_CATALOG_SKIP_ARCHIVE_DEFAULT = false;

/**
* If set, GlueCatalog will use Lake Formation for access control.
* For more credential vending details, see: https://docs.aws.amazon.com/lake-formation/latest/dg/api-overview.html.
* If enabled, the {@link AwsClientFactory} implementation must be {@link LakeFormationAwsClientFactory}
* or any class that extends it.
*/
public static final String GLUE_LAKEFORMATION_ENABLED = "glue.lakeformation-enabled";
public static final boolean GLUE_LAKEFORMATION_ENABLED_DEFAULT = false;

/**
* Number of threads to use for uploading parts to S3 (shared pool across all output streams),
* default to {@link Runtime#availableProcessors()}
Expand Down Expand Up @@ -282,6 +297,18 @@ public class AwsProperties implements Serializable {
@Deprecated
public static final boolean CLIENT_ENABLE_ETAG_CHECK_DEFAULT = false;

/**
* Used by {@link LakeFormationAwsClientFactory}.
* The table name used as part of lake formation credentials request.
*/
public static final String LAKE_FORMATION_TABLE_NAME = "lakeformation.table-name";

/**
* Used by {@link LakeFormationAwsClientFactory}.
* The database name used as part of lake formation credentials request.
*/
public static final String LAKE_FORMATION_DB_NAME = "lakeformation.db-name";

private String s3FileIoSseType;
private String s3FileIoSseKey;
private String s3FileIoSseMd5;
Expand All @@ -296,6 +323,7 @@ public class AwsProperties implements Serializable {

private String glueCatalogId;
private boolean glueCatalogSkipArchive;
private boolean glueLakeFormationEnabled;

private String dynamoDbTableName;

Expand All @@ -315,6 +343,7 @@ public AwsProperties() {

this.glueCatalogId = null;
this.glueCatalogSkipArchive = GLUE_CATALOG_SKIP_ARCHIVE_DEFAULT;
this.glueLakeFormationEnabled = GLUE_LAKEFORMATION_ENABLED_DEFAULT;

this.dynamoDbTableName = DYNAMODB_TABLE_NAME_DEFAULT;
}
Expand All @@ -332,6 +361,9 @@ public AwsProperties(Map<String, String> properties) {
this.glueCatalogId = properties.get(GLUE_CATALOG_ID);
this.glueCatalogSkipArchive = PropertyUtil.propertyAsBoolean(properties,
AwsProperties.GLUE_CATALOG_SKIP_ARCHIVE, AwsProperties.GLUE_CATALOG_SKIP_ARCHIVE_DEFAULT);
this.glueLakeFormationEnabled = PropertyUtil.propertyAsBoolean(properties,
GLUE_LAKEFORMATION_ENABLED,
GLUE_LAKEFORMATION_ENABLED_DEFAULT);

this.s3FileIoMultipartUploadThreads = PropertyUtil.propertyAsInt(properties, S3FILEIO_MULTIPART_UPLOAD_THREADS,
Runtime.getRuntime().availableProcessors());
Expand Down Expand Up @@ -424,6 +456,14 @@ public void setGlueCatalogSkipArchive(boolean skipArchive) {
this.glueCatalogSkipArchive = skipArchive;
}

/**
 * Reports the stored value of the {@code glue.lakeformation-enabled} setting.
 *
 * @return the current value of the {@code glueLakeFormationEnabled} field
 */
public boolean glueLakeFormationEnabled() {
  return this.glueLakeFormationEnabled;
}

/**
 * Overrides the stored value of the {@code glue.lakeformation-enabled} setting.
 *
 * @param glueLakeFormationEnabled new value for the {@code glueLakeFormationEnabled} field
 */
public void setGlueLakeFormationEnabled(boolean glueLakeFormationEnabled) {
this.glueLakeFormationEnabled = glueLakeFormationEnabled;
}

public int s3FileIoMultipartUploadThreads() {
return s3FileIoMultipartUploadThreads;
}
Expand Down
45 changes: 43 additions & 2 deletions aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.aws.AwsClientFactories;
import org.apache.iceberg.aws.AwsClientFactory;
import org.apache.iceberg.aws.AwsProperties;
import org.apache.iceberg.aws.lakeformation.LakeFormationAwsClientFactory;
import org.apache.iceberg.aws.s3.S3FileIO;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
Expand All @@ -49,9 +51,11 @@
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.util.LockManagers;
import org.apache.iceberg.util.PropertyUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.services.glue.GlueClient;
Expand Down Expand Up @@ -87,6 +91,7 @@ public class GlueCatalog extends BaseMetastoreCatalog
private FileIO fileIO;
private LockManager lockManager;
private CloseableGroup closeableGroup;
private Map<String, String> catalogProperties;

// Attempt to set versionId if available on the path
private static final DynMethods.UnboundMethod SET_VERSION_ID = DynMethods.builder("versionId")
Expand All @@ -104,13 +109,36 @@ public GlueCatalog() {

@Override
public void initialize(String name, Map<String, String> properties) {
AwsClientFactory awsClientFactory;
FileIO catalogFileIO;
if (PropertyUtil.propertyAsBoolean(
properties,
AwsProperties.GLUE_LAKEFORMATION_ENABLED,
AwsProperties.GLUE_LAKEFORMATION_ENABLED_DEFAULT)) {
String factoryImpl = PropertyUtil.propertyAsString(properties, AwsProperties.CLIENT_FACTORY, null);
ImmutableMap.Builder<String, String> builder = ImmutableMap.<String, String>builder().putAll(properties);
if (factoryImpl == null) {
builder.put(AwsProperties.CLIENT_FACTORY, LakeFormationAwsClientFactory.class.getName());
}

this.catalogProperties = builder.build();
awsClientFactory = AwsClientFactories.from(catalogProperties);
Preconditions.checkArgument(awsClientFactory instanceof LakeFormationAwsClientFactory,
"Detected LakeFormation enabled for Glue catalog, should use a client factory that extends %s, but found %s",
LakeFormationAwsClientFactory.class.getName(), factoryImpl);
catalogFileIO = null;
} else {
awsClientFactory = AwsClientFactories.from(properties);
catalogFileIO = initializeFileIO(properties);
}

initialize(
name,
properties.get(CatalogProperties.WAREHOUSE_LOCATION),
new AwsProperties(properties),
AwsClientFactories.from(properties).glue(),
awsClientFactory.glue(),
initializeLockManager(properties),
initializeFileIO(properties));
catalogFileIO);
}

private LockManager initializeLockManager(Map<String, String> properties) {
Expand Down Expand Up @@ -162,6 +190,19 @@ private String cleanWarehousePath(String path) {

@Override
protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
if (catalogProperties != null) {
Map<String, String> tableSpecificCatalogProperties = ImmutableMap.<String, String>builder()
.putAll(catalogProperties)
.put(AwsProperties.LAKE_FORMATION_DB_NAME,
IcebergToGlueConverter.getDatabaseName(tableIdentifier))
.put(AwsProperties.LAKE_FORMATION_TABLE_NAME,
IcebergToGlueConverter.getTableName(tableIdentifier))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xiaoxuandev and @jackye1995, I agree with the comment above that this seems confusing. If catalog properties are non-null, then lake formation logic is applied. I think at least the addition of these properties should be conditional on lake formation enabled.

.build();
// FileIO initialization depends on tableSpecificCatalogProperties, so a new FileIO is initialized each time
return new GlueTableOperations(glue, lockManager, catalogName, awsProperties,
initializeFileIO(tableSpecificCatalogProperties), tableIdentifier);
}

return new GlueTableOperations(glue, lockManager, catalogName, awsProperties, fileIO, tableIdentifier);
}

Expand Down
Loading