diff --git a/.github/workflows/.bb-export.yml b/.github/workflows/.bb-export.yml
deleted file mode 100644
index 6569e8a..0000000
--- a/.github/workflows/.bb-export.yml
+++ /dev/null
@@ -1,205 +0,0 @@
-name: Bytebase Export SQL
-on:
-  pull_request:
-    types: [closed]
-    branches:
-      - main
-    paths:
-      - 'export/**'
-  workflow_dispatch:
-
-jobs:
-  bytebase-export:
-    if: github.event.pull_request.merged == true
-    runs-on: ubuntu-latest
-    permissions:
-      pull-requests: write
-      issues: write
-      contents: read
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.pull_request.head.sha }}
-          fetch-depth: 0
-
-      - name: Login Bytebase
-        id: bytebase-login
-        uses: bytebase/login-action@0.0.2
-        with:
-          bytebase-url: ${{ secrets.BYTEBASE_URL }}
-          service-key: ${{ secrets.BYTEBASE_SERVICE_KEY }}
-          service-secret: ${{ secrets.BYTEBASE_SERVICE_SECRET }}
-
-      - name: Get changed files
-        id: changed-files
-        uses: tj-actions/changed-files@v42
-        with:
-          files: |
-            export/**/*.sql
-          since_last_remote_commit: true
-
-      - name: Process SQL files
-        id: process-sql
-        if: steps.changed-files.outputs.any_changed == 'true'
-        run: |
-          call_api() {
-            local url="$1"
-            local method="$2"
-            local data="$3"
-            local description="$4"
-
-            echo "=== DEBUG: API Call Details ==="
-            echo "Description: $description"
-            echo "URL: $url"
-            echo "Method: $method"
-            echo "Data: $data"
-
-            temp_file=$(mktemp)
-            http_code=$(curl -v -s -w "%{http_code}" \
-              --request "$method" "$url" \
-              --header "Authorization: Bearer ${{ steps.bytebase-login.outputs.token }}" \
-              --header "Content-Type: application/json" \
-              --data "$data" \
-              -o "$temp_file" 2>&1)
-
-            echo "=== DEBUG: Response Details ==="
-            echo "HTTP Status: $http_code"
-            echo "Response body:"
-            cat "$temp_file"
-            echo "==========================="
-
-            if [[ $http_code -lt 200 || $http_code -ge 300 ]]; then
-              echo "Error: Failed $description. Status: $http_code"
-              cat "$temp_file"
-              rm "$temp_file"
-              return 1
-            fi
-
-            if [[ ! -s "$temp_file" ]]; then
-              echo "Error: Empty response from server"
-              rm "$temp_file"
-              return 1
-            fi
-
-            # Simple one-line JSON validation
-            if ! python3 -c "import json,sys; json.load(open('$temp_file'))" 2>/dev/null; then
python3 -c "import json,sys; json.load(open('$temp_file'))" 2>/dev/null; then - echo "Error: Invalid JSON response" - echo "Response content:" - cat "$temp_file" - rm "$temp_file" - return 1 - fi - - cat "$temp_file" - rm "$temp_file" - return 0 - } - - MANIFEST_PATH="" - for file in ${{ steps.changed-files.outputs.all_changed_files }}; do - DIR_PATH=$(dirname "$file") - while [[ "$DIR_PATH" == export* ]]; do - if [[ -f "$DIR_PATH/manifest.toml" ]]; then - MANIFEST_PATH="$DIR_PATH/manifest.toml" - break 2 - fi - DIR_PATH=$(dirname "$DIR_PATH") - done - done - - if [[ -z "$MANIFEST_PATH" ]]; then - echo "Error: No manifest.toml found in the export directory" - exit 1 - fi - - echo "Found manifest file at: $MANIFEST_PATH" - echo "Manifest contents:" - cat "$MANIFEST_PATH" - - read_toml() { - local key="$1" - python3 -c "import tomllib; print(tomllib.load(open('$MANIFEST_PATH', 'rb')).get('$key', ''))" - } - - PROJECT=$(read_toml "project") || exit 1 - INSTANCE=$(read_toml "instance") || exit 1 - DATABASE=$(read_toml "database") || exit 1 - FORMAT=$(read_toml "format") || FORMAT="JSON" - - echo "=== Parsed Configuration ===" - echo "Project: $PROJECT" - echo "Instance: $INSTANCE" - echo "Database: $DATABASE" - echo "Format: $FORMAT" - echo "===========================" - - for file in ${{ steps.changed-files.outputs.all_changed_files }}; do - echo "Processing $file" - SQL_CONTENT=$(base64 < "$file") - echo "SQL_CONTENT=$SQL_CONTENT" >> $GITHUB_ENV - STEP_ID=$(python3 -c "import uuid; print(str(uuid.uuid4()))") - echo "STEP_ID=$STEP_ID" >> $GITHUB_ENV - BASE_URL="${{ steps.bytebase-login.outputs.api_url }}" - echo "BASE_URL1111=$BASE_URL" - echo "BASE_URL=$BASE_URL" >> $GITHUB_ENV - - sheet_data=$(call_api \ - "$BASE_URL/projects/$PROJECT/sheets" \ - "POST" \ - "{\"title\":\"\",\"content\":\"$SQL_CONTENT\",\"type\":\"TYPE_SQL\",\"source\":\"SOURCE_BYTEBASE_ARTIFACT\",\"visibility\":\"VISIBILITY_PUBLIC\"}" \ - "Create Sheet") - - SHEET_NAME=$(echo "$sheet_data" | python3 -c "import sys, json; print(json.load(sys.stdin)['name'])") - - plan_data=$(call_api \ - "$BASE_URL/projects/$PROJECT/plans" \ - "POST" \ - "{\"steps\":[{\"specs\":[{\"id\":\"$STEP_ID\",\"export_data_config\":{\"target\":\"/instances/$INSTANCE/databases/$DATABASE\",\"format\":\"$FORMAT\",\"sheet\":\"$SHEET_NAME\"}}]}],\"title\":\"Export data from $DATABASE\",\"description\":\"EXPORT\"}" \ - "Create Plan") - - PLAN_NAME=$(echo "$plan_data" | python3 -c "import sys, json; print(json.load(sys.stdin)['name'])") - - issue_data=$(call_api \ - "$BASE_URL/projects/$PROJECT/issues" \ - "POST" \ - "{\"approvers\":[],\"approvalTemplates\":[],\"subscribers\":[],\"title\":\"Issue: Export data from instances/$INSTANCE/databases/$DATABASE\",\"description\":\"SQL request from GitHub\",\"type\":\"DATABASE_DATA_EXPORT\",\"assignee\":\"\",\"plan\":\"$PLAN_NAME\"}" \ - "Create Issue") - - rollout_data=$(call_api \ - "$BASE_URL/projects/$PROJECT/rollouts" \ - "POST" \ - "{\"plan\":\"$PLAN_NAME\"}" \ - "Create Rollout") - - ISSUE_NUMBER=$(echo "$issue_data" | python3 -c "import sys, json; print(json.load(sys.stdin)['name'].split('/')[-1])") - ISSUE_LINK="${{ secrets.BYTEBASE_URL }}/projects/$PROJECT/issues/$ISSUE_NUMBER" - echo "ISSUE_LINK=$ISSUE_LINK" >> $GITHUB_ENV - done - - - name: Comment on PR - uses: actions/github-script@v7 - if: always() - env: - CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} - with: - script: | - const changedFiles = process.env.CHANGED_FILES || ''; - let commentBody = `### SQL Export Summary\n\n`; - - 
-            commentBody += `✅ **PR Status:** Merged\n\n`;
-
-            commentBody += `📝 **Processed SQL Files:**\n\n`;
-            if (changedFiles.trim()) {
-              commentBody += changedFiles.split(' ').map(f => `- ${f}`).join('\n');
-            } else {
-              commentBody += `None`;
-            }
-
-            commentBody += `\n\n**Status:** ${process.env.STATUS || 'Completed'}`;
-
-            await github.rest.issues.createComment({
-              ...context.repo,
-              issue_number: context.issue.number,
-              body: commentBody
-            });
diff --git a/.github/workflows/bb-masking-semantic-type-global.yml b/.github/workflows/1-bb-masking-semantic-type-global.yml
similarity index 100%
rename from .github/workflows/bb-masking-semantic-type-global.yml
rename to .github/workflows/1-bb-masking-semantic-type-global.yml
diff --git a/.github/workflows/bb-masking-column.yml b/.github/workflows/2-bb-masking-column.yml
similarity index 100%
rename from .github/workflows/bb-masking-column.yml
rename to .github/workflows/2-bb-masking-column.yml
diff --git a/.github/workflows/3-bb-masking-classification.yml b/.github/workflows/3-bb-masking-classification.yml
new file mode 100644
index 0000000..5a5bd7f
--- /dev/null
+++ b/.github/workflows/3-bb-masking-classification.yml
@@ -0,0 +1,222 @@
+name: Bytebase Masking Policy Update Classification
+on:
+  pull_request:
+    types: [closed]
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  bytebase-masking-classification:
+    if: github.event.pull_request.merged == true
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+      contents: read
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 0
+
+      - name: Login Bytebase
+        id: bytebase-login
+        uses: bytebase/login-action@0.0.2
+        with:
+          bytebase-url: ${{ secrets.BYTEBASE_URL }}
+          service-key: ${{ secrets.BYTEBASE_SERVICE_KEY }}
+          service-secret: ${{ secrets.BYTEBASE_SERVICE_SECRET }}
+
+      - name: Get changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v42
+        with:
+          files: |
+            masking/data-classification.json
+            masking/global-masking-rule-classification.json
+            masking/databases/**/**/database-catalog-classification.json
+          since_last_remote_commit: true
+          fetch_depth: 0
+          include_all_old_new_renamed_files: true
+
+      - name: Debug changed files
+        run: |
+          echo "All changed and added files:"
+          echo "Modified files: ${{ steps.changed-files.outputs.modified_files }}"
+          echo "Added files: ${{ steps.changed-files.outputs.added_files }}"
+          echo "All changes: ${{ steps.changed-files.outputs.all_changed_files }}"
+
+      - name: Debug changed files in detail
+        run: |
+          echo "All changed files:"
+          echo "${{ steps.changed-files.outputs.all_changed_files }}"
+          echo "Contains data-classification.json: ${{ contains(steps.changed-files.outputs.all_changed_files, 'data-classification.json') }}"
+          echo "Contains global-masking-rule-classification.json: ${{ contains(steps.changed-files.outputs.all_changed_files, 'global-masking-rule-classification.json') }}"
+          echo "Contains database-catalog-classification.json: ${{ contains(steps.changed-files.outputs.all_changed_files, 'database-catalog-classification.json') }}"
+          echo "Raw output:"
+          echo "${{ toJSON(steps.changed-files.outputs) }}"
+
+      - name: Apply data classification
+        id: apply-data-classification
+        if: ${{ steps.changed-files.outputs.any_changed == 'true' && contains(steps.changed-files.outputs.all_changed_files, 'data-classification.json') }}
+        run: |
+          CHANGED_FILE="masking/data-classification.json"
+          echo "Processing: $CHANGED_FILE"
+
+          response=$(curl -s -w "\n%{http_code}" --request PATCH "${{ steps.bytebase-login.outputs.api_url }}/settings/bb.workspace.data-classification" \
"\n%{http_code}" --request PATCH "${{ steps.bytebase-login.outputs.api_url }}/settings/bb.workspace.data-classification" \ + --header "Authorization: Bearer ${{ steps.bytebase-login.outputs.token }}" \ + --header "Content-Type: application/json" \ + --data @"$CHANGED_FILE") + + # Extract status code and response body + status_code=$(echo "$response" | tail -n1) + body=$(echo "$response" | sed '$d') + + echo "status_code=${status_code}" >> $GITHUB_OUTPUT + echo "response_body<<EOF" >> $GITHUB_OUTPUT + echo "${body}" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + if [[ $status_code -lt 200 || $status_code -ge 300 ]]; then + echo "Failed with status code: $status_code" + exit 1 + fi + + - name: Apply global masking rule + id: apply-global-masking-rule + if: ${{ steps.changed-files.outputs.any_changed == 'true' && contains(steps.changed-files.outputs.all_changed_files, 'global-masking-rule-classification.json') }} + run: | + # Process all global-masking-rule-classification.json files + echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr ' ' '\n' | grep "global-masking-rule-classification.json" | while read -r CHANGED_FILE; do + echo "Processing: $CHANGED_FILE" + + response=$(curl -s -w "\n%{http_code}" --request PATCH "${{ steps.bytebase-login.outputs.api_url }}/policies/masking_rule?allow_missing=true&update_mask=payload" \ + --header "Authorization: Bearer ${{ steps.bytebase-login.outputs.token }}" \ + --header "Content-Type: application/json" \ + --data @"$CHANGED_FILE") + + # Extract status code and response body + status_code=$(echo "$response" | tail -n1) + body=$(echo "$response" | sed '$d') + + echo "Status code: $status_code" + echo "Response body: $body" + + # Append to outputs (with unique identifiers) + if [[ $status_code -ge 200 && $status_code -lt 300 ]]; then + echo "${body}" >> $GITHUB_OUTPUT + else + echo "Failed with status code: $status_code" + echo "Response body: ${body}" + if [[ $status_code -eq 403 ]]; then + echo "Access denied. Please check your permissions and API token." 
+              fi
+              exit 1
+            fi
+          done
+
+      - name: Apply column masking policy
+        id: apply-column-masking
+        if: ${{ steps.changed-files.outputs.any_changed == 'true' && contains(steps.changed-files.outputs.all_changed_files, '/database-catalog-classification.json') }}
+        run: |
+          # Process all database-catalog-classification.json files
+          echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr ' ' '\n' | grep "database-catalog-classification.json" | while read -r CHANGED_FILE; do
+            echo "Processing: $CHANGED_FILE"
+            INSTANCE_NAME=$(echo "$CHANGED_FILE" | sed -n 's/masking\/databases\/\([^/]*\)\/\([^/]*\).*/\1/p')
+            DATABASE_NAME=$(echo "$CHANGED_FILE" | sed -n 's/masking\/databases\/\([^/]*\)\/\([^/]*\).*/\2/p')
+            echo "INSTANCE_NAME=$INSTANCE_NAME"
+            echo "DATABASE_NAME=$DATABASE_NAME"
+
+            response=$(curl -s -w "\n%{http_code}" --request PATCH "${{ steps.bytebase-login.outputs.api_url }}/instances/${INSTANCE_NAME}/databases/${DATABASE_NAME}/catalog" \
+              --header "Authorization: Bearer ${{ steps.bytebase-login.outputs.token }}" \
+              --header "Content-Type: application/json" \
+              --data @"$CHANGED_FILE")
+
+            # Extract status code and response body
+            status_code=$(echo "$response" | tail -n1)
+            body=$(echo "$response" | sed '$d')
+
+            echo "Status code: $status_code"
+            echo "Response body: $body"
+
+            # Append to outputs (with unique identifiers)
+            echo "status_code_${DATABASE_NAME}=${status_code}" >> $GITHUB_OUTPUT
+            echo "response_${DATABASE_NAME}<<EOF" >> $GITHUB_OUTPUT
+            echo "${body}" >> $GITHUB_OUTPUT
+            echo "EOF" >> $GITHUB_OUTPUT
+
+            if [[ $status_code -lt 200 || $status_code -ge 300 ]]; then
+              echo "Failed with status code: $status_code for database: $DATABASE_NAME"
+              exit 1
+            fi
+          done
+
+      - name: Comment on PR
+        uses: actions/github-script@v7
+        env:
+          CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
+        with:
+          script: |
+            const changedFiles = process.env.CHANGED_FILES || '';
+            let commentBody = `### Classification Summary\n\n`;
+
+            // Add status of merge
+            commentBody += `✅ **PR Status:** Merged\n\n`;
+
+            // Add changed files section
+            commentBody += `📝 **Changed Files:**\n\n`;
+            if (changedFiles.trim()) {
+              commentBody += changedFiles.split(' ').map(f => `- ${f}`).join('\n');
+            } else {
+              commentBody += `None`;
+            }
+            commentBody += '\n\n';
+
+            // Add API calls summary
+            commentBody += `🔄 **API Calls:**\n\n`;
+            let apiCallsFound = false;
+
+            if (changedFiles.includes('data-classification.json')) {
+              const status = ${{ toJSON(steps.apply-data-classification.outputs) }}.status_code;
+              if (status) {
+                apiCallsFound = true;
+                const success = status >= 200 && status < 300;
+                commentBody += `- Classification: ${success ? '✅' : '❌'} ${status}\n`;
+              }
+            }
+
+            if (changedFiles.includes('global-masking-rule-classification.json')) {
+              const status = ${{ toJSON(steps.apply-global-masking-rule.outputs) }}.status_code;
+              if (status) {
+                apiCallsFound = true;
+                const success = status >= 200 && status < 300;
+                commentBody += `- Global Masking Rule: ${success ? '✅' : '❌'} ${status}\n`;
+              }
+            }
+
+            if (changedFiles.includes('database-catalog-classification.json')) {
+              const maskingStatuses = Object.keys(${{ toJSON(steps.apply-column-masking.outputs) }} || {})
+                .filter(key => key.startsWith('status_code_'))
+                .map(key => ({
+                  name: key.replace('status_code_', ''),
+                  status: ${{ toJSON(steps.apply-column-masking.outputs) }}[key]
+                }));
+
+              maskingStatuses.forEach(({name, status}) => {
+                apiCallsFound = true;
+                const success = status >= 200 && status < 300;
+                commentBody += `- Column Masking (${name}): ${success ? '✅' : '❌'} ${status}\n`;
+              });
+            }
+
+            if (!apiCallsFound) {
+              commentBody += `None`;
+            }
+
+            await github.rest.issues.createComment({
+              ...context.repo,
+              issue_number: context.issue.number,
+              body: commentBody
+            });
diff --git a/masking/databases/test-sample-instance/hr_test/database-catalog-classification.json b/masking/databases/test-sample-instance/hr_test/database-catalog-classification.json
new file mode 100644
index 0000000..e625130
--- /dev/null
+++ b/masking/databases/test-sample-instance/hr_test/database-catalog-classification.json
@@ -0,0 +1,24 @@
+{
+  "name": "instances/test-sample-instance/databases/hr_test/catalog",
+  "schemas": [
+    {
+      "name": "public",
+      "tables": [
+        {
+          "name": "salary",
+          "columns": {
+            "columns": [
+              {
+                "name": "amount",
+                "semanticType": "bb.default",
+                "labels": {},
+                "classification": ""
+              }
+            ]
+          },
+          "classification": "2-2"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/masking/global-masking-rule-advanced.json b/masking/global-masking-rule-classification.json
similarity index 100%
rename from masking/global-masking-rule-advanced.json
rename to masking/global-masking-rule-classification.json
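
The sketch below is a minimal local dry run of the column-masking step added in 3-bb-masking-classification.yml, useful for checking a database-catalog-classification.json payload before opening a PR. The sed expressions and the /instances/.../databases/.../catalog PATCH mirror the "Apply column masking policy" step; BYTEBASE_API_URL and BYTEBASE_TOKEN are assumed placeholder environment variables (in CI these come from bytebase/login-action), not values defined by this patch.

#!/usr/bin/env bash
# Sketch: reproduce the workflow's catalog PATCH locally under assumed env vars.
set -euo pipefail

CHANGED_FILE="masking/databases/test-sample-instance/hr_test/database-catalog-classification.json"

# Same sed expressions the workflow uses to derive the instance and database
# from the masking/databases/<instance>/<database>/... layout.
INSTANCE_NAME=$(echo "$CHANGED_FILE" | sed -n 's/masking\/databases\/\([^/]*\)\/\([^/]*\).*/\1/p')
DATABASE_NAME=$(echo "$CHANGED_FILE" | sed -n 's/masking\/databases\/\([^/]*\)\/\([^/]*\).*/\2/p')
echo "instance=$INSTANCE_NAME database=$DATABASE_NAME"

# Validate the JSON before sending it anywhere.
python3 -c "import json,sys; json.load(open(sys.argv[1]))" "$CHANGED_FILE"

# Issue the same PATCH the workflow performs; the HTTP status prints last.
curl -s -w "\n%{http_code}\n" --request PATCH \
  "${BYTEBASE_API_URL}/instances/${INSTANCE_NAME}/databases/${DATABASE_NAME}/catalog" \
  --header "Authorization: Bearer ${BYTEBASE_TOKEN}" \
  --header "Content-Type: application/json" \
  --data @"$CHANGED_FILE"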