
#!/usr/bin/env bash
####################################################################################
# created         : 2021-01-10
#
# Purpose         : Script to copy data from HDFS to S3
#
# Arguments       : Up to 4 arguments; the first two are required.
#   1st            : The properties file for the S3/BDA configuration
#   2nd            : TableList file with the list of tables that need to be moved,
#                    along with primary partition details
#   3rd (optional) : fRunDate in YYYY-MM-DD format
#   4th (optional) : tRunDate in YYYY-MM-DD format
#
# Example         : sh hdfs_to_S3.sh /conf/properties/aws-us-east-1.properties /conf/tableList/smis_hdfs_to_s3.config 2021-01-10 2021-01-10
#
# Version History : 1.0
#
####################################################################################
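# Illustrative input formats (a sketch inferred from how this script parses its inputs;
# actual files may differ):
#   properties file : one key=value pair per line (read by the IFS='=' loop below)
#   tableList file  : pipe-delimited, one table per line, matching
#                     "table|partition|days|noofdays", e.g. (hypothetical table name)
#                         smis.transactions|process_date|1|7
#                     where partition is one of network_process_date, process_month,
#                     cycl_strt_dt, process_date, extract_date, or NA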

# Read the parameters


props=$1
tableList=$2
fRunDate=$3
tRunDate=$4
echo "INFO : fRunDate ===> $fRunDate"
echo "INFO : tRunDate ===> $tRunDate"
runType="History"
if [ "$fRunDate" = "" ] && [ "$tRunDate" = "" ]
then
fRunDate=`date +%Y-%m-%d`
tRunDate=`date +%Y-%m-%d`
runType="Daily"
echo "INFO : fRunDate ===> $fRunDate"
echo "INFO : tRunDate ===> $tRunDate"
fi

scriptName=`echo ${tableList##*/} | cut -f1 -d"."`


srcTbleName=""
srcSchemaName=""
reconSyncFlag="0"
filterFileNm=`echo $tRunDate"_"$scriptName"_bda_to_s3_filter_tmp.txt"`
`rm *"$scriptName"_bda_to_s3_filter_tmp.txt*`
#set color coding
red=`tput setaf 1`
green=`tput setaf 2`
yellow=`tput setaf 3`
reset=`tput sgr0`

echo $props

echo "INFO :-------------------Read Properties


Files---------------------------------"
if [ -f "$props" ]
then
echo "${green}####################################---PROPERTIES---
#####################################"
while IFS='=' read -r key value
do
key=$(echo $key)
eval ${key}=\${value}
echo "${red}$key=${yellow}$value"
done < "$props"
echo "$
{green}############################################################################
##############"
else
echo "${red} ERROR : $props not found."
exit 1
fi
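# A minimal sketch of the keys this script expects in the properties file (inferred
# from the variables referenced below; the names come from this script, values are placeholders):
#   BDA_SERVICE_ACCOUNT, beeline_connection, yarn_queue, s3_bucket, emailAdd
#   log_dir, recon_dir, bda_filename, tbl_config_dtl, tbl_recon, tbl_log, time_Diff
#   aws_account_number, aws_role_name, aws_session_name, aws_profile
#   s3_endpoint, server_side_encryption_algorithm, s3a_fast_upload, s3a_buffer_dir,
#   multipart_uploads_enabled, s3a_proxy_host, s3a_proxy_port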
# kinit
export PYTHON_EGG_CACHE=./myeggs
#
domainIdAndRealm=`echo $BDA_SERVICE_ACCOUNT"@AD001.INFOFTPS.COM"`
keyTabFile=`echo $BDA_SERVICE_ACCOUNT".keytab"`
if [ "$domainIdAndRealm" != "" ] && [ "$keyTabFile" != "" ]; then
domainIdAndRealm=`echo $domainIdAndRealm | sed "s/\"//g"`
homePath=`echo "/home/"$BDA_SERVICE_ACCOUNT`
`kinit -kt $homePath/$keyTabFile $domainIdAndRealm`
if [ $? -ne 0 ]; then
echo "ERROR : Kinit unsuccessful!!!"
exit 1
fi
fi
. /home/$BDA_SERVICE_ACCOUNT/.bash_profile
echo "INFO : Kinit successful!!!"
# Logging Function
logIt () {
timestamp=$(date +%Y%m%d%H%M%S)
v2=`echo -n "$2" | tr '\n' '|'`
v3=`echo -n "$3" | tr '\n' '|'`
v4=`echo -n "$4" | tr '\n' '|'`
v5=`echo -n "$5" | tr '\n' '|'`
v6=`echo -n "$6" | tr '\n' '|'`
printf "$timestamp\a$1\a$scriptName\a$srcSchemaName\a$srcTbleName\a$v2\a$v3\
a$v4\a$v5\a$v6\n" | hdfs dfs -appendToFile - $appLogFile
}
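# logIt appends one BEL (\a) delimited record to $appLogFile on HDFS:
#   timestamp, $1 (level), scriptName, srcSchemaName, srcTbleName, then $2..$6
#   with embedded newlines folded to '|'. Example call used below:
#   logIt "INFO" "Log path already exists " "" "" "" 0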

# Send Email Function


sendMail() {
status="$1"
body="$2"
mail -s "$status:$runType:BDA to S3 Load:$scriptName" $emailAdd <<< "$body"
}
# Get POST check Queries from DB
getPrePostRules() {

postrunsql="select b.sql1 FROM $tbl_config_dtl b WHERE b.source_table =


'$tablename'
AND b.config_type = 'POST' AND b.extract_date IN (select
max(a.extract_date) AS extract_date FROM $tbl_config_dtl a
WHERE a.source_table = '$tablename');"
postCheckSqls=$(beeline -u $beeline_connection --hiveconf
mapred.job.queue.name=$yarn_queue -e "$postrunsql" 2>/dev/null)
if [ $? -ne 0 ]; then
logIt "ERROR" "Retrieve POST CHECK SQL error " "$tablename" "" "" 1
exit 1
fi
parsedOutPut=`echo "$postCheckSqls" | grep -A 3 "^[+][-+]*[+]$" | tail -n
+4 | head -n -1`
printf "$parsedOutPut";
}
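# getPrePostRules strips beeline's ASCII-bordered result set down to the data rows,
# e.g. (illustrative shape only):
#   +-----------------------+
#   |        b.sql1         |
#   +-----------------------+
#   | select count(*) ...   |
#   +-----------------------+
# The grep anchors on the border lines; tail/head drop the header block and the closing border.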

createFilterFile() {
fromDate=$1
toDate=$2
partName=$3
dateFormat=$4
if [ "$fromDate" != "$toDate" ]
then
while ! [[ $fromDate > $toDate ]]
do
parsedDate=`date -d "$fromDate" +$dateFormat`
echo "$partName=$parsedDate" >> "$filterFileNm"
fromDate=$(date -d "$fromDate + 1 day" +%Y-%m-%d)

done
else
parsedDate=`date -d "$fromDate" +$dateFormat`
echo "$partName=$parsedDate" >> "$filterFileNm"
fi
}
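# Usage sketch: createFilterFile 2021-01-08 2021-01-10 process_date %Y%m%d
# appends one partition filter per day in the range to $filterFileNm, e.g.
#   process_date=20210108
#   process_date=20210109
#   process_date=20210110
# These lines are later matched with 'grep -Ff' against the table's partition locations.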

cleantmp() {
`rm $filterFileNm`
}
sendMail "Started" "$scriptName Started loading data to S3 Bucket: $bucket"

echo "INFO :------------------Set Log File and Recon


Paths---------------------------------"
hdfsLogFile=$log_dir"="
appLogFile=$hdfsLogFile`date +%Y-%m-%d`/$bda_filename
echo "INFO : Log path==> $appLogFile"
hdfsReconFile=$recon_dir"="
reconfilename=`echo $bda_filename".csv"`
appReconFile=$hdfsReconFile`date +%Y-%m-%d`/$reconfilename
echo "INFO : Recon path==> $appReconFile"
$(hdfs dfs -mkdir $hdfsLogFile`date +%Y-%m-%d`/)
if [ $? -ne 0 ]; then
logIt "INFO" "Log path already exists " "" "" "" 0

fi
$(hdfs dfs -mkdir $hdfsReconFile`date +%Y-%m-%d`/)
if [ $? -ne 0 ]; then
logIt "INFO" "Recon path already exists " "" "" "" 0

fi
echo "INFO :------------------ HDFS Log and Recon paths created ------------------
"

echo ${reset}
echo""

# Check for the standard AWS environment variables


start_Time=$SECONDS

temp_role1=$(aws sts assume-role \
--role-arn "arn:aws:iam::${aws_account_number}:role/${aws_role_name}" \
--role-session-name "${aws_session_name}" --profile ${aws_profile} --duration-seconds 14400)

export AWS_ACCESS_KEY_ID=$(echo $temp_role1 | python2 -c "import sys, json; print json.load(sys.stdin)['Credentials']['AccessKeyId']")
export AWS_SECRET_ACCESS_KEY=$(echo $temp_role1 | python2 -c "import sys, json; print json.load(sys.stdin)['Credentials']['SecretAccessKey']")
export AWS_SESSION_TOKEN=$(echo $temp_role1 | python2 -c "import sys, json; print json.load(sys.stdin)['Credentials']['SessionToken']")
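# The python2 one-liners above pull fields out of the assume-role response, which
# (per the AWS STS API) looks roughly like:
#   {"Credentials": {"AccessKeyId": "...", "SecretAccessKey": "...",
#                    "SessionToken": "...", "Expiration": "..."}, "AssumedRoleUser": {...}}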

if [ -z "$AWS_ACCESS_KEY_ID" ]
then
logIt "INFO" "AWS_ACCESS_KEY_ID not set" "" "" "" 0
read -s -p "s3 access key :" access_key
export AWS_ACCESS_KEY_ID=$access_key
else
logIt "INFO" "AWS_ACCESS_KEY_ID set" "" "" "" 0

fi
if [ -z "$AWS_SECRET_ACCESS_KEY" ]
then
logIt "INFO" "AWS_SECRET_ACCESS_KEY not set" "" "" "" 0
read -s -p "s3 secret key :" secret_key
export AWS_SECRET_ACCESS_KEY=$secret_key
else
logIt "INFO" "AWS_SECRET_ACCESS_KEY set" "" "" "" 0
fi
echo ${reset}
echo""

echo "INFO :------------------ AWS environment variables set.------------------"


logIt "INFO" "Script Run Date ===> " "" "$tRunDate" "" 0
if [ -f "$tableList" ]
then
while IFS='|' read -r table partition days noofdays
do
tablename=$table
srcSchemaName=`echo "$tablename" | cut -f1 -d"."`
srcTbleName=`echo "$tablename" | cut -f2 -d"."`
partitionname=$partition
bucket=$s3_bucket

logIt "INFO" "Processing Started for Table $srcSchemaName.$srcTbleName" "" "" "" 0
echo "INFO : Processing Started for Table $srcSchemaName.$srcTbleName"
if [ "network_process_date" = "$partition" ]
then
dayVal=$(( $days + $noofdays ))
echo " Dayval-==> $dayVal"
fromRunDate=`date -d "$fRunDate -$dayVal day" +%Y-%m-%d`
toRunDate=`date -d "$tRunDate -$days day" +%Y-%m-%d`

`rm $filterFileNm`
createFilterFile "$fromRunDate" "$toRunDate" "$partition" "%Y%m%d"

elif [ "process_month" = "$partition" ]


then
dayVal=$(( $days + $noofdays ))
echo " Dayval-==> $dayVal"
fromRunDate=`date -d "$fRunDate -$dayVal month" +%Y-%m-%d`
toRunDate=`date -d "$tRunDate -$days month" +%Y-%m-%d`
echo "fromDate==> $fromRunDate"
`rm $filterFileNm`
createFilterFile "$fromRunDate" "$toRunDate" "$partition" "%Y%m"

elif [ "cycl_strt_dt" = "$partition" ]


then
dayVal=$(( $days + $noofdays ))
echo " Dayval-==> $dayVal"
fromRunDate=`date -d "$fRunDate -$dayVal month" +%Y-%m-01`
toRunDate=`date -d "$tRunDate -$days month" +%Y-%m-01`
echo "fromDate==> $fromRunDate"
`rm $filterFileNm`
createFilterFile "$fromRunDate" "$toRunDate" "$partition" "%Y-%m-
%d"

elif [ "process_date" = "$partition" ]


then
dayVal=$(( $days + $noofdays ))
echo " Dayval-==> $dayVal"
fromRunDate=`date -d "$fRunDate -$dayVal day" +%Y-%m-%d`
toRunDate=`date -d "$tRunDate -$days day" +%Y-%m-%d`
echo "fromDate==> $fromRunDate"
`rm $filterFileNm`
createFilterFile "$fromRunDate" "$toRunDate" "$partition" "%Y%m%d"

elif [ "NA" = "$partition" ]


then
partitionname=""
elif [ "extract_date" = "$partition" ]
then
fromRunDate=`date -d "$fRunDate" +%Y-%m-%d`
toRunDate=`date -d "$tRunDate" +%Y-%m-%d`
`rm $filterFileNm`
createFilterFile "$fromRunDate" "$toRunDate" "$partition" "%Y-%m-
%d"

else
echo "ERROR : Wrong Partition Name $partition"
logIt "INFO" "Wrong Partition Name $partition " "" "" "" 0
exit 1
fi
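# At this point $filterFileNm holds one "<partition>=<value>" line per date in the
# requested range (unless partition is NA); it is used below to narrow the partition
# locations returned by 'explain extended'.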

echo "INFO : Table ==> $tablename and fromdateformat ==> $fromRunDate"


echo "INFO : Table ==> $tablename and dateformat ==> $toRunDate"
echo "INFO : Partitionname variable logic set."
uniqueSrcPath=()
parsedSrcPath=()
if [ "NA" != "$partition" ]
then
getPartSql="explain extended select * from $tablename;"
srcPath=`beeline -u $beeline_connection --hiveconf mapred.job.queue.name=$yarn_queue -e "$getPartSql" 2>/dev/null | grep location | grep -Ff $filterFileNm | tr -s ' ' | cut -d" " -f3 | sort -u`
if [ $? -ne 0 ]; then
logIt "ERROR" "Retrieve table Location error " "" "$tablename" "" 1
sendMail "Failed" "Retrieve table Location error ==> $tablename"
cleantmp
exit 1
fi
IFS=$'\n'
srcPath=(`echo "$srcPath" | sed "s/[|]$/\\n/g"`)
for sqlList in "${srcPath[@]}"
do
# echo "$sqlList"
path1=${sqlList##*$partition}
path2=${path1%%/*}
partitionname=`echo $partition$path2`
pSrcPath=`echo ${sqlList%%$partition*}$partitionname | cut -f3 -d" "`
parsedSrcPath+=($pSrcPath)
done
uniqueSrcPath=(`echo "${parsedSrcPath[@]}" | tr ' ' '\n' | sort -u`)
else
getPartSql="describe formatted $tablename;"
srcPath=`beeline -u $beeline_connection --hiveconf mapred.job.queue.name=$yarn_queue -e "$getPartSql" 2>/dev/null | grep Location: | cut -f3 -d"|" | sort -u`
if [ $? -ne 0 ]; then
logIt "ERROR" "Retrieve table Location error " "" "$tablename" "" 1
sendMail "Failed" "Retrieve table Location error ==> $tablename"
cleantmp
exit 1
fi
uniqueSrcPath=($srcPath)
fi
echo "INFO : Unique Source Path List Created"
# echo "$uniqueSrcPath"
# echo "$uniqueSrcPath"

if [ "$srcPath" != "" ]
then
IFS=$'\n'
for sqlList in "${uniqueSrcPath[@]}"
do

if [ "NA" != "$partition" ]
then
# echo $sqlList
# echo $partition
# echo $tablename
path1=${sqlList##*$partition}
path2=${path1%%/*}
btwPath1=${sqlList##*$srcTbleName}
btwPath2=${btwPath1%%$partition*}
partitionname=`echo $partition$path2`
# echo $btwPath2
# echo $partitionname
# echo ${sqlList%%$partition*}$partitionname
parsedSrcPath=$sqlList
dest=`echo "$bucket/$srcSchemaName/$srcTbleName/$partitionname"`

else
parsedSrcPath=$sqlList
dest=`echo "$bucket/$srcSchemaName/$srcTbleName"`
fi

echo "INFO : Destination ===> $dest"


echo "INFO : Source ====> $parsedSrcPath "

source=${parsedSrcPath//[[:space:]]}
dest=${dest//[[:space:]]}

logIt "INFO" "Source Path" "" "$source" "" 0


logIt "INFO" "Destination Path" "" "$dest" "" 0

echo "INFO :
$server_side_encryption_algorithm=$server_side_encryption_algorithm"
logIt "INFO" "$server_side_encryption_algorithm" ""
"$server_side_encryption_algorithm" "" 0
echo "INFO : $s3_endpoint=$s3_endpoint"
logIt "INFO" "$s3_endpoint" "" "$s3_endpoint" "" 0
echo "INFO : $s3a_fast_upload=$s3a_fast_upload"
logIt "INFO" "$s3a_fast_upload" "" "$s3a_fast_upload" "" 0

logIt "INFO" "BDA to S3 Upload Started..." "" "$tablename" "" 0


echo "INFO : BDA to S3 Upload Started...for table $tablename
and partition : $partitionname "
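# STS credentials expire (assume-role is called with --duration-seconds 14400, i.e. 4 hours),
# so before each distcp the elapsed time is compared against $time_Diff (presumably set in the
# properties file) and the role is re-assumed when needed.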
end_Time=$(( SECONDS - $start_Time ))
if [ $end_Time -ge $time_Diff ]
then
echo "INFO : Renew the AWS Account Credentials"
start_Time=$SECONDS
temp_role1=$(aws sts assume-role \
--role-arn "arn:aws:iam::${aws_account_number}:role/${aws_role_name}" \
--role-session-name "${aws_session_name}" --profile ${aws_profile} --duration-seconds 14400)

export AWS_ACCESS_KEY_ID=$(echo $temp_role1 | python2 -c "import sys, json; print json.load(sys.stdin)['Credentials']['AccessKeyId']")
export AWS_SECRET_ACCESS_KEY=$(echo $temp_role1 | python2 -c "import sys, json; print json.load(sys.stdin)['Credentials']['SecretAccessKey']")
export AWS_SESSION_TOKEN=$(echo $temp_role1 | python2 -c "import sys, json; print json.load(sys.stdin)['Credentials']['SessionToken']")

if [ -z "$AWS_ACCESS_KEY_ID" ]
then
logIt "INFO" "AWS_ACCESS_KEY_ID not set" "" "" "" 0
read -s -p "s3 access key :" access_key
export AWS_ACCESS_KEY_ID=$access_key
else
logIt "INFO" "AWS_ACCESS_KEY_ID set again" "" "" "" 0
fi
if [ -z "$AWS_SECRET_ACCESS_KEY" ]
then
logIt "INFO" "AWS_SECRET_ACCESS_KEY not set" "" "" "" 0
read -s -p "s3 secret key :" secret_key
export AWS_SECRET_ACCESS_KEY=$secret_key
else
logIt "INFO" "AWS_SECRET_ACCESS_KEY set again" "" "" "" 0
fi
fi
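# distcp pushes the HDFS source path to s3a:// with the temporary STS credentials:
#   fs.s3a.*                 : endpoint, SSE algorithm, proxy and upload tuning from the properties file
#   TemporaryAWSCredentialsProvider + fs.s3a.session.token : use the STS session credentials
#   -m 150                   : cap the number of copy mappers
#   -update                  : copy only files missing or changed at the target
#   -skipcrccheck            : skip source/target checksum comparison (HDFS and S3 checksums differ)
#   -numListstatusThreads 40 : parallel threads for building the file listing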
hadoop distcp -Dmapreduce.job.queuename=$yarn_queue \
-Dfs.s3a.server-side-encryption-algorithm=$server_side_encryption_algorithm \
-Dfs.s3a.endpoint=$s3_endpoint \
-Dfs.s3a.fast.upload=$s3a_fast_upload \
-Dfs.s3a.buffer.dir=$s3a_buffer_dir \
-Dfs.s3a.multipart.uploads.enabled=$multipart_uploads_enabled \
-Dfs.s3a.access.key=$AWS_ACCESS_KEY_ID \
-Dfs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY \
-Dfs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider \
-Dfs.s3a.session.token=$AWS_SESSION_TOKEN \
-Dfs.s3a.proxy.host=$s3a_proxy_host \
-Dfs.s3a.proxy.port=$s3a_proxy_port \
-m 150 \
-update \
-skipcrccheck \
-numListstatusThreads 40 \
$source s3a://$dest 2>/dev/null
if [ $? -ne 0 ]; then
logIt "ERROR" "BDA to S3 Upload Distcp Command Failed" "" "$tablename" "" 1
sendMail "Failed" "BDA to S3 Upload Distcp Command Failed ==> $tablename"
cleantmp
exit 1
else
logIt "INFO" "BDA to S3 Upload Finished Successfully. " "" "$tablename" "" 0
fi
echo "INFO : BDA to S3 Upload Finished...for table $tablename and partition : $partitionname"

done
logIt "INFO" "POST BDA Processing Started... " "" "$tablename"
"" 0
rulesEngine=$(getPrePostRules)
IFS=$'\n'
rulesEngine=(`echo "$rulesEngine" | sed "s/[|]$/\\n/g"`)
for rule in "${rulesEngine[@]}"
do
load_date=$(date +%Y-%m-%d)
runTime=$(date +%Y%m%d%H%M%S)

sql=`echo "$rule"| cut -f2 -d"|" | sed "s/{toRunDate}/"$


{toRunDate}"/"g | sed "s/{fromRunDate}/"${fromRunDate}"/"g `

#echo "$sql"

reconSyncFlag="1"

postCheckResult=$(beeline -u $beeline_connection --hiveconf


mapred.job.queue.name=$yarn_queue -e "$sql" 2>/dev/null )
if [ $? -ne 0 ]; then
logIt "ERROR" "Recon SQL for Table $tablename Failed"
"" "$sql" "" 1
sendMail "Failed" "Recon SQL for Table $tablename
Failed ==> $sql"
cleantmp
exit 1
fi

rows=`echo "$postCheckResult" | sed -n '/+------/!p' | tail -n +2`
hdr=`echo "$postCheckResult" | grep -A 3 "^[+][-+]*[+]$" | head -n 2 | tail -n -1`
colCnt=`echo "$hdr" | tr -cd '|' | wc -c`
rowCnt=`echo "$rows" | wc -l`

IFS=$'\n'
rows=(`echo "$rows" | sed "s/[|]$/\\n/g"`)

if [ "NA" != "$partition" ]
then
for row in "${rows[@]}"
do
# echo "Row===> $row"
bda_partition=`echo $row | cut -f2 -d"|"`
bda_partition_val=`echo $row | cut -f3 -d"|"`
for ((i=3;i<colCnt;i++))
do

val=$(( i + 1 ))
Matrix_name=`echo $hdr
| cut -f$val -d"|"`
Matrix_value=`echo $row
| cut -f$val -d"|" | tr -d " "`
# echo "Value ==>
$Matrix_value"
len=`echo $Matrix_value
| tr -d " "| wc -m`
# echo " Length ==>
$len"
if [ $Matrix_value =
"0E-18" ]
then

Matrix_value="0.000000000000000000"
fi
# echo "Matrix_name==>
`echo $hdr | cut -f$val -d"|"` Matrix_value ==> `echo $rows | cut -f$val -d"|"`"
# logIt "DEBUG"
"Matrix_name==> ${Matrix_name//[[:space:]]}" "" "${Matrix_value//[[:space:]]}" "" 0
# echo "|$bda_partition|
$bda_partition_val|${Matrix_name//[[:space:]]}|${Matrix_value//[[:space:]]}"
printf "$runTime|
$tablename|$bda_partition|$bda_partition_val|${Matrix_name//[[:space:]]}|$
{Matrix_value//[[:space:]]}|$load_date\n" | hdfs dfs -appendToFile - $appReconFile
done

done

else
bda_partition=""
bda_partition_val=""
for ((i=1;i<colCnt;i++))
do
val=$(( i + 1 ))
Matrix_name=`echo $hdr | cut -f$val -d"|"`
Matrix_value=`echo $rows | cut -f$val -d"|"`
# echo "Matrix_name==> `echo $hdr | cut -f$val -
d"|"` Matrix_value ==> `echo $rows | cut -f$val -d"|"`"
logIt "DEBUG" "Matrix_name==>
${Matrix_name//[[:space:]]}" "" "${Matrix_value//[[:space:]]}" "" 0
printf "$runTime|$tablename|$bda_partition|
$bda_partition_val|${Matrix_name//[[:space:]]}|${Matrix_value//[[:space:]]}|
$load_date\n" | hdfs dfs -appendToFile - $appReconFile
done

fi

done
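# Each recon record appended to $appReconFile above has the layout:
#   runTime|tablename|bda_partition|bda_partition_val|metric_name|metric_value|load_date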

logIt "INFO" "POST BDA Processing Finished. " "" "$tablename" "" 0
else
echo "INFO : ------------------No data for table ==> $tablename
------------------ "
logIt "INFO" "No data for table. " "" "$tablename" "" 0
fi

done < $tableList


echo "$
{green}############################################################################
##############"
else
echo "${red} ERROR: $tableList not found."
logIt "ERROR" "ERROR: $tableList not found" "" "$tableList" "" 1
sendMail "Failed" "$scriptName $tableList not found"
exit 1
fi
echo "INFO : ------------------BDA to S3 data load finished------------------ "
logIt "INFO" "BDA to S3 data load finished " "" "" "" 0

$(beeline -u $beeline_connection --hiveconf mapred.job.queue.name=$yarn_queue -e "MSCK REPAIR TABLE $tbl_recon;" 2>/dev/null)
if [ $? -ne 0 ]; then
logIt "INFO" "MSCK REPAIR Recon table failed " "" "" "" 0
else
logIt "INFO" "MSCK REPAIR Recon table Successful" "" "" "" 0

fi

if [ "$reconSyncFlag" = "1" ]
then
echo "INFO : Load Recon data to S3 Started..."
logIt "INFO" "Load Recon data to S3 Started..." "" "" "" 0

srcSchemaName=`echo "$tbl_recon" | cut -f1 -d"."`
srcTbleName=`echo "$tbl_recon" | cut -f2 -d"."`
partitionname=$recon_dir"="$load_date
reconSrcPath=`echo $partitionname`
reconDestPath=`echo "$bucket/$srcSchemaName/$srcTbleName/extract_date=$load_date"`

hadoop distcp -Dmapreduce.job.queuename=$yarn_queue \
-Dfs.s3a.server-side-encryption-algorithm=$server_side_encryption_algorithm \
-Dfs.s3a.endpoint=$s3_endpoint \
-Dfs.s3a.fast.upload=$s3a_fast_upload \
-Dfs.s3a.buffer.dir=$s3a_buffer_dir \
-Dfs.s3a.multipart.uploads.enabled=$multipart_uploads_enabled \
-Dfs.s3a.access.key=$AWS_ACCESS_KEY_ID \
-Dfs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY \
-Dfs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider \
-Dfs.s3a.session.token=$AWS_SESSION_TOKEN \
-Dfs.s3a.proxy.host=$s3a_proxy_host \
-Dfs.s3a.proxy.port=$s3a_proxy_port \
-m 100 \
-update \
-skipcrccheck \
-numListstatusThreads 40 \
$reconSrcPath s3a://$reconDestPath 2>/dev/null
if [ $? -ne 0 ]; then
logIt "ERROR" "BDA to S3 Upload Recon data Distcp Command Failed" ""
"$tbl_recon" "" 1
sendMail "Failed" "$scriptName Failed loading Recon $tbl_recon table data
to S3"x
cleantmp
exit 1
else
logIt "INFO" "BDA to S3 Upload Recon Data Finished Successfully. " ""
"$tbl_recon" "" 0
fi
echo "INFO : Load Recon data to S3 Finished."
logIt "INFO" "Load Recon data to S3 Finished." "" "" "" 0
else
echo "INFO : No data to Sync for Recon data to S3."
logIt "INFO" "INFO: No data to Sync for Recon data to S3." "" "" "" 0
fi

$(beeline -u $beeline_connection --hiveconf mapred.job.queue.name=$yarn_queue -e "MSCK REPAIR TABLE $tbl_log;" 2>/dev/null)
if [ $? -ne 0 ]; then
logIt "INFO" "MSCK REPAIR Log table failed" "" "" "" 0
else
logIt "INFO" "MSCK REPAIR Log table Successful" "" "" "" 0
fi
sendMail "Successful" "$scriptName successfully loaded data to S3 Bucket: $bucket"
cleantmp
echo ${reset}
echo""
