Hadoop Installation 2
/* Attention! */
/* Reverse DNS lookup must be set up properly. */
/* Configuration should be done on the namenode. */
/* Otherwise, datanodes cannot connect to the namenode. */
/* An alternative is to set up a DNS server. */
/* Without HA, port is 9870 */
/* With HA, port is 50070 */
/***************************************************/
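/**
* Without a DNS server, /etc/hosts on every node can provide both forward
* and reverse lookups. A minimal sketch (the IP addresses are placeholders;
* add one line per node and use your own addresses):
*/
/*
* 192.168.0.101 hadoop-master-1.lan hadoop-master-1
* 192.168.0.102 hadoop-master-2.lan hadoop-master-2
* 192.168.0.103 hadoop-slave-1.lan hadoop-slave-1
* ...
*/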
/**
* Add a new user and configure sudo
*/
adduser hadoopuser &&
apt install sudo &&
visudo &&
clear
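/**
* For reference, a typical line to add via visudo so hadoopuser gets full
* sudo rights (a sketch, not taken from the original):
*/
/* hadoopuser ALL=(ALL:ALL) ALL */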
/**
* Alternative way to grant sudo (add the user to the sudo group)
*/
usermod -aG sudo hadoopuser
/**
* Set up SSH login without a password
*/
ssh-keygen -t rsa
/* cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys */
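/**
* If authorized_keys is populated by hand, sshd expects restrictive
* permissions; a quick check (assumed step, not in the original):
*/
chmod 700 ~/.ssh && chmod 600 ~/.ssh/authorized_keys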
/**
* Log in to the other machines without a password
*/
ssh-copy-id hadoopuser@hadoop-master-1
ssh-copy-id hadoopuser@hadoop-slave-1
ssh-copy-id hadoopuser@hadoop-slave-2
ssh-copy-id hadoopuser@hadoop-slave-3
ssh-copy-id hadoopuser@hadoop-slave-4
ssh-copy-id hadoopuser@hadoop-master-2
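/**
* Quick verification that passwordless login works on every host
* (host list taken from the commands above; sketch only):
*/
for h in hadoop-master-1 hadoop-slave-1 hadoop-slave-2 hadoop-slave-3 hadoop-slave-4 hadoop-master-2; do
  ssh hadoopuser@"$h" hostname
done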
sudo su
/**
* Install the Java Runtime Environment and Java Development Kit.
* Then download the Hadoop tarball and extract it to /opt.
*/
sudo apt update && sudo apt install default-jre default-jdk -y
cd /opt
# wget http://ftp.mirror.tw/pub/apache/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz &&
sudo wget http://192.168.0.11:8080/FTP/hadoop-3.3.5.tar.gz
sudo tar -zxvf hadoop-3.3.5.tar.gz
sudo rm hadoop-3.3.5.tar.gz
cd hadoop-3.3.5/
/**
* Create a .bashrc with the environment variables.
*/
echo "export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64" > .bashrc
echo "export HADOOP_HOME=/opt/hadoop-3.3.5" >> .bashrc
echo "export PATH=\$PATH:\$HADOOP_HOME/bin" >> .bashrc
echo "export PATH=\$PATH:\$HADOOP_HOME/sbin" >> .bashrc
echo "export HADOOP_MAPRED_HOME=\$HADOOP_HOME" >> .bashrc
echo "export HADOOP_COMMON_HOME=\$HADOOP_HOME" >> .bashrc
echo "export HADOOP_HDFS_HOME=\$HADOOP_HOME" >> .bashrc
echo "export YARN_HOME=\$HADOOP_HOME" >> .bashrc
echo "export HADOOP_COMMON_LIB_NATIVE_DIR=\$HADOOP_HOME/lib/native" >> .bashrc
echo "export HADOOP_OPTS=\"-Djava.library.path=\$HADOOP_HOME/lib\"" >> .bashrc
/**
* Load .bashrc
*/
source .bashrc
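/**
* Sanity check that the environment took effect (not in the original):
*/
echo $HADOOP_HOME
hadoop version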
/**
* Create the namenode, datanode, and journalnode storage paths.
*/
cd /opt/hadoop-3.3.5
mkdir hdfs
mkdir hdfs/journalnode
mkdir hdfs/datanode
mkdir hdfs/namenode
chown hadoopuser -R ./
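/*
* The contents of the four files edited below are listed in the sections
* at the end of this document.
*/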
nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
nano $HADOOP_HOME/etc/hadoop/core-site.xml
nano $HADOOP_HOME/etc/hadoop/hdfs-site.xml
nano $HADOOP_HOME/etc/hadoop/workers
/**
* Copy the config to the other machines.
* Only one node needs to run these commands.
*/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
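/**
* The same copy written as a loop (a sketch; assumes the IP addresses above
* map to these hostnames and that the names resolve, otherwise keep the IPs):
*/
for h in hadoop-master-2 hadoop-slave-1 hadoop-slave-2 hadoop-slave-3 hadoop-slave-4; do
  scp /opt/hadoop-3.3.5/etc/hadoop/* hadoopuser@"$h":/opt/hadoop-3.3.5/etc/hadoop/
done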
/**
* Format the namenode and start the cluster.
*/
chown hadoopuser -R ./
hdfs namenode -format
start-all.sh
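/**
* Quick checks after start-all.sh (not in the original): jps should show the
* NameNode/DataNode/ResourceManager/NodeManager processes, and the report
* should list all live datanodes. The web UI is at http://hadoop-master-1:9870
* (port per the note at the top).
*/
jps
hdfs dfsadmin -report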
## High Availability ##
/**
* Run this command on every node that will run a journalnode.
*/
source /opt/hadoop-3.3.5/.bashrc && hdfs --daemon start journalnode
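/**
* Optional check that the journalnode is up and listening on port 8485
* (port taken from dfs.namenode.shared.edits.dir below):
*/
jps | grep JournalNode
ss -tln | grep 8485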
/**
* Synchronize the namenode's state to the journalnodes,
* then copy the namenode metadata to the standby namenode.
*/
hdfs namenode -initializeSharedEdits
scp -r /opt/hadoop-3.3.5/hdfs/namenode/* hadoopuser@hadoop-master-2:/opt/hadoop-3.3.5/hdfs/namenode/
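/**
* A commonly used alternative to the scp above is to initialize the standby
* namenode's metadata directly on hadoop-master-2 (sketch, not in the original):
*/
/* hdfs namenode -bootstrapStandby */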
/**
* Initialize the ZooKeeper Failover Controller for High Availability.
*/
hdfs zkfc -formatZK
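/**
* After formatting ZKFC, start HDFS again and confirm one namenode is active
* and the other standby (sketch; assumes the HA properties below are in place
* and ZooKeeper is already running on the quorum hosts):
*/
start-dfs.sh
hdfs haadmin -getAllServiceState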
###########################################################
/**
* Add these environment variables to $HADOOP_HOME/etc/hadoop/hadoop-env.sh
*/
export HDFS_NAMENODE_USER=hadoopuser
export HDFS_DATANODE_USER=hadoopuser
export HDFS_SECONDARYNAMENODE_USER=hadoopuser
export YARN_RESOURCEMANAGER_USER=hadoopuser
export YARN_NODEMANAGER_USER=hadoopuser
export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
######################################
/**
* HDFS settings in $HADOOP_HOME/etc/hadoop/core-site.xml
*/
<configuration>
<!-- Access to master namenode -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master-1.lan:9000</value>
</property>
<!-- Enable Cross Origin access
<property>
<name>hadoop.http.cross-origin-enabled</name>
<value>true</value>
</property>
-->
<!-- Allow the WebUI to write files to HDFS -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoopuser</value>
</property>
<!-- Prevent CORS from blocking file uploads -->
<property>
<name>hadoop.http.cross-origin.allowed-origins</name>
<value>hadoop-master.lan</value>
</property>
</configuration>
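/**
* Note (assumption, not in the original): once the HA settings in
* hdfs-site.xml are enabled, fs.defaultFS normally points at the logical
* nameservice instead of a single namenode, e.g.:
*/
<!--
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
-->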
######################################
/**
* Namenode and datanode settings in $HADOOP_HOME/etc/hadoop/hdfs-site.xml
*/
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///opt/hadoop-3.3.5/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///opt/hadoop-3.3.5/hdfs/datanode</value>
</property>
<!-- To switch to HA, add the properties below -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop-master-1:8485;hadoop-master-2:8485;hadoop-slave-1:8485;hadoop-slave-2:8485;hadoop-slave-3:8485;hadoop-slave-4:8485/mycluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop-master-1:2181,hadoop-master-2:2181</value>
</property>
</configuration>
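/**
* Assumption, not in the original: a complete HA setup also defines the
* nameservice and its namenode IDs. A sketch, using the IDs nn1/nn2 and the
* RPC port 9000 from core-site.xml:
*/
<!--
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop-master-1:9000</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop-master-2:9000</value>
</property>
-->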
######################################
/**
* Worker hostnames; each must resolve (forward and reverse DNS lookup).
* Located at $HADOOP_HOME/etc/hadoop/workers
*/
hadoop-master-1.lan
hadoop-master-2.lan
hadoop-slave-1.lan
hadoop-slave-2.lan
hadoop-slave-3.lan
hadoop-slave-4.lan
###########################################################