Hadoop Installation 2

The document provides instructions for setting up a Hadoop cluster with high availability (HA) across multiple nodes. It covers installing Java and Hadoop, configuring environment variables, setting up HDFS and YARN, editing the configuration files, starting the services, and enabling HA features such as the journalnodes and ZooKeeper. The goal is a resilient cluster that survives failures through automatic failover between redundant NameNodes.


/***************************************************/
/* Attention! */
/* Reverse DNS lookup must be set up properly. */
/* Configuration should be done on the namenode; */
/* otherwise, datanodes cannot connect to it. */
/* An alternative is to set up a DNS server. */
/* Without HA, the web UI port is 9870. */
/* With HA, the web UI port is 50070. */
/***************************************************/
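
/**
 * If no DNS server is available, a shared /etc/hosts on every node is one
 * way to make forward and reverse lookups resolve. The IP addresses below
 * are placeholders; replace them with the real addresses of your machines.
 */
cat <<'EOF' | sudo tee -a /etc/hosts
192.168.0.101 hadoop-master-1.lan hadoop-master-1
192.168.0.102 hadoop-master-2.lan hadoop-master-2
192.168.0.111 hadoop-slave-1.lan hadoop-slave-1
192.168.0.112 hadoop-slave-2.lan hadoop-slave-2
192.168.0.113 hadoop-slave-3.lan hadoop-slave-3
192.168.0.114 hadoop-slave-4.lan hadoop-slave-4
EOF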

/**
 * Add a new user and configure sudo.
 */
adduser hadoopuser &&
apt install sudo &&
visudo &&
clear
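
/**
 * Inside visudo, a line like the one below grants hadoopuser full sudo
 * rights (a sketch; adjust to your own policy).
 */
hadoopuser    ALL=(ALL:ALL) ALL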

/**
 * Alternative way to grant hadoopuser sudo rights.
 */
usermod -aG sudo hadoopuser

/**
* ssh login without password
*/
ssh-keygen -t rsa

/* cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys */

/**
 * Log in to the other machines without a password.
 */
ssh-copy-id hadoopuser@hadoop-master-1
ssh-copy-id hadoopuser@hadoop-slave-1
ssh-copy-id hadoopuser@hadoop-slave-2
ssh-copy-id hadoopuser@hadoop-slave-3
ssh-copy-id hadoopuser@hadoop-slave-4
ssh-copy-id hadoopuser@hadoop-master-2

sudo su

/**
 * Install the Java Runtime Environment and Java Development Kit,
 * then download the Hadoop archive and extract it under /opt.
 */
sudo apt update && sudo apt install default-jre default-jdk -y
cd /opt
# wget http://ftp.mirror.tw/pub/apache/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz &&
sudo wget http://192.168.0.11:8080/FTP/hadoop-3.3.5.tar.gz
sudo tar -zxvf hadoop-3.3.5.tar.gz
sudo rm hadoop-3.3.5.tar.gz
cd hadoop-3.3.5/

/**
 * Create a .bashrc in /opt/hadoop-3.3.5 holding the environment variables.
 */
echo "export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64" > .bashrc
echo "export HADOOP_HOME=/opt/hadoop-3.3.5" >> .bashrc
echo "export PATH=\$PATH:\$HADOOP_HOME/bin" >> .bashrc
echo "export PATH=\$PATH:\$HADOOP_HOME/sbin" >> .bashrc
echo "export HADOOP_MAPRED_HOME=\$HADOOP_HOME" >> .bashrc
echo "export HADOOP_COMMON_HOME=\$HADOOP_HOME" >> .bashrc
echo "export HADOOP_HDFS_HOME=\$HADOOP_HOME" >> .bashrc
echo "export YARN_HOME=\$HADOOP_HOME" >> .bashrc
echo "export HADOOP_COMMON_LIB_NATIVE_DIR=\$HADOOP_HOME/lib/native" >> .bashrc
echo "export HADOOP_OPTS=\"-Djava.library.path=\$HADOOP_HOME/lib\"" >> .bashrc

/**
* Load .bashrc
*/
source .bashrc
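
/**
 * Quick sanity check that the environment variables took effect.
 */
echo $HADOOP_HOME
hadoop version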

/**
 * Create the namenode, datanode, and journalnode data storage paths.
 */
cd /opt/hadoop-3.3.5
mkdir hdfs
mkdir hdfs/journalnode
mkdir hdfs/datanode
mkdir hdfs/namenode
chown hadoopuser -R ./

## Before modifying the configuration!!! ##

nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
nano $HADOOP_HOME/etc/hadoop/core-site.xml
nano $HADOOP_HOME/etc/hadoop/hdfs-site.xml
nano $HADOOP_HOME/etc/hadoop/workers

## After modifying the configuration!!! ##

/**
 * Copy the config to the other machines.
 * Only one node needs to run these commands.
 */
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/

/**
 * Format the namenode and start all Hadoop daemons.
 */
chown hadoopuser -R ./
hdfs namenode -format
start-all.sh
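
/**
 * Optional checks after start-all.sh: list the running Java daemons and the
 * datanodes the namenode can see. Without HA, the NameNode web UI is at
 * http://hadoop-master-1.lan:9870
 */
jps
hdfs dfsadmin -report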

## High Availability ##

/**
 * Run this command on every node that will run a journalnode.
 */
source /opt/hadoop-3.3.5/.bashrc && hdfs --daemon start journalnode

/**
 * Synchronize the namenode's state to the journalnodes,
 * and copy the namenode metadata to the standby namenode.
 */
hdfs namenode -initializeSharedEdits
scp /opt/hadoop-3.3.5/hdfs/namenode/* hadoopuser@hadoop-master-2:/opt/hadoop-3.3.5/hdfs/namenode/
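
/**
 * Alternatively, instead of copying the metadata by hand, the standby
 * namenode can fetch it itself (run on hadoop-master-2).
 */
hdfs namenode -bootstrapStandby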

/**
 * Initialize the ZooKeeper Failover Controller (ZKFC) state for High Availability.
 */
hdfs zkfc -formatZK
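
/**
 * ZooKeeper must already be running on the hosts listed in
 * ha.zookeeper.quorum before the formatZK step succeeds. A rough sequence
 * after formatting: restart the HDFS daemons (with automatic failover
 * enabled this also starts the zkfc processes) and check which namenode
 * became active; nn1 and nn2 are the ids defined in hdfs-site.xml.
 */
start-dfs.sh
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2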

###########################################################

/**
 * Add these environment variables to $HADOOP_HOME/etc/hadoop/hadoop-env.sh
 */

export HDFS_NAMENODE_USER=hadoopuser
export HDFS_DATANODE_USER=hadoopuser
export HDFS_SECONDARYNAMENODE_USER=hadoopuser
export YARN_RESOURCEMANAGER_USER=hadoopuser
export YARN_NODEMANAGER_USER=hadoopuser
export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"

######################################

/**
 * HDFS settings in $HADOOP_HOME/etc/hadoop/core-site.xml
 */

<configuration>
  <!-- Address of the master namenode -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop-master-1.lan:9000</value>
  </property>
  <!-- Enable Cross-Origin access
  <property>
    <name>hadoop.http.cross-origin.enabled</name>
    <value>true</value>
  </property>
  -->
  <!-- Allow the WebUI to write files to HDFS -->
  <property>
    <name>hadoop.http.staticuser.user</name>
    <value>hadoopuser</value>
  </property>
  <!-- Prevent CORS from blocking file uploads -->
  <property>
    <name>hadoop.http.cross-origin.allowed-origins</name>
    <value>hadoop-master.lan</value>
  </property>
</configuration>
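
/**
 * Note: once HA is enabled, fs.defaultFS normally points at the nameservice
 * id instead of a single namenode, e.g. (using the mycluster id defined in
 * hdfs-site.xml):
 */

<property>
  <name>fs.defaultFS</name>
  <value>hdfs://mycluster</value>
</property>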

######################################

/**
 * Namenode and datanode settings in $HADOOP_HOME/etc/hadoop/hdfs-site.xml
 */

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///opt/hadoop-3.3.5/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///opt/hadoop-3.3.5/hdfs/datanode</value>
  </property>

  <!-- To switch to HA, add the properties below. -->

  <!-- Configure High Availability -->
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>

  <!-- Masters' RPC addresses -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>hadoop-master-1:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>hadoop-master-2:8020</value>
  </property>

  <!-- Masters' HTTP addresses -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>hadoop-master-1:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>hadoop-master-2:50070</value>
  </property>

  <!-- Shared edits directory on the journalnode quorum -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hadoop-master-1:8485;hadoop-master-2:8485;hadoop-slave-1:8485;hadoop-slave-2:8485;hadoop-slave-3:8485;hadoop-slave-4:8485/mycluster</value>
  </property>

  <!-- Fencing, to ensure only one namenode is active at a time -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoopuser/.ssh/id_rsa</value>
  </property>

  <!-- Failover settings -->
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>

  <!-- Journalnode data directory -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/hadoop-3.3.5/hdfs/journalnode</value>
  </property>

  <!-- ZooKeeper quorum used by the failover controllers -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoop-master-1:2181,hadoop-master-2:2181</value>
  </property>
</configuration>

######################################

/**
 * Worker hostnames; reverse DNS lookup must resolve these.
 * Located at $HADOOP_HOME/etc/hadoop/workers
 */

hadoop-master-1.lan
hadoop-master-2.lan
hadoop-slave-1.lan
hadoop-slave-2.lan
hadoop-slave-3.lan
hadoop-slave-4.lan
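
/**
 * Quick check that forward and reverse lookups resolve on each node
 * (the IP below is a placeholder).
 */
getent hosts hadoop-master-1.lan
getent hosts 192.168.0.101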

###########################################################
