Hadoop Installation
/* Attention! */
/* Reverse DNS lookup must be set up properly. */
/* Configuration should be done on the namenode. */
/* Otherwise, datanodes cannot connect to the namenode. */
/* An alternative is to set up a DNS server. */
/***************************************************/
/**
* add new user and configure sudo
*/
# Create the dedicated Hadoop service account, install sudo, and grant
# the account sudo rights by editing /etc/sudoers (visudo is interactive).
adduser hadoopuser &&
apt install sudo &&
visudo &&
clear
/**
* alternative way to use sudo
*/
# Alternative to editing sudoers by hand: append (-a) hadoopuser to the
# sudo group (-G) so it inherits the group's sudo privileges.
usermod -aG sudo hadoopuser
/**
* ssh login without password
*/
# Generate an RSA key pair for password-less SSH (accept the defaults,
# leave the passphrase empty so Hadoop's start scripts can log in).
ssh-keygen -t rsa
# To allow password-less login to THIS host, the public key can be
# copied into authorized_keys:
/* cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys */
/**
* Login to other machine without password
*/
ssh-copy-id [email protected] &&
ssh-copy-id [email protected] &&
ssh-copy-id [email protected] &&
ssh-copy-id [email protected] &&
ssh-copy-id [email protected]
sudo su
/**
* Install Java Runtime Environment and Java Development Kits
 * Then download the Hadoop tarball and decompress it into /opt.
*/
# Install the Java runtime and JDK required by Hadoop, then download the
# Hadoop 3.3.5 tarball and unpack it under /opt.
apt update && apt install default-jre default-jdk -y &&
cd /opt &&
# Official Apache mirror — kept for reference (the original note wrapped
# this URL across two lines, which broke the command):
# wget http://ftp.mirror.tw/pub/apache/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz &&
# Faster LAN mirror (fixed: the word "wget" was duplicated here).
wget http://truenas.lan:8080/FTP/hadoop-3.3.5.tar.gz &&
tar -zxvf hadoop-3.3.5.tar.gz &&
rm hadoop-3.3.5.tar.gz &&
cd hadoop-3.3.5/
/**
* Create .bashrc for environment variable.
*/
# Write the Hadoop environment setup into ./.bashrc in one shot.
# The quoted here-doc delimiter keeps every '$' literal, so the
# variables are only expanded when the file is sourced later —
# exactly what the original escaped-echo lines produced.
cat > .bashrc <<'BASHRC'
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
export HADOOP_HOME=/opt/hadoop-3.3.5
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
BASHRC
/**
* Load .bashrc
*/
# Load the new environment variables into the current shell session.
source .bashrc
/**
 * Create the namenode and datanode data storage paths.
*/
# Create the local storage directories for HDFS metadata (namenode) and
# block data (datanode). -p creates hdfs/ and both children in one
# idempotent call, so re-running this step is harmless.
mkdir -p hdfs/namenode hdfs/datanode
# Hand the whole install tree to the hadoop account.
# (Fixed: options now precede the owner operand — the original
# 'chown hadoopuser -R ./' order only works with GNU chown.)
chown -R hadoopuser ./
# Edit the four configuration files; the required contents are listed
# later in this document.
nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
nano $HADOOP_HOME/etc/hadoop/core-site.xml
nano $HADOOP_HOME/etc/hadoop/hdfs-site.xml
nano $HADOOP_HOME/etc/hadoop/workers
/**
* Copy config to other machines
*/
# Push the finished configuration to every slave node.
# Fixed: each scp must be one command line (or use a trailing backslash);
# the original wrapped mid-command, leaving scp without a destination and
# the 'hadoopuser@...' remainder executed as a bogus command.
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/ &&
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/ &&
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/ &&
scp /opt/hadoop-3.3.5/etc/hadoop/* [email protected]:/opt/hadoop-3.3.5/etc/hadoop/
# Make sure hadoopuser owns the install tree on this node as well
# (options before the owner operand for portability).
chown -R hadoopuser ./
# Format HDFS once — WARNING: this erases any existing namenode metadata.
hdfs namenode -format
# Start HDFS and YARN daemons on all nodes listed in the workers file.
start-all.sh
###########################################################
/**
* Add environment variable to $HADOOP_HOME/etc/hadoop/hadoop-env.sh
*/
# Run all HDFS and YARN daemons as hadoopuser instead of root; the
# start scripts refuse to launch daemons as root without these.
export HDFS_NAMENODE_USER=hadoopuser
export HDFS_DATANODE_USER=hadoopuser
export HDFS_SECONDARYNAMENODE_USER=hadoopuser
export YARN_RESOURCEMANAGER_USER=hadoopuser
export YARN_NODEMANAGER_USER=hadoopuser
# JVM used by every Hadoop daemon (matches the default-jdk installed above).
export JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
######################################
/**
* Set HDFS to $HADOOP_HOME/etc/hadoop/core-site.xml
*/
<configuration>
<!-- fs.defaultFS: the URI clients and datanodes use to reach the
     namenode — RPC on hadoop-master.lan, port 9000. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master.lan:9000</value>
</property>
</configuration>
######################################
/**
* Set namenode and datanode config to $HADOOP_HOME/etc/hadoop/hdfs-site.xml
*/
<configuration>
<!-- Copies kept of each HDFS block; 1 means no redundancy, so a single
     disk failure loses data — acceptable only for a test cluster. -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- Local directory where the namenode stores filesystem metadata
     (the hdfs/namenode directory created earlier). -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///opt/hadoop-3.3.5/hdfs/namenode</value>
</property>
<!-- Local directory where each datanode stores its block files. -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///opt/hadoop-3.3.5/hdfs/datanode</value>
</property>
</configuration>
######################################
/**
 * Authenticate hostnames for reverse DNS lookup.
* Located at $HADOOP_HOME/etc/hadoop/workers
*/
hadoop-master.lan
hadoop-slave1.lan
hadoop-slave2.lan
hadoop-slave3.lan
hadoop-slave4.lan
###########################################################