# Hadoop 3.3.0 Single-Node Installation (Ubuntu)

# Update package lists and install the default JDK, then confirm the version.
sudo apt-get update
sudo apt-get install default-jdk
java --version

# Create a dedicated group and user for Hadoop (choose a password when prompted).
sudo addgroup hadoop
sudo adduser --ingroup hadoop hduser

# Install SSH (Hadoop daemons manage nodes over ssh) and give hduser sudo rights.
sudo apt-get install ssh
sudo adduser hduser sudo

# Switch to the hadoop user for the remaining steps.
su hduser

# Generate a passphrase-less SSH key and authorize it so that
# "ssh localhost" works without a password (required by start-dfs.sh).
ssh-keygen -t rsa -P ""
cat "$HOME/.ssh/id_rsa.pub" >> "$HOME/.ssh/authorized_keys"

# Create the Hadoop installation directory (absolute path; -p is idempotent).
cd /
sudo mkdir -p /usr/local/hadoop

# Download the Hadoop 3.3.0 release and stage the tarball in the install dir.
# (cp without -r: the tarball is a regular file, not a directory.)
wget https://downloads.apache.org/hadoop/common/hadoop-3.3.0/hadoop-3.3.0.tar.gz
sudo cp hadoop-3.3.0.tar.gz /usr/local/hadoop/

# Give the hadoop user ownership of the whole install tree.
sudo chown -R hduser:hadoop /usr/local/hadoop

# Open ~/.bashrc to append the Hadoop environment variables (next section).
sudo nano ~/.bashrc

Add the following lines at the end of the `~/.bashrc` file:

#HADOOP VARIABLES START export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 export HADOOP_INSTALL=/usr/local/hadoop export PATH=$PATH:$HADOOP_INSTALL/bin export PATH=$PATH:/usr/local/hadoop/bin/ export PATH=$PATH:$HADOOP_INSTALL/sbin export HADOOP_MAPRED_HOME=$HADOOP_INSTALL export HADOOP_COMMON_HOME=$HADOOP_INSTALL export HADOOP_HDFS_HOME=$HADOOP_INSTALL export YARN_HOME=$HADOOP_INSTALL export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_INSTALL/lib/native export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib" #HADOOP VARIABLES END

# Unpack the tarball and move the distribution into /usr/local/hadoop.
tar xzf hadoop-3.3.0.tar.gz
sudo mv hadoop-3.3.0/* /usr/local/hadoop/

# Set JAVA_HOME for the Hadoop daemons: append the line below to hadoop-env.sh.
# (Hadoop 3.x needs Java 8+, so point at the Java 8 JDK, not Java 7.)
#   export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
sudo nano /usr/local/hadoop/etc/hadoop/hadoop-env.sh

# Create Hadoop's working/temp directory and hand it to the hadoop user.
sudo mkdir -p /app/hadoop/tmp
sudo chown hduser:hadoop /app/hadoop/tmp

# Edit the core configuration (properties in the next section).
sudo gedit /usr/local/hadoop/etc/hadoop/core-site.xml

Add the following properties inside the `<configuration>` element:

```xml
<property>
  <name>hadoop.tmp.dir</name>
  <value>/app/hadoop/tmp</value>
  <description>A base for other temporary directories.</description>
</property>
<property>
  <name>fs.default.name</name>
  <value>hdfs://localhost:54310</value>
  <description>The name of the default file system. A URI whose scheme and
  authority determine the FileSystem implementation. The uri's scheme
  determines the config property (fs.SCHEME.impl) naming the FileSystem
  implementation class. The uri's authority is used to determine the host,
  port, etc. for a filesystem.</description>
</property>
```

sudo gedit /usr/local/hadoop/etc/hadoop/mapred-site.xml

Add the following property inside the `<configuration>` element:

```xml
<property>
  <name>mapred.job.tracker</name>
  <value>localhost:54311</value>
  <description>The host and port that the MapReduce job tracker runs at.
  If "local", then jobs are run in-process as a single map and reduce
  task.</description>
</property>
```

sudo mkdir -p /usr/local/hadoop_store/hdfs/namenode sudo mkdir -p /usr/local/hadoop_store/hdfs/datanode sudo chown -R hduser:hadoop /usr/local/hadoop_store sudo gedit /usr/local/hadoop/etc/hadoop/hdfs-site.xml

Add the following properties inside the `<configuration>` element:

```xml
<property>
  <name>dfs.replication</name>
  <value>1</value>
  <description>Default block replication. The actual number of replications
  can be specified when the file is created. The default is used if
  replication is not specified in create time.</description>
</property>
<property>
  <name>dfs.namenode.name.dir</name>
  <value>file:/usr/local/hadoop_store/hdfs/namenode</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>file:/usr/local/hadoop_store/hdfs/datanode</value>
</property>
```

hadoop namenode -format start-dfs.sh start-yarn.sh jps