Preparation
- Check the Ubuntu version
lsb_release -a
- Configure a domestic (China) apt mirror (link)
sudo nano /etc/apt/sources.list
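For reference, a minimal sketch of what the mirror entries could look like, assuming Ubuntu 18.04 (bionic) and the Tsinghua TUNA mirror; substitute your own release codename and preferred mirror:
# hypothetical /etc/apt/sources.list entries (TUNA mirror, release "bionic" assumed)
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse
deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse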
- Update the apt package index
sudo apt update
Passwordless SSH Login
Install
- ssh = secure shell
sudo apt install openssh-server
sudo service ssh restart
Test
which ssh
which sshd
Generate an SSH Key
Generate a public/private key pair
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
- -t = key type
- rsa, Rivest–Shamir–Adleman => based on factoring large integers
- dsa, Digital Signature Algorithm => based on the discrete logarithm problem
- -P = passphrase ('' means empty, so logins will not prompt for one)
- -f = output filename
Append the generated public key to the authorized keys file
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
command >> file
- Appends the command's standard output (stdout) to the end of the file
- Creates the file if it does not exist
- If the file already exists, stdout is appended after the existing content rather than overwriting it (see the short demo below)
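A short demonstration of the append semantics just described (the file name is an arbitrary example):
echo "first" >> /tmp/append-demo.txt    # creates the file
echo "second" >> /tmp/append-demo.txt   # appends; nothing is overwritten
cat /tmp/append-demo.txt                # prints both lines in order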
Test (if it fails, try restarting the service: sudo service ssh restart)
ssh localhost
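If ssh localhost still prompts for a password, overly open ~/.ssh permissions are a common cause, since OpenSSH refuses to use the key files otherwise; a minimal fix:
chmod 700 ~/.ssh                    # only the owner may access the directory
chmod 600 ~/.ssh/authorized_keys    # only the owner may read/write the key list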
Install the JDK
- Check the Java version
java -version
- Debian-based Linux uses apt for package management
- apt = Advanced Package Tool
- sudo executes a command as the system administrator; a command run via sudo behaves as if root had executed it directly
sudo apt install default-jdk
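Note that on recent Ubuntu releases default-jdk may install Java 11 rather than Java 8, while the .bashrc and hadoop-env.sh below hardcode java-8-openjdk-amd64. The standard Debian/Ubuntu tools below reveal which path was actually installed, so JAVA_HOME can be set to match:
readlink -f "$(which java)"       # e.g. /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
update-alternatives --list java   # lists every installed Java alternative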
Download and Install Hadoop
Download the binary
wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.1.2/hadoop-3.1.2.tar.gz
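Optionally, verify the download against the checksum Apache publishes (assuming the 3.1.2 release is still available from archive.apache.org):
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.2/hadoop-3.1.2.tar.gz.sha512
sha512sum hadoop-3.1.2.tar.gz   # compare the output with the value in the .sha512 file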
Extract
sudo tar -zxvf hadoop-3.1.2.tar.gz
- z = filter the archive through gzip
- x = extract files from the archive
- v = verbosely list the files processed
- f = use the given archive file
Move
sudo mv hadoop-3.1.2 /usr/local/hadoop
Inspect
ll /usr/local/hadoop
- bin/ = executables
- sbin/ = shell scripts
- etc/hadoop = configuration files
- lib/ = libraries
- logs/ = system logs
Set Hadoop Parameters
Edit .bashrc (it belongs to the current user, so sudo is not needed)
nano ~/.bashrc
#Hadoop variables
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_COMMON_LIB_NATIVE_DIR"
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
#Hadoop variables
Apply the settings
source ~/.bashrc
Test
echo $HADOOP_HOME
Test
hadoop version
Hadoop Component Configuration
hadoop-env.sh
core-site.xml
yarn-site.xml
mapred-site.xml
hdfs-site.xml
sudo nano /usr/local/hadoop/etc/hadoop/hadoop-env.sh
Set the Java installation path
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
sudo nano /usr/local/hadoop/etc/hadoop/core-site.xml
Set the default HDFS URI (fs.defaultFS; the older name fs.default.name is deprecated)
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
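Once hadoop is on the PATH (per the .bashrc above), the effective value can be checked with the stock getconf tool:
hdfs getconf -confKey fs.defaultFS   # should print hdfs://localhost:9000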
sudo nano /usr/local/hadoop/etc/hadoop/yarn-site.xml
Enable the MapReduce shuffle auxiliary service in the NodeManager (the class key embeds the service name, mapreduce_shuffle)
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
</configuration>
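If MapReduce containers later fail because environment variables such as HADOOP_MAPRED_HOME are not passed through, the Hadoop 3.x single-node guide also whitelists them; a hedged addition inside the same <configuration> block:
<property>
  <name>yarn.nodemanager.env-whitelist</name>
  <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>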
mapred-site.xml
Used to monitor the JobTracker's work allocation for map and reduce jobs and the TaskTrackers' execution status (MRv1 terminology; YARN replaces both roles)
Set the MapReduce framework to YARN
sudo nano /usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
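On Hadoop 3.x, jobs can also abort with "Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster" unless the application classpath is set; the 3.x single-node guide pairs the property above with:
<property>
  <name>mapreduce.application.classpath</name>
  <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>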
hdfs-site.xml
Used to configure the HDFS distributed file system
sudo nano /usr/local/hadoop/etc/hadoop/hdfs-site.xml
- Set the block replication factor (the total number of copies of each block kept in the cluster; 3 is the default, and 1 is common on a single node)
- Set the NameNode data storage directory
- Set the DataNode data storage directory
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop/hadoop_data/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop/hadoop_data/hdfs/datanode</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>127.0.0.1:50070</value>
  </property>
</configuration>
Create and Format the HDFS Directories
- Create the NameNode and DataNode storage directories
sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode
sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode
- Change the owner of the hadoop directory to the current user
sudo chown <username>:root -R /usr/local/hadoop
- -R = recursively
Format the NameNode (the older "hadoop namenode -format" form still works but is deprecated)
hdfs namenode -format
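A successful format lays out the NameNode's storage structure; a quick check (the directory layout is created by HDFS itself):
ls /usr/local/hadoop/hadoop_data/hdfs/namenode/current   # should list VERSION, fsimage_* and seen_txid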
Start Hadoop
Start HDFS
start-dfs.sh
View the HDFS (NameNode) web UI: http://localhost:50070, the address set via dfs.namenode.http-address above
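A quick smoke test once the HDFS daemons are up (the paths are arbitrary examples):
hdfs dfs -mkdir -p /user/$(whoami)         # create a home directory in HDFS
hdfs dfs -put ~/.bashrc /user/$(whoami)/   # upload any small local file
hdfs dfs -ls /user/$(whoami)               # confirm it arrived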
Start YARN
start-yarn.sh
View the ResourceManager web UI: http://localhost:8088
Check the running Java processes
jps
- HDFS: NameNode, SecondaryNameNode, and DataNode should all be running
- YARN: ResourceManager and NodeManager should both be running
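As an end-to-end check, the examples jar shipped with the release can run a small MapReduce job (the jar name matches the 3.1.2 version installed above):
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.2.jar pi 2 10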
Stop
stop-all.sh
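stop-all.sh is deprecated in Hadoop 3 and simply delegates to the per-subsystem scripts; stopping HDFS and YARN individually is equivalent:
stop-dfs.sh
stop-yarn.sh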