准备工作
- 查看ubuntu版本
 
lsb_release -a
- 配置国内下载镜像源 link
 
sudo nano /etc/apt/sources.list
- 更新 apt
sudo apt update
SSH 无密码登录
安装
- ssh = secure shell
 
sudo apt install openssh-server
sudo service ssh restart
测试
which ssh
which sshd
产生 SSH key
生成 public/private key pair
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
- t = type 密钥类型
 - rsa, Rivest–Shamir–Adleman => factorization of large integers
 - dsa, Digital Signature Algorithm => discrete logarithm problem
 - P = passphrase
 - f = filename
 
将产生的 key 放置授权文件
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
指令 >> 文件
- 会将指令产生的标准输出(stdout),附加在文件之后
- 文件不存在时,会新建
 - 当文件已经存在,会将stdout附加在文件内容后面,而不会覆盖
 
 
测试【不成功则尝试重启 sudo service ssh restart】
ssh localhost
安装 JDK
- 查看Java版本
 
java -version
- Linux使用apt进行套件管理
 - apt = advanced package tool
 - sudo命令以系统管理者的身份执行指令,也就是说,经由 sudo 所执行的指令就好像是 root 亲自执行
 
sudo apt install default-jdk
下载安装 Hadoop
下载binary
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.2/hadoop-3.1.2.tar.gz
解压缩
sudo tar -zxvf hadoop-3.1.2.tar.gz
- z = filter file thru gzip
 - x = extract files from an archive
 - v = verbosely list files processed
 - f = filename
 
移动
sudo mv hadoop-3.1.2 /usr/local/hadoop
查看
ll /usr/local/hadoop
- bin/ 执行文件
- sbin/ shell执行文件
- etc/hadoop/ 设定文件
- lib/ 函数库
- logs/ 系统日志
设定 Hadoop 参数
编辑 .bashrc
sudo nano .bashrc
#Hadoop variables
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME 
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_COMMON_LIB_NATIVE_DIR"
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
#Hadoop variables
生效设定
source .bashrc
测试
echo $HADOOP_HOME
测试
hadoop version
Hadoop组件设定
需要设定的文件:hadoop-env.sh、core-site.xml、yarn-site.xml、mapred-site.xml、hdfs-site.xml
sudo nano /usr/local/hadoop/etc/hadoop/hadoop-env.sh
设定Java安装路径
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
sudo nano /usr/local/hadoop/etc/hadoop/core-site.xml
hdfs预设名称
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
sudo nano /usr/local/hadoop/etc/hadoop/yarn-site.xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
</configuration>
mapred-site.xml用于设定MapReduce的运行框架(Hadoop 1 中由JobTracker分配工作、TaskTracker执行;在YARN架构下此角色由ResourceManager与NodeManager承担)
设定mapreduce框架为yarn
sudo nano /usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
hdfs-site.xml用于设定HDFS分散式档案系统
sudo nano /usr/local/hadoop/etc/hadoop/hdfs-site.xml
- 设定blocks副本备份数(每一个文件在其他node的备份数量)
 - 设定NameNode资料存储目录
 - 设定DataNode资料存储目录
 
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
     <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop/hadoop_data/hdfs/namenode</value>
    </property>
     <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop/hadoop_data/hdfs/datanode</value>
    </property>
    <property>
        <name>dfs.http.address</name>
        <value>127.0.0.1:50070</value>
    </property>
</configuration>
建立与格式化HDFS目录
- 建立NameNode以及DataNode文件存储目录
 
sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode
sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode
- 更改hadoop文件夹拥有者
 
sudo chown <username>:root -R /usr/local/hadoop
- R = recursively
 
格式化NameNode
hdfs namenode -format
启动Hadoop
启动 HDFS
start-dfs.sh
查看HDFS web界面:http://localhost:50070
启动 YARN
start-yarn.sh
查看Resource Manager web界面:http://localhost:8088
查看已经启动的进程
jps
HDFS功能:NameNode、SecondaryNameNode、DataNode都已启动
YARN功能:ResourceManager、NodeManager都已启动
关闭
stop-all.sh