With data volumes growing explosively, local processing can no longer keep up, so a dedicated solution is needed; this guide deploys the big data analysis platform Hadoop.
Characteristics of big data: large volume, wide variety, high velocity, and high value.
No.  Hostname     IP address       Role
1    master-yly   192.168.91.134   master node
2    slave01-yly  192.168.91.135   slave node
3    slave02-yly  192.168.91.136   slave node
Operating system: CentOS 7
master-yly:
[root@localhost ~]# hostnamectl set-hostname master-yly
[root@localhost ~]# bash
slave01-yly:
[root@localhost ~]# hostnamectl set-hostname slave01-yly
[root@localhost ~]# bash
slave02-yly:
[root@localhost ~]# hostnamectl set-hostname slave02-yly
[root@localhost ~]# bash
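# Optional sanity check (run on each node): confirm the new hostname took effect.
[root@master-yly ~]# hostnamectl status
[root@master-yly ~]# hostname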
master-yly:
[root@master-yly ~]# vim /etc/hosts
192.168.91.134 master-yly
192.168.91.135 slave01-yly
192.168.91.136 slave02-yly
slave01-yly:
[root@slave01-yly ~]# vim /etc/hosts
192.168.91.134 master-yly
192.168.91.135 slave01-yly
192.168.91.136 slave02-yly
slave02-yly:
[root@slave02-yly ~]# vim /etc/hosts
192.168.91.134 master-yly
192.168.91.135 slave01-yly
192.168.91.136 slave02-yly
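# Optional: verify name resolution and connectivity between the nodes (shown from master-yly; repeat from the slaves if desired).
[root@master-yly ~]# ping -c 3 slave01-yly
[root@master-yly ~]# ping -c 3 slave02-yly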
# Stop the firewall on all three nodes
[root@master-yly ~]# systemctl stop firewalld
[root@slave01-yly ~]# systemctl stop firewalld
[root@slave02-yly ~]# systemctl stop firewalld
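# Stopping firewalld only lasts until reboot; disabling it as well keeps the cluster ports reachable after a restart (run on all three nodes).
[root@master-yly ~]# systemctl disable firewalld
[root@slave01-yly ~]# systemctl disable firewalld
[root@slave02-yly ~]# systemctl disable firewalld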
master-yly:
[root@master-yly ~]# yum install -y ntp
# Edit the configuration file and append the following two lines at the end:
[root@master-yly ~]# vim /etc/ntp.conf
server 127.127.1.0
fudge 127.127.1.0 stratum 10
# Start the service and enable it at boot:
[root@master-yly ~]# systemctl start ntpd
[root@master-yly ~]# systemctl enable ntpd
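# Optional: confirm ntpd is serving time; the local clock source should appear in the peer list after a short wait.
[root@master-yly ~]# ntpq -p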
slave01-yly:
[root@slave01-yly ~]# yum install -y ntpdate
[root@slave01-yly ~]# ntpdate master-yly
slave02-yly:
[root@slave02-yly ~]# yum install -y ntpdate
[root@slave02-yly ~]# ntpdate master-yly
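# Optional: ntpdate is a one-shot sync. A cron entry keeps the slaves aligned with the master over time (run on both slaves; the 30-minute interval here is an arbitrary choice).
[root@slave01-yly ~]# crontab -e
*/30 * * * * /usr/sbin/ntpdate master-yly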
# Generate an SSH key pair on the master (accept the defaults) and push the public key to every node
[root@master-yly ~]# ssh-keygen
[root@master-yly ~]# ssh-copy-id master-yly
[root@master-yly ~]# ssh-copy-id slave02-yly
[root@master-yly ~]# ssh-copy-id slave01-yly
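# Optional: each login should now succeed without a password prompt.
[root@master-yly ~]# ssh slave01-yly hostname
[root@master-yly ~]# ssh slave02-yly hostname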
# Create a directory to hold the Hadoop-related packages (note: paths are case-sensitive, and every later command uses /opt/hadoop)
[root@master-yly ~]# mkdir /opt/hadoop
# Move the packages into the new directory
[root@master-yly ~]# mv hadoop-2.7.6.tar.gz /opt/hadoop/
[root@master-yly ~]# mv jdk-8u171-linux-x64.tar.gz /opt/hadoop/
[root@master-yly ~]# cd /opt/hadoop/
# Unpack the JDK archive
[root@master-yly hadoop]# tar -zxvf jdk-8u171-linux-x64.tar.gz
# Edit /etc/profile to configure the environment variables
[root@master-yly hadoop]# vim /etc/profile
export JAVA_HOME=/opt/hadoop/jdk1.8.0_171
export PATH=$PATH:$JAVA_HOME/bin
# Reload the environment so the changes take effect
[root@master-yly hadoop]# source /etc/profile
# Verify the installation (it should report java version "1.8.0_171")
[root@master-yly hadoop]# java -version
# Unpack the Hadoop archive
[root@master-yly hadoop]# tar -zxvf hadoop-2.7.6.tar.gz
# Edit /etc/profile to configure the environment variables
[root@master-yly hadoop]# vim /etc/profile
export HADOOP_HOME=/opt/hadoop/hadoop-2.7.6
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# Reload the environment so the changes take effect
[root@master-yly hadoop]# source /etc/profile
# Verify the installation
[root@master-yly hadoop]# hadoop version
Note: the configuration files live under /opt/hadoop/hadoop-2.7.6/etc/hadoop/
# Edit core-site.xml
[root@master-yly hadoop]# cd hadoop-2.7.6/etc/hadoop/
[root@master-yly hadoop]# vim core-site.xml
<!-- Add inside <configuration>: the default filesystem URI and the base directory for Hadoop's working files -->
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://master-yly:9000</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop/hadoop-2.7.6/hdfs</value>
</property>
# Edit hdfs-site.xml
[root@master-yly hadoop]# vim hdfs-site.xml
<!-- Add inside <configuration>: the block replication factor and the SecondaryNameNode address -->
<property>
    <name>dfs.replication</name>
    <value>3</value>
</property>
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>slave02-yly:9001</value>
</property>
# Edit mapred-site.xml (created from the bundled template)
[root@master-yly hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@master-yly hadoop]# vim mapred-site.xml
<!-- Add inside <configuration>: run MapReduce jobs on YARN -->
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
# Edit yarn-site.xml
[root@master-yly hadoop]# vim yarn-site.xml
<!-- Add inside <configuration>: the ResourceManager host and the shuffle service MapReduce needs -->
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master-yly</value>
</property>
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
# Edit the slaves file; master-yly is listed too, so the master also runs a DataNode and NodeManager (which matches the jps output below)
[root@master-yly hadoop]# vim slaves
master-yly
slave01-yly
slave02-yly
# Edit hadoop-env.sh to pin JAVA_HOME so the daemons can find the JDK
[root@master-yly hadoop]# vim hadoop-env.sh
export JAVA_HOME=/opt/hadoop/jdk1.8.0_171
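# The target directory must already exist on the slaves before scp can copy into it; create it over SSH first:
[root@master-yly ~]# ssh slave01-yly "mkdir -p /opt/hadoop"
[root@master-yly ~]# ssh slave02-yly "mkdir -p /opt/hadoop"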
# Copy the JDK to the slave nodes
[root@master-yly ~]# scp -r /opt/hadoop/jdk1.8.0_171/ slave01-yly:/opt/hadoop/
[root@master-yly ~]# scp -r /opt/hadoop/jdk1.8.0_171/ slave02-yly:/opt/hadoop/
# Copy Hadoop to the slave nodes
[root@master-yly ~]# scp -r /opt/hadoop/hadoop-2.7.6/ slave01-yly:/opt/hadoop/
[root@master-yly ~]# scp -r /opt/hadoop/hadoop-2.7.6/ slave02-yly:/opt/hadoop/
# Copy the environment variables to the slave nodes
[root@master-yly hadoop]# scp /etc/profile slave01-yly:/etc/
[root@master-yly hadoop]# scp /etc/profile slave02-yly:/etc/
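# /etc/profile is only read at login, so reload it on each slave (shown for slave01-yly; repeat on slave02-yly):
[root@slave01-yly ~]# source /etc/profile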
# Format the NameNode (first setup only; reformatting wipes HDFS metadata), then start all daemons
[root@master-yly hadoop]# hdfs namenode -format
[root@master-yly hadoop]# start-all.sh
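# Optional: besides jps, the web UIs confirm the cluster is up; in Hadoop 2.x the NameNode UI listens on port 50070 and the ResourceManager UI on 8088.
# http://master-yly:50070  (HDFS)
# http://master-yly:8088   (YARN)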
# master-yly:
[root@master-yly ~]# jps
16626 DataNode
16916 ResourceManager
16468 NameNode
17048 NodeManager
18095 Jps
# slave01-yly:
[root@slave01-yly ~]# jps
16993 NodeManager
16823 DataNode
17320 Jps
# slave02-yly:
[root@slave02-yly ~]# jps
48538 NodeManager
48906 Jps
48204 DataNode
48367 SecondaryNameNode
# List everything in HDFS recursively
[root@master-yly ~]# hdfs dfs -ls -R /
# Create a directory
[root@master-yly ~]# hdfs dfs -mkdir /yly
# Create an empty file
[root@master-yly ~]# hdfs dfs -touchz /yly/41_yinliyang
# Append a line read from stdin
[root@master-yly ~]# echo "hello yinliyang" | hdfs dfs -appendToFile - /yly/41_yinliyang
# Print the file contents
[root@master-yly ~]# hdfs dfs -cat /yly/41_yinliyang
# Delete a file (assumes /test already exists in HDFS)
[root@master-yly ~]# hdfs dfs -rm /test
# Upload a local file (assumes a file named test in the current local directory)
[root@master-yly ~]# hdfs dfs -put test /yly
# Download an HDFS file to the local filesystem
[root@master-yly ~]# hdfs dfs -get /yly/test /home/
# Copy within HDFS (assumes the /hadoop directory exists)
[root@master-yly ~]# hdfs dfs -cp /yly/41_yinliyang /hadoop
# Move/rename within HDFS (assumes the /info directory exists)
[root@master-yly ~]# hdfs dfs -mv /hadoop/41_yinliyang /info
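# Optional end-to-end check: the bundled WordCount example exercises HDFS and YARN together. The jar path below is the standard location in the 2.7.6 distribution; /output must not exist before the job runs.
[root@master-yly ~]# hadoop jar /opt/hadoop/hadoop-2.7.6/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar wordcount /yly /output
[root@master-yly ~]# hdfs dfs -cat /output/part-r-00000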