1.事先准备好一台linux虚拟机(网卡为仅主机模式,最小化安装)
#systemctl stop firewalld //关闭防火墙
#systemctl disable firewalld //防止开机自启
2.IPADDR 192.168.56.100/24 网关192.168.56.1
#vi /etc/sysconfig/network
NETWORKING=yes
GATEWAY=192.168.56.1
#vi /etc/sysconfig/network-scripts/ifcfg-enp0s3
TYPE=Ethernet
IPADDR=192.168.56.100
NETMASK=255.255.255.0
3.用root连接Xshell,使用xftp上传jdk和hadoop
4.安装jdk和hadoop(检验是否成功)
#cd /usr/local
#rpm -ivh jdk-8u91-linux-x64.rpm //用Tab补齐
#cd /usr/
#java
#cd local
#ls
#tar -xvf hadoop-2.7.3.tar.gz //注意用Tab补齐
#mv hadoop-2.7.3 hadoop //将解压出的目录重命名为hadoop,与后文使用的路径/usr/local/hadoop保持一致
#cd /usr/local
#ls
bin games hadoop-2.7.3.tar.gz jdk-8u91-linux-x64.rpm lib64 sbin src
etc hadoop include lib libexec share
# cd hadoop
# cd etc
# ls
hadoop
# cd h*
# ls
capacity-scheduler.xml httpfs-env.sh mapred-env.sh
configuration.xsl httpfs-log4j.properties mapred-queues.xml.template
container-executor.cfg httpfs-signature.secret mapred-site.xml.template
core-site.xml httpfs-site.xml slaves
hadoop-env.cmd kms-acls.xml ssl-client.xml.example
hadoop-env.sh kms-env.sh ssl-server.xml.example
hadoop-metrics2.properties kms-log4j.properties yarn-env.cmd
hadoop-metrics.properties kms-site.xml yarn-env.sh
hadoop-policy.xml log4j.properties yarn-site.xml
hdfs-site.xml mapred-env.cmd
#vim hadoop-env.sh //配置hadoop环境
//将export JAVA_HOME=${JAVA_HOME}改写为export JAVA_HOME=/usr/java/default
#vim /etc/profile
//在末尾添加export PATH=$PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
#source /etc/profile //执行改写的文件,使之生效
#hadoop
5.退出master,将master复制三台服务器:slave1,slave2,slave3
6.将三台服务器的Ip分别改为192.168.56.101,192.168.56.102,192.168.56.103,名字分别为slave1,slave2,slave3
查看是否都能ping通
systemctl stop firewalld //关闭防火墙
systemctl disable firewalld //关闭开机自启
7.master管理着另外三台服务器(slave1,2,3)(工具--发送键输入到所有会话)
#cd /usr/local/hadoop/etc/hadoop
#vim core-site.xml //四台都要
fs.defaultFS //名字
hdfs://master:9000 //hdfs协议,9000为监听端口
#vim /etc/hosts
192.168.56.100 master
192.168.56.101 slave1
192.168.56.102 slave2
192.168.56.103 slave3
//在master中
#hdfs namenode -format //格式化
#hadoop-daemon.sh start namenode
starting namenode, logging to /usr/local/ha
# jps
3352 NameNode //成功
3420 Jps
//在slave1,2,3中
#hadoop-daemon.sh start datanode
starting datanode, logging to /usr/local/hadoop/logs/hadoop-root-datanode-slave1.out
#jps
3210 DataNode //成功
3293 Jps
8.开启关闭集群命令
#hadoop-daemon.sh stop namenode //关闭
#hadoop-daemon.sh start namenode //开启
#jps //用来观察进程情况
9.集中式管理添加所有机器
#vim slaves
slave1
slave2
slave3
#start-dfs.sh //启动所有机器(需要输入密码)
#jps
10.ssh免密登录
#ssh slave1 //第一次需要输入密码
#exit
#cd //进入当前用户的主目录(root用户为/root)
#ls -la
#cd .ssh
#ssh-keygen -t rsa //默认直接回车确认
#ls
id_rsa id_rsa.pub known_hosts //id_rsa是root的私钥,id_rsa.pub是root的公钥
#ssh-copy-id slave1 //传递公钥
#ssh-copy-id slave2
#ssh-copy-id slave3
#ssh-copy-id master //默认本机也是远程登录,所以也需要
stop-dfs.sh //关闭集群
start-dfs.sh //开启集群
11.上传一个文件,每个hdfs块文件备份2份,从节点的心跳检查时间间隔10秒
#ls
capacity-scheduler.xml httpfs-env.sh mapred-env.sh
configuration.xsl httpfs-log4j.properties mapred-queues.xml.template
container-executor.cfg httpfs-signature.secret mapred-site.xml.template
core-site.xml httpfs-site.xml slave
hadoop-env.cmd kms-acls.xml slaves
hadoop-env.sh kms-env.sh ssl-client.xml.example
hadoop-metrics2.properties kms-log4j.properties ssl-server.xml.example
hadoop-metrics.properties kms-site.xml yarn-env.cmd
hadoop-policy.xml log4j.properties yarn-env.sh
hdfs-site.xml mapred-env.cmd yarn-site.xml
#vim hdfs-site.xml //修改多少个备份
dfs.replication
2
# vim hdfs-site.xml
dfs.namenode.heartbeat.recheck-interval
10000 //每隔10000毫秒更新
#cd /usr/local
# ls
bin hadoop-2.7.3.tar.gz lib64 src
etc include libexec
games jdk-8u91-linux-x64.rpm sbin
hadoop lib share
# hadoop fs -put jdk-8u91-linux-x64.rpm /
12.hadoop.tmp.dir配置为/var/hadoop/
1.所有机器都要修改
#cd /tmp
#cd /usr/local/hadoop/etc/hadoop
#ls
#vim core-site.xml
hadoop.tmp.dir
/var/hadoop
2.namenode格式化
#hdfs namenode -format
#stop-dfs.sh
#start-dfs.sh
13.进入java配置
1.导入必要的包,新建class
(1).share--hadoop--common--hadoop-common-2.7.3.jar
(2).share--hadoop--common--lib--全部
(3).share--hadoop--hdfs--hadoop-hdfs-2.7.3.jar
2.新建文件
#cd
#vi hello.txt
hello bj
hello sh
hello sz
hello AMD690G
#hadoop fs -put ./hello.txt /
# hadoop fs -ls /
Found 1 items
-rw-r--r-- 2 root supergroup 41 2017-10-06 23:12 /hello.txt
#vim hdfs-site.xml
dfs.permissions.enabled
false
#stop-dfs.sh
#start-dfs.sh
全部代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Minimal HDFS client demo.
 *
 * Connects to the NameNode configured through {@code fs.defaultFS} and prints
 * the path, permission and replication factor of every entry directly under
 * the HDFS root directory "/".
 *
 * The commented-out sections are earlier experiments (reading through a URL
 * stream handler, directory create/exists/delete, and two ways of uploading a
 * local file). They are kept for reference and are not executed.
 */
public class hellohdfs2 {
    /**
     * Program entry point.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the file system cannot be obtained or listed
     */
    public static void main(String[] args) throws Exception {
        /* Experiment: read a plain HTTP URL and copy it to stdout.
        URL url = new URL("http://www.baidu.com");
        InputStream in = url.openStream();
        IOUtils.copyBytes(in, System.out , 4096, true);*/

        /* Experiment: read an HDFS file through java.net.URL.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
        URL url = new URL("hdfs://192.168.56.100:9000/hello.txt");
        InputStream in = url.openStream();
        IOUtils.copyBytes(in, System.out , 4096, true);*/

        Configuration conf = new Configuration();
        // The master node in this setup is 192.168.56.100 and its NameNode
        // listens on port 9000; the address must match that host exactly.
        conf.set("fs.defaultFS", "hdfs://192.168.56.100:9000");

        // try-with-resources releases the client connection when main finishes.
        try (FileSystem fileSystem = FileSystem.get(conf)) {
            /* Experiment: create, test for and delete a directory.
            boolean success = fileSystem.mkdirs(new Path("/msb"));
            System.out.println(success);
            success = fileSystem.exists(new Path("/hello.txt"));
            System.out.println(success); // check whether the file exists
            success = fileSystem.delete(new Path("/msb"), true);
            System.out.println(success); // delete the directory
            success = fileSystem.exists(new Path("/msb"));
            System.out.println(success);*/

            /* Experiment: upload a local file using IOUtils.
            FSDataOutputStream out = fileSystem.create(new Path("/test.data"), true);
            FileInputStream fis = new FileInputStream("c:/test/core-site.xml");
            IOUtils.copyBytes(fis, out, 4096, true);*/

            /* Experiment: upload a local file with a manual copy loop.
            FSDataOutputStream out = fileSystem.create(new Path("/test.data"), true);
            FileInputStream in = new FileInputStream("c:/test/core-site.xml");
            byte[] buf = new byte[4096];
            int len = in.read(buf);
            while(len !=-1) {
                out.write(buf, 0, len);
                len = in.read(buf);
            }
            in.close();
            out.close();*/

            // List everything directly under the HDFS root directory.
            FileStatus[] statuses = fileSystem.listStatus(new Path("/"));
            //System.out.println(statuses.length);
            for (FileStatus status : statuses) {
                System.out.println(status.getPath());
                System.out.println(status.getPermission());
                System.out.println(status.getReplication());
            }
        }
    }
}
14.yarn
#cd /usr/local/hadoop/etc/hadoop
#vim yarn-site.xml //四台都要
yarn.resourcemanager.hostname
master
yarn.nodemanager.aux-services
mapreduce_shuffle
yarn.nodemanager.aux-services.mapreduce_shuffle.class
org.apache.hadoop.mapred.ShuffleHandler
//namenode上配置mapred-site.xml(该文件默认不存在,需先从mapred-site.xml.template复制一份)
#cp mapred-site.xml.template mapred-site.xml
#vi mapred-site.xml
mapreduce.framework.name
yarn
#start-yarn.sh
#jps
8225 NameNode
8418 SecondaryNameNode
8870 ResourceManager
9134 Jps
#vim mapred-site.xml
mapreduce.framework.name
yarn
#vi input.txt
hello java
hello java
hello c
hello c++
hello python
hello java
#cd
# vi input.txt
# hadoop fs -mkdir /wcinput
# hadoop fs -put input.txt /wcinput/
# hadoop fs -ls /wcinput
Found 1 items
-rw-r--r-- 2 root supergroup 64 2017-10-07 00:22 /wcinput/input.txt
#find /usr/local/hadoop -name "*example*.jar" //查找示例文件(通配符加引号,避免被shell提前展开)
#hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /wcinput/input.txt /wcoutput
通过网页来观察该job的运行情况
http://192.168.56.100:8088