DEVICE=eth0
HWADDR=00:25:90:EC:4A:3C
TYPE=Ethernet
UUID=40839d74-3d20-4333-ae22-0149d6834566
ONBOOT=yes
IPV4_FAILURE_FATAL=yes
NM_CONTROLLED=yes
BOOTPROTO=none
IPADDR=10.82.27.102
PREFIX=24
GATEWAY=10.82.27.1
DNS1=10.82.1.4
NAME="System eth0"
IPV6INIT=no
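To apply the static address, restart networking and verify (a quick check; on CentOS 7 the legacy network service still reads the ifcfg files):
systemctl restart network
ip addr show eth0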
cat /etc/security/limits.conf
* soft nproc 11000
* hard nproc 11000
* soft nofile 278528
* hard nofile 278528
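After logging in again, verify the new limits took effect:
ulimit -u    # max user processes, should report 11000
ulimit -n    # max open files, should report 278528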
chkconfig iptables off
service iptables stop
vim /etc/selinux/config
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - No SELinux policy is loaded.
SELINUX=disabled
# SELINUXTYPE= can take one of three values:
# targeted - Targeted processes are protected,
# minimum - Modification of targeted policy. Only selected processes are protected.
# mls - Multi Level Security protection.
SELINUXTYPE=targeted
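SELINUX=disabled only takes effect after a reboot; to stop enforcement immediately for the current session, drop to permissive mode and check:
setenforce 0
getenforce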
3. Set the hostname and host mappings
[root@hsmaster ~]# cat /etc/hostname
hsmaster
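On CentOS 7 the hostname can also be set in one step (run the matching command on each node):
hostnamectl set-hostname hsmaster
Cluster entries in /etc/hosts: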
10.82.27.194 hsslave3
10.82.27.100 hsmaster
10.82.27.102 hsslave4
10.82.27.22 hadoop01
10.82.27.23 hadoop02
10.82.27.24 hadoop03
10.82.27.191 hadooptest01
10.82.27.192 hadooptest02
10.82.27.193 hadooptest03
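A sketch for pushing the same hosts file to every node over SSH, assuming root access and the hostnames above:
for h in hsslave3 hsslave4 hadoop01 hadoop02 hadoop03 hadooptest01 hadooptest02 hadooptest03; do
  scp /etc/hosts root@$h:/etc/hosts
done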
5. Create the admin user (lowercase):
useradd admin
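Then give the account a password (interactive prompt):
passwd admin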
6. Configure environment variables in .bash_profile:
export JAVA_HOME=/home/admin/jdk1.8.0_191
export HADOOP_HOME=/home/admin/module/hadoop-2.7.6
export HADOOP_PREFIX=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$JAVA_HOME/bin:$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export SCALA_HOME=/home/admin/module/scala-2.11.12
export PATH=$PATH:${SCALA_HOME}/bin
export SPARK_HOME=/home/admin/module/spark-2.2.1-bin-hadoop2.7
export PATH=$PATH:${SPARK_HOME}/bin
export HIVE_HOME=/home/admin/module/apache-hive-2.3.3-bin
export PATH=$PATH:$HIVE_HOME/bin
export HBASE_HOME=/home/admin/module/hbase-2.1.1
export PATH=$HBASE_HOME/bin:$PATH
export SQOOP_HOME=/home/admin/module/sqoop
export PATH=${PATH}:${SQOOP_HOME}/bin
export ZK_HOME=/home/admin/module/zookeeper-3.4.10
export PATH=${PATH}:${ZK_HOME}/bin
export PHOENIX_HOME=/home/admin/module/apache-phoenix-5.0.0-HBase-2.0-bin
export PHOENIX_CLASSPATH=${PHOENIX_HOME}
export PATH=$PATH:${PHOENIX_HOME}/bin
export PRESTO_HOME=/home/admin/module/presto/presto-server-0.100
export PATH=$PATH:${PRESTO_HOME}/bin
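Reload the profile and spot-check a couple of the variables:
source ~/.bash_profile
echo $HADOOP_HOME
which hadoop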
1. Environment: three 64-bit CentOS 7 machines. Hadoop 2.7 requires 64-bit Linux. The CentOS 7 Minimal ISO is only about 600 MB, so the OS installs in ten-odd minutes:
hadoop01 192.168.100.129
Slave1 192.168.100.130
Slave2 192.168.100.132
2. SSH passwordless login. Hadoop operates on every node over SSH; I use the root user. Generate a key pair on each server, then merge the public keys into authorized_keys.
(1) CentOS does not enable passwordless SSH login by default. On every server, uncomment these two lines in /etc/ssh/sshd_config (the surrounding explanations can be ignored for now):
#RSAAuthentication yes
#PubkeyAuthentication yes
(2) Run ssh-keygen -t rsa, pressing Enter at every prompt (no passphrase); this creates the .ssh directory under /root. Do this on every server.
(3) Merge the public keys into authorized_keys. On hadoop01, go to /root/.ssh and merge them over SSH:
cat id_rsa.pub >> authorized_keys
ssh [email protected] cat ~/.ssh/id_rsa.pub >> authorized_keys
ssh [email protected] cat ~/.ssh/id_rsa.pub >> authorized_keys
(4) Copy hadoop01's authorized_keys and known_hosts into /root/.ssh on each slave server, adjusting for each hostname:
ssh-copy-id hsslave3
ssh-copy-id hsslave4
ssh-copy-id hsmaster
ssh-copy-id hsslave2
ssh-copy-id hsslave1
ssh-copy-id hadooptest01
ssh-copy-id hadooptest02
ssh-copy-id hadooptest03
ssh-copy-id hadooptest04
(5) Done: ssh [email protected] and ssh [email protected] no longer prompt for a password.
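A quick loop to confirm passwordless login from hadoop01 to both slaves:
for h in 192.168.100.130 192.168.100.132; do ssh root@$h hostname; done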
Alternatively, push a single public key directly (note that this overwrites the target's authorized_keys):
scp /home/admin/.ssh/id_rsa.pub [email protected]:/home/admin/.ssh/authorized_keys
-- Start the Spark History Server
Configure the Spark history server process and its log path.
hadoop fs -ls hdfs://hsmaster:9000/user/spark/applicationHistory/
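The event-log directory must exist in HDFS before the history server can use it; if the listing above fails, create it first (the spark owner here is an assumption):
hadoop fs -mkdir -p hdfs://hsmaster:9000/user/spark/applicationHistory
hadoop fs -chown -R spark:spark hdfs://hsmaster:9000/user/spark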
1. Add the following to spark-defaults.conf:
#History
spark.eventLog.dir=hdfs://hsmaster:9000/user/spark/applicationHistory
spark.eventLog.enabled=true
spark.yarn.historyServer.address=hsmaster:18018
#####################
2. Add the following to spark-env.sh:
##History-server
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18018 -Dspark.history.fs.logDirectory=hdfs://hsmaster:9000/user/spark/applicationHistory"
###################
3. Start it from $SPARK_HOME:
cd $SPARK_HOME/
./sbin/start-history-server.sh
Check that the port is listening and that the web UI loads:
[hadoop@snn sbin]$ netstat -tnlp |grep 18018
(Not all processes could be identified, non-owned process info
will not be shown, you would have to be root to see it all.)
tcp 0 0 :::18018 :::* LISTEN 7791/java
[hadoop@snn sbin]$
3. Install the JDK. Hadoop 2.7 needs JDK 7. My CentOS is a minimal install, so there is no OpenJDK; just extract the downloaded JDK and set the environment variables.
(1) Download jdk-7u79-linux-x64.gz and put it in /home/java.
(2) Extract it: tar -zxvf jdk-7u79-linux-x64.gz
(3) Edit /etc/profile:
export JAVA_HOME=/home/java/jdk1.7.0_79
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin
Or, for a JDK 8 install under /opt as used with Hadoop below:
export JAVA_HOME=/opt/jdk1.8.0_11
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/opt/hadoop/hadoop-2.7.3
export HADOOP_PREFIX=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export PATH=$PATH:$HADOOP_HOME/bin
(4) Apply the settings: source /etc/profile
(5) Run java -version to confirm.
4. Install Hadoop 2.7. Extract it on hadoop01 only, then copy the result to the slave servers.
(1) Download hadoop-2.7.3.tar.gz and put it in /opt/hadoop.
(2) Extract it: tar -xzvf hadoop-2.7.3.tar.gz
(3) Create the data directories under /opt/hadoop: tmp, hdfs, hdfs/data, hdfs/name
mkdir -p /opt/hadoop/tmp/
mkdir -p /opt/hadoop/hdfs/
mkdir -p /opt/hadoop/hdfs/name/
mkdir -p /opt/hadoop/hdfs/data/
5. Configure core-site.xml in /opt/hadoop/hadoop-2.7.3/etc/hadoop.
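The post omits the file body; a minimal sketch consistent with this walkthrough (assuming the NameNode runs on hadoop01 on port 9000 and uses the tmp directory created above):
<configuration>
  <!-- NameNode address; hostname and port are assumptions based on this walkthrough -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop01:9000</value>
  </property>
  <!-- Base directory for Hadoop temporary files (created in step 4) -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop/tmp</value>
  </property>
</configuration>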
A quick MapReduce smoke test once the cluster is up:
hadoop jar /opt/hadoop/hadoop-2.7.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /tmp/hive/root/input /tmp/hive/root/output
Configure hadoop-env.sh:
export HADOOP_IDENT_STRING=$USER
export JAVA_HOME=/home/admin/jdk1.8.0_45
export HADOOP_HOME=/home/admin/module/hadoop-2.7.6
export HADOOP_PREFIX=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_CLASSPATH=.:$CLASSPATH:$HADOOP_CLASSPATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/bin
Configure spark-env.sh:
export JAVA_HOME=/home/admin/jdk1.8.0_45
export HADOOP_HOME=/home/admin/module/hadoop-2.7.6
export HADOOP_PREFIX=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
9. In /opt/hadoop/hadoop-2.7.3/etc/hadoop, set JAVA_HOME in hadoop-env.sh and yarn-env.sh; without it the daemons will not start:
#export JAVA_HOME=/home/java/jdk1.7.0_79
export JAVA_HOME=/usr/java/jdk1.7.0_72
10. Edit slaves in /opt/hadoop/hadoop-2.7.3/etc/hadoop: delete the default localhost and add the two slave nodes:
192.168.0.183
192.168.0.184
11. Copy the configured Hadoop to the same location on every node with scp:
cd /opt/
scp -r hadoop-2.7.3 root@hadoop02:/opt/hadoop/
scp -r hadoop-2.7.3 root@hadoop03:/opt/hadoop/
12. Start Hadoop on hadoop01; the slave nodes start automatically. From /opt/hadoop/hadoop-2.7.3:
(1) Format the NameNode: bin/hdfs namenode -format
(2) Start everything with sbin/start-all.sh, or start the pieces separately:
sbin/start-dfs.sh
sbin/start-yarn.sh
(3) To stop, run sbin/stop-all.sh
(4) Run jps to list the running daemons.
13. For web access, open the required ports first, or simply turn the firewall off:
(1) Run systemctl stop firewalld.service
(2)浏览器打开http://10.82.27.22:8088/
(3)浏览器打开http://10.82.27.22:50070/
14. Installation complete. This is only the start of the big-data work; the next step is to write programs against Hadoop's interfaces, based on your own needs, to put HDFS and MapReduce to use.
[root@hadoop01 hadoop-2.7.3]# bin/hdfs namenode -format
18/08/07 10:52:46 INFO namenode.NameNode: STARTUP_MSG:
[root@hadoop01 hadoop-2.7.3]# sbin/start-all.sh
--
Stop Hadoop:
sbin/hadoop-daemon.sh stop namenode
sbin/hadoop-daemon.sh stop datanode
sbin/yarn-daemon.sh stop resourcemanager
sbin/yarn-daemon.sh stop nodemanager
Scripts to stop everything at once:
sbin/stop-yarn.sh
sbin/stop-dfs.sh
sbin/stop-all.sh
Counting TCP connection states on a busy server:
[root@test apache-tomcat-7.0.53]# netstat -n | awk '/^tcp/ {++S[$NF]} END {for(a in S) print a, S[a]}'
TIME_WAIT 14434
FIN_WAIT2 6
ESTABLISHED 133
Clearly the requests are too frequent: the back-end servers call each other over REST, and the Java server recycles TCP connections slowly. Even with explicit connection closes, connections are still reclaimed slowly in practice, so solving this through the Java API is a dead end.
After researching further, the problem can be addressed by tuning the Linux kernel. Solution:
vi /etc/sysctl.conf
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_fin_timeout = 30
Add the following settings:
# How many SYN retries the kernel sends for a new connection before giving up;
# should not exceed 255. The default is 5, corresponding to roughly 180 seconds.
net.ipv4.tcp_syn_retries=2
#net.ipv4.tcp_synack_retries=2
# How often TCP sends keepalive probes when keepalive is enabled.
# The default is 2 hours; lowered here to 1200 seconds.
net.ipv4.tcp_keepalive_time=1200
net.ipv4.tcp_orphan_retries=3
# Length of the SYN backlog queue, 1024 by default; a larger queue
# can hold more connections waiting to complete the handshake.
net.ipv4.tcp_max_syn_backlog = 4096
# Enable SYN cookies: when the SYN queue overflows, fall back to cookies,
# which defends against small-scale SYN flood attacks. Default 0 (off).
net.ipv4.tcp_syncookies = 1
# Allow TIME-WAIT sockets to be reused for new TCP connections. Default 0 (off).
net.ipv4.tcp_tw_reuse = 1
# Enable fast recycling of TIME-WAIT sockets. Default 0 (off).
net.ipv4.tcp_tw_recycle = 1
# How long a socket stays in FIN-WAIT-2 when this end initiated the close.
net.ipv4.tcp_fin_timeout = 30
## Reduce the number of keepalive probes before giving up on a connection
net.ipv4.tcp_keepalive_probes=5
## Enlarge the network device receive queue
net.core.netdev_max_backlog=3000
Apply the settings:
/sbin/sysctl -p
sysctl -p echoes each applied setting. Problem solved:
[root@test apache-tomcat-7.0.53]# netstat -n | awk '/^tcp/ {++S[$NF]} END {for(a in S) print a, S[a]}'
TIME_WAIT 96
FIN_WAIT2 3
ESTABLISHED 141
Consolidated /etc/sysctl.conf settings:
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_fin_timeout = 30
net.ipv4.tcp_keepalive_time=1200
net.ipv4.tcp_keepalive_probes=5
net.core.netdev_max_backlog=3000
net.ipv4.tcp_max_syn_backlog = 4096
vm.overcommit_memory=1
--------------------------------------------------------------------------------------------------------------------------
莫小安
1. Basic firewalld usage
Start: systemctl start firewalld
Stop: systemctl stop firewalld
Check status: systemctl status firewalld
Disable at boot: systemctl disable firewalld
Enable at boot: systemctl enable firewalld
2. systemctl is the main service-management tool on CentOS 7; it combines the functions of the old service and chkconfig commands.
Start a service: systemctl start firewalld.service
Stop a service: systemctl stop firewalld.service
Restart a service: systemctl restart firewalld.service
Show a service's status: systemctl status firewalld.service
Enable a service at boot: systemctl enable firewalld.service
Disable a service at boot: systemctl disable firewalld.service
Check whether a service is enabled at boot: systemctl is-enabled firewalld.service
List enabled services: systemctl list-unit-files|grep enabled
List services that failed to start: systemctl --failed
3. Configuring firewall-cmd
Show version: firewall-cmd --version
Show help: firewall-cmd --help
Show state: firewall-cmd --state
List all open ports: firewall-cmd --zone=public --list-ports
Reload firewall rules: firewall-cmd --reload
Show active zones: firewall-cmd --get-active-zones
Show the zone of an interface: firewall-cmd --get-zone-of-interface=eth0
Drop all packets (panic mode on): firewall-cmd --panic-on
Leave panic mode: firewall-cmd --panic-off
Check panic mode: firewall-cmd --query-panic
How do you open a port?
Add:
firewall-cmd --zone=public --add-port=80/tcp --permanent (--permanent makes the rule persistent; without it, the rule is lost after restart)
Reload:
firewall-cmd --reload
Check:
firewall-cmd --zone=public --query-port=80/tcp
Remove:
firewall-cmd --zone=public --remove-port=80/tcp --permanent
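Tying this back to step 13 above: instead of stopping firewalld outright, the Hadoop web UI ports could be opened explicitly (ports taken from the URLs used earlier):
firewall-cmd --zone=public --add-port=8088/tcp --permanent
firewall-cmd --zone=public --add-port=50070/tcp --permanent
firewall-cmd --reload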
If HDFS is stuck in safe mode after a restart:
hadoop dfsadmin -safemode leave