################ 修改目录权限 ####################
[root@vwmaster hadoop]# chown -R root:root hadoop260/

################ JDK ####################
[root@vwmaster hadoop260]# vi etc/hadoop/hadoop-env.sh
export JAVA_HOME=/opt/bigdata/java/jdk180

################ hadoop fs 文件系统 ####################
集群配置时将 IP 改为 主机名
[root@vwmaster hadoop260]# vi etc/hadoop/core-site.xml


  
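<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://vwmaster:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/bigdata/hadoop/hadoop260</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
</configuration>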
  

################ hadoop replicas 备份 ####################
dfs.replication 备份数量改为2
dfs.hosts 添加节点白名单
[root@vwmaster hadoop]# pwd
/opt/bigdata/hadoop

[root@vwmaster hadoop]# mkdir hdfs
[root@vwmaster hadoop]# ls
hadoop260 hdfs

[root@vwmaster hadoop]# cd hdfs/
[root@vwmaster hdfs]# mkdir namenode datanode
[root@vwmaster hdfs]# ls
datanode namenode

[root@vwmaster hadoop260]# vi etc/hadoop/hdfs-site.xml


  
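<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>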
  
  

################ hadoop mapreduce 计算框架 ####################
mapreduce.jobhistory.address 添加jobhistory访问端口
mapreduce.jobhistory.webapp.address 添加jobhistory web访问端口
[root@vwmaster hadoop260]# cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
[root@vwmaster hadoop260]# vi etc/hadoop/mapred-site.xml


  
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
  
  

################ hadoop yarn 管理调度 ####################
yarn.log-aggregation.retain-seconds 添加yarn日志保留时间为7天(单位秒)
yarn.nodemanager.aux-services.mapreduce.shuffle.class 添加指定shuffle计算具体类型
yarn.resourcemanager.hostname 添加yarn主机名
[root@vwmaster hadoop260]# vi etc/hadoop/yarn-site.xml


  
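<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>vwmaster</value>
  </property>
</configuration>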
  
  

################ hadoop slaves 主机名 ####################

注意:hadoop 3.0 以上版本 slaves 更名为 workers

添加所有节点主机名 vwmaster:yarn.resourcemanager
添加所有节点主机名 vwjob:mapreduce.jobhistory
添加所有节点主机名 vwslave01/02/03:datanode
[root@vwmaster hadoop260]# vi etc/hadoop/slaves

vwslave01
vwslave02
vwslave03

################ hadoop 环境变量 ####################
[root@vwmaster hadoop260]# vi /etc/profile

export JAVA_HOME=/opt/bigdata/java/jdk180
export TOMCAT_HOME=/opt/bigdata/java/tomcat85
export NODE_HOME=/opt/bigdata/elk/node891

export HADOOP_HOME=/opt/bigdata/hadoop/hadoop260
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"

export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$NODE_HOME/bin:$JAVA_HOME/bin:$TOMCAT_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

[root@vwmaster hadoop260]# source /etc/profile

################ hadoop 格式化 HDFS ####################
[root@vwmaster hadoop260]# cd bin
[root@vwmaster bin]# hdfs namenode -format
##见到以下信息说明格式化成功
/****
SHUTDOWN_MSG: Shutting down NameNode at vwmaster/20.0.0.100
****/
##如果格式化失败:需要去检查之前各项配置

################ hadoop-native库 ####################
[root@vwmaster bin]# cd /opt/install/hadoop
[root@vwmaster hadoop]# tar -xf hadoop-native-64-2.6.0.tar -C /opt/bigdata/hadoop/hadoop260/lib/native/

################ 启动 hadoop ####################
[root@vwmaster hadoop]# cd /opt/bigdata/hadoop/hadoop260/bin
[root@vwmaster bin]# start-dfs.sh
[root@vwmaster bin]# start-yarn.sh
[root@vwmaster bin]# stop-all.sh

################ 启动或关闭 hadoop 需要输入密码的解决方法 ####################
[root@vwmaster bin]# cd ~
[root@vwmaster ~]# cd .ssh/
[root@vwmaster .ssh]# ls
authorized_keys id_rsa id_rsa.pub known_hosts
[root@vwmaster .ssh]# cat id_rsa.pub >> authorized_keys
[root@vwmaster .ssh]# ssh localhost

################ 启动 jobhistory ####################
[root@vwmaster bin]# cd ../sbin/
[root@vwmaster sbin]# jps
[root@vwmaster sbin]# ./mr-jobhistory-daemon.sh start historyserver

################ 查看 hadoop 服务状态 ####################
[root@vwmaster sbin]# jps
6800 NodeManager
7329 Jps
6387 DataNode
6548 SecondaryNameNode
6264 NameNode
6697 ResourceManager
7259 JobHistoryServer

Web 界面:
http://20.0.0.100:50070  HDFS NameNode
http://20.0.0.100:8088   YARN ResourceManager
http://20.0.0.100:19888  MapReduce JobHistory

#------------------------- 操作 hdfs -----------------------------------#
查看hdfs目录
如果存在警告 20/08/15 08:24:13 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
解决方法:在log4j.properties中添加:log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR

cd ~/kb08/data

hdfs dfs -ls /
hdfs dfs -du [-s] /kb08/data 统计文件夹信息
hdfs dfs -mkdir /kb08
hdfs dfs -mkdir -p /kb08/data 创建多级目录
hdfs dfs -rmdir /kb08/data 删除目录

hdfs dfs -put ~/kb08/hadoop/story.csv /kb08/data 上传文件
hdfs dfs -get /kb08/data/story.csv ~ 下载文件
hdfs dfs -getmerge /kb08/data/* ~/merge.txt 合并下载
hdfs dfs -rm /kb08/data/story.csv
hdfs dfs -cat /kb08/data/story.csv

touch append.txt
vi append.txt
hello guys, I am so honored to see you again
let us enjoy this moment

hdfs dfs -appendToFile append.txt /kb08/data/story.csv 追加文件内容
hdfs dfs -chmod 777 /kb08/data/story.csv
hdfs dfs -chown henry:root /kb08/hadoop/story.csv
hdfs dfs -cp /kb08/hadoop/story.csv /kbcom/data
hdfs dfs -mv /kb08/hadoop/story.csv /kbcom/data

hdfs dfs -setrep 3 /kb08/data/story.csv 设置文件备份数量(不能超过节点数量)
hdfs dfsadmin -safemode get 查看安全模式
hdfs dfsadmin -safemode enter 进入安全模式
hdfs dfsadmin -safemode leave 离开安全模式

#--------------------- java 操作hadoop ----------------------#
#依赖

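<properties>
  <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  <maven.compiler.source>1.8</maven.compiler.source>
  <maven.compiler.target>1.8</maven.compiler.target>
  <hadoop.version>2.6.0-cdh5.14.2</hadoop.version>
</properties>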



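<dependencies>
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.11</version>
    <scope>test</scope>
  </dependency>

  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>${hadoop.version}</version>
  </dependency>

  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>${hadoop.version}</version>
  </dependency>

  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
  </dependency>

  <dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
  </dependency>
</dependencies>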

#--------------- 因为maven没提供hadoop仓库,所以手动添加 --------------------------#


<repositories>
  <repository>
    <id>cloudera</id>
    <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
  </repository>
</repositories>