Hadoop 2.9: upload the archive and extract it
Create a directory named hadoop under /data (command: mkdir hadoop) and upload the downloaded hadoop-2.9.2.tar.gz into it.
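The archive then needs to be extracted. A minimal sketch (it assumes the tarball sits in /data/hadoop as described above and unpacks to /data/hadoop-2.9.2, which matches the HADOOP_HOME used below):
tar -zxvf /data/hadoop/hadoop-2.9.2.tar.gz -C /data/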
mkdir /data/hadoop
mkdir /data/hadoop/tmp
mkdir /data/hadoop/var
mkdir /data/hadoop/dfs
mkdir /data/hadoop/dfs/name
mkdir /data/hadoop/dfs/data
mkdir -p /data/hadoop/tmp/dfs/name
Configure the /etc/profile file:
export HADOOP_HOME=/data/hadoop-2.9.2
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
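After saving /etc/profile, reload it so the new variables take effect in the current shell (a standard step, not spelled out in the original notes):
source /etc/profile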
In core-site.xml, set the following properties:
hadoop.tmp.dir = /data/hadoop/tmp (A base for other temporary directories.)
fs.default.name = hdfs://localhost:9999
Modify JAVA_HOME (in hadoop-env.sh) to point to the actual JDK installation directory:
#export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=/usr/local/java/jdk1.8.0_144
In hdfs-site.xml, set:
dfs.replication = 1
dfs.webhdfs.enabled = true
dfs.http.address = 0.0.0.0:50070
In yarn-site.xml, set:
yarn.nodemanager.aux-services = mapreduce_shuffle
In mapred-site.xml, set:
mapred.job.tracker = hdfs://localhost:9999
mapred.tasktracker.map.tasks.maximum = 2
mapred.tasktracker.reduce.tasks.maximum = 2
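Note: the Hadoop 2.9 distribution does not ship a mapred-site.xml by default; if it is missing, it is normally created from the bundled template first (a standard step assumed here, not shown in the original notes):
cp $HADOOP_HOME/etc/hadoop/mapred-site.xml.template $HADOOP_HOME/etc/hadoop/mapred-site.xml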
Error log:
2019-07-29 20:30:27,717 INFO org.mortbay.log: Stopped [email protected]:50070
2019-07-29 20:30:27,817 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Stopping NameNode metrics system...
2019-07-29 20:30:27,818 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: NameNode metrics system stopped.
2019-07-29 20:30:27,818 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: NameNode metrics system shutdown complete.
2019-07-29 20:30:27,820 ERROR org.apache.hadoop.hdfs.server.namenode.NameNode: Failed to start namenode.
java.net.BindException: Problem binding to [izwz938o0q4p4r0l3sljxnz:9000] java.net.BindException: Cannot assign requested address; For more details see: http://wiki.apache.org/hadoop/BindException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:824)
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:735)
at org.apache.hadoop.ipc.Server.bind(Server.java:561)
at org.apache.hadoop.ipc.Server$Listener.<init>(Server.java:1037)
at org.apache.hadoop.ipc.Server.<init>(Server.java:2738)
at org.apache.hadoop.ipc.RPC$Server.<init>(RPC.java:958)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server.<init>(ProtobufRpcEngine.java:420)
at org.apache.hadoop.ipc.ProtobufRpcEngine.getServer(ProtobufRpcEngine.java:341)
at org.apache.hadoop.ipc.RPC$Builder.build(RPC.java:800)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.<init>(NameNodeRpcServer.java:431)
at org.apache.hadoop.hdfs.server.namenode.NameNode.createRpcServer(NameNode.java:803)
at org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:730)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:953)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:932)
at org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1673)
at org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1741)
Caused by: java.net.BindException: Cannot assign requested address
at sun.nio.ch.Net.bind0(Native Method)
at sun.nio.ch.Net.bind(Net.java:433)
at sun.nio.ch.Net.bind(Net.java:425)
at sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:223)
at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74)
at org.apache.hadoop.ipc.Server.bind(Server.java:544)
... 13 more
2019-07-29 20:30:27,823 INFO org.apache.hadoop.util.ExitUtil: Exiting with status 1: java.net.BindException: Problem binding to [izwz938o0q4p4r0l3sljxnz:9000] java.net.BindException: Cannot assign requested address; For more details see: http://wiki.apache.org/hadoop/BindException
2019-07-29 20:30:27,825 INFO org.apache.hadoop.hdfs.server.namenode.NameNode: SHUTDOWN_MSG:
Cause: the NameNode could not bind to port 9000. Note that "Cannot assign requested address" usually means the hostname in the bind address does not resolve to a local interface address (check /etc/hosts), rather than the port already being in use.
Troubleshooting approach:
In the Hadoop configuration, port 50070 is the NameNode's default web port. If http://192.168.10.10:50070 refuses the connection, the NameNode did not start successfully; a firewall can also block access, although in my case the page was reachable without turning the firewall off.
So open a shell and run the jps command to check whether the NameNode is running; if it is not, go to the logs directory under the Hadoop installation root and read the NameNode startup log.
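A quick-check sketch (the NameNode log file follows the standard hadoop-<user>-namenode-<host>.log naming; paths assume the installation used above):
jps
cd /data/hadoop-2.9.2/logs
tail -n 100 hadoop-*-namenode-*.log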
2019-08-01 16:05:45,793 INFO org.apache.hadoop.hdfs.server.common.Storage: Lock on /data/hadoop/tmp/dfs/name/in_use.lock acquired by nodename 21761@localhost
2019-08-01 16:05:45,798 WARN org.apache.hadoop.hdfs.server.namenode.FSNamesystem: Encountered exception loading fsimage
java.io.IOException: NameNode is not formatted.
at org.apache.hadoop.hdfs.server.namenode.FSImage.recoverTransitionRead(FSImage.java:236)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFSImage(FSNamesystem.java:1052)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFromDisk(FSNamesystem.java:681)
at org.apache.hadoop.hdfs.server.namenode.NameNode.loadNamesystem(NameNode.java:666)
at org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:728)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:953)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:932)
at org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1673)
at org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1741)
2019-08-01 16:05:45,806 INFO org.mortbay.log: Stopped [email protected]:50070
2019-08-01 16:05:45,908 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Stopping NameNode metrics system...
2019-08-01 16:05:45,910 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: NameNode metrics system stopped.
2019-08-01 16:05:45,910 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: NameNode metrics system shutdown complete.
2019-08-01 16:05:45,911 ERROR org.apache.hadoop.hdfs.server.namenode.NameNode: Failed to start namenode.
java.io.IOException: NameNode is not formatted.
at org.apache.hadoop.hdfs.server.namenode.FSImage.recoverTransitionRead(FSImage.java:236)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFSImage(FSNamesystem.java:1052)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFromDisk(FSNamesystem.java:681)
at org.apache.hadoop.hdfs.server.namenode.NameNode.loadNamesystem(NameNode.java:666)
at org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:728)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:953)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:932)
at org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1673)
at org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1741)
2019-08-01 16:05:45,915 INFO org.apache.hadoop.util.ExitUtil: Exiting with status 1: java.io.IOException: NameNode is not formatted.
As the log shows, the failure is "NameNode is not formatted."
Delete the files under the NameNode name directory (/data/hadoop/tmp/dfs/name, the path shown in the lock message above), then reformat the NameNode.
If this is the first startup, run the format command on namenode1:
bin/hadoop namenode -format
If this is not the first startup, run the following command on namenode1 instead:
bin/hdfs namenode -initializeSharedEdits
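After formatting, restart HDFS and confirm the NameNode process is up (a sketch using the standard scripts shipped with Hadoop, run from $HADOOP_HOME):
sbin/start-dfs.sh
jps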
Download the Hive 3.1 tar package and place it under the /data/hive3.1 directory. Then point Hive at the Hadoop installation and its own config directory (typically in conf/hive-env.sh):
export HADOOP_HOME=/data/hadoop-2.9.2
export HIVE_CONF_DIR=/data/hive3.1/conf
In hive-site.xml, set the following properties (the & characters in the JDBC URL must be written as &amp; inside the XML file):
hive.metastore.local = true
javax.jdo.option.ConnectionURL = jdbc:mysql://127.0.0.1:3306/hive?createDatabaseIfNotExist=true&useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&useSSL=false
javax.jdo.option.ConnectionDriverName = com.mysql.jdbc.Driver
javax.jdo.option.ConnectionUserName = root
javax.jdo.option.ConnectionPassword = 111111
hive.cli.print.header = true
hive.cli.print.current.db = true
hive.metastore.schema.verification = false
Initialize Hive:
cd /data/hive3.1/bin
./schematool -dbType mysql -initSchema
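To confirm the metastore schema was actually created, schematool can report the connection info and schema version (a quick check, not part of the original notes):
./schematool -dbType mysql -info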
Download the Sqoop source package and extract it to /data/sqoop.
Error message:
./sqoop import --connect jdbc:mysql://127.0.0.1:3306/mydata??zeroDateTimeBehavior=CONVERT_TO_NULL --username root --P --table app_h5_start --hive-import --hive-table app_h5_start --bindir ./ -m 1
19/08/01 17:53:19 WARN conf.HiveConf: HiveConf of name hive.metastore.local does not exist
19/08/01 17:53:19 ERROR tool.ImportTool: Import failed: java.io.IOException: Cannot run program "hive": error=2, No such file or directory
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
at java.lang.Runtime.exec(Runtime.java:620)
at java.lang.Runtime.exec(Runtime.java:528)
at org.apache.sqoop.util.Executor.exec(Executor.java:76)
at org.apache.sqoop.hive.HiveImport.executeExternalHiveScript(HiveImport.java:382)
at org.apache.sqoop.hive.HiveImport.executeScript(HiveImport.java:337)
at org.apache.sqoop.hive.HiveImport.importTable(HiveImport.java:241)
at org.apache.sqoop.tool.ImportTool.importTable(ImportTool.java:537)
at org.apache.sqoop.tool.ImportTool.run(ImportTool.java:628)
at org.apache.sqoop.Sqoop.run(Sqoop.java:147)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.sqoop.Sqoop.runSqoop(Sqoop.java:183)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:234)
at org.apache.sqoop.Sqoop.runTool(Sqoop.java:243)
at org.apache.sqoop.Sqoop.main(Sqoop.java:252)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:244)
at org.apache.hadoop.util.RunJar.main(RunJar.java:158)
Caused by: java.io.IOException: error=2, No such file or directory
at java.lang.UNIXProcess.forkAndExec(Native Method)
at java.lang.UNIXProcess.<init>(UNIXProcess.java:247)
at java.lang.ProcessImpl.start(ProcessImpl.java:134)
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1029)
... 20 more
Fix: set the Hive environment variables in /etc/profile:
export HIVE_HOME=/data/hive3.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin
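Then reload the profile and verify that the hive executable is resolvable on the PATH, which is exactly what the Sqoop error above was missing (a standard check):
source /etc/profile
which hive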
Complete command for importing data from MySQL:
./sqoop import --connect jdbc:mysql://127.0.0.1:3306/mydata?zeroDateTimeBehavior=CONVERT_TO_NULL --username root --P --table app_h5_start --hive-import --hive-table app_h5_start --bindir ./ -m 1
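To spot-check that the import reached Hive, a quick query can be run from the shell (the table name is the one used above; a hedged example, not from the original notes):
hive -e "select * from app_h5_start limit 5"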
Compute the aggregates and insert the results into the target table:
insert into june_active(SELECT app_key,device_type,COUNT(DISTINCT register_device_id) AS total FROM app_h5_start GROUP BY app_key,device_type)
hive (default)> insert into june_active(SELECT app_key,device_type,COUNT(DISTINCT register_device_id) AS total FROM app_h5_start GROUP BY app_key,device_type);
Query ID = root_20190801185848_ad61d15a-4448-4cac-86ae-2b750cfee34f
Total jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 18
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Job running in-process (local Hadoop)
.......
Create the Hive table:
create table IF NOT EXISTS default.june_active(app_key string COMMENT 'app_key',device_type string,total string);
Export the contents of a Hive table:
./hive -e "select * from june_active" >> /home/hadoop/output/june_active.txt