基于同时给开发同学与BI用户使用的需求,打算开启HiveServer2服务,集群上Hive任务的执行、调度都走HiveServer2方式。之前只有调度任务与开发同学使用,所以使用的是Hive CLI方式,也没有对开发同学添加权限控制。
使用一段时间之后,发现HiveServer2存在各种问题,比较突出的是不稳定、运行日志很难获取、任务ID需要解析等。最近又遇到一个概率性出现的问题,报错日志如下:
INFO : Cleaning up the staging area file:/tmp/hadoop-hive/mapred/staging/hive1294900272/.staging/job_local1294900272_0933
ERROR : Job Submission failed with exception 'java.io.IOException(java.util.concurrent.ExecutionException: java.io.IOException: Unable to rename file: [/tmp/hadoop-hive/mapred/local/1526009296491_tmp/tmp_hive-exec-1.1.0-cdh5.12.1-core.jar] to [/tmp/hadoop-hive/mapred/local/1526009296491_tmp/hive-exec-1.1.0-cdh5.12.1-core.jar])'
java.io.IOException: java.util.concurrent.ExecutionException: java.io.IOException: Unable to rename file: [/tmp/hadoop-hive/mapred/local/1526009296491_tmp/tmp_hive-exec-1.1.0-cdh5.12.1-core.jar] to [/tmp/hadoop-hive/mapred/local/1526009296491_tmp/hive-exec-1.1.0-cdh5.12.1-core.jar]
at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:143)
at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:171)
at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:758)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:244)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1307)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1304)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1917)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1304)
at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:578)
at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:573)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1917)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:573)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:564)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:436)
at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:142)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:99)
at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:79)
Caused by: java.util.concurrent.ExecutionException: java.io.IOException: Unable to rename file: [/tmp/hadoop-hive/mapred/local/1526009296491_tmp/tmp_hive-exec-1.1.0-cdh5.12.1-core.jar] to [/tmp/hadoop-hive/mapred/local/1526009296491_tmp/hive-exec-1.1.0-cdh5.12.1-core.jar]
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:192)
at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:139)
... 21 more
Caused by: java.io.IOException: Unable to rename file: [/tmp/hadoop-hive/mapred/local/1526009296491_tmp/tmp_hive-exec-1.1.0-cdh5.12.1-core.jar] to [/tmp/hadoop-hive/mapred/local/1526009296491_tmp/hive-exec-1.1.0-cdh5.12.1-core.jar]
at org.apache.hadoop.yarn.util.FSDownload.unpack(FSDownload.java:327)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:362)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:60)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
INFO : Cleaning up the staging area file:/tmp/hadoop-hive/mapred/staging/hive1101623315/.staging/job_local1101623315_0934
ERROR : Job Submission failed with exception 'java.io.IOException(java.util.concurrent.ExecutionException: java.io.FileNotFoundException: File /tmp/hadoop-hive/mapred/local/1526009296491_tmp does not exist)'
java.io.IOException: java.util.concurrent.ExecutionException: java.io.FileNotFoundException: File /tmp/hadoop-hive/mapred/local/1526009296491_tmp does not exist
at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:143)
at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:171)
at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:758)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:244)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1307)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1304)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1917)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1304)
at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:578)
at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:573)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1917)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:573)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:564)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:436)
at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:142)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:214)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:99)
at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:79)
Caused by: java.util.concurrent.ExecutionException: java.io.FileNotFoundException: File /tmp/hadoop-hive/mapred/local/1526009296491_tmp does not exist
at java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.util.concurrent.FutureTask.get(FutureTask.java:192)
at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:139)
... 21 more
Caused by: java.io.FileNotFoundException: File /tmp/hadoop-hive/mapred/local/1526009296491_tmp does not exist
at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:598)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:811)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:588)
at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileLinkStatusInternal(RawLocalFileSystem.java:827)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:813)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatus(RawLocalFileSystem.java:784)
at org.apache.hadoop.fs.DelegateToFileSystem.getFileLinkStatus(DelegateToFileSystem.java:132)
at org.apache.hadoop.fs.AbstractFileSystem.renameInternal(AbstractFileSystem.java:701)
at org.apache.hadoop.fs.FilterFs.renameInternal(FilterFs.java:236)
at org.apache.hadoop.fs.AbstractFileSystem.rename(AbstractFileSystem.java:674)
at org.apache.hadoop.fs.FileContext.rename(FileContext.java:932)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:364)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:60)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
ERROR : FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
INFO : Completed executing command(queryId=hive_20180511112828_b253761f-a1ae-40bc-ae7e-650cfa4c7b79); Time taken: 9.506 seconds
Error: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask (state=08S01,code=1)
Closing: 0:
想要排查问题,但该问题并不会稳定复现:在运行的几百个JOB中,只是偶尔有几个出现这样的报错信息。
最后还是找到了这样一篇文章:《一台服务器同时起多个hive跑local mr很大机率会报 FileAlreadyExistsException》。根据该文中的报错信息以及源代码的内容,可以看出我们这里的报错应该也是同类问题,或出自同一处代码。至此为我们的问题找到了一个合理的解释,因此可以对症下药解决问题。