hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath1.txt
hadoop a
spark a
hive a
hbase a
tachyon a
storm a
redis a
hadoop@hadoop:/home/hadoop/blb$ hdfs dfs -text /user/hadoop/libin/input/inputpath2.txt
hadoop b
spark b
kafka b
tachyon b
oozie b
flume b
sqoop b
solr b
hadoop@hadoop:/home/hadoop/blb$
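Each input file holds one (word, tag) record per line. The map-only job below reads both files and, for every record it processes, writes out the metadata that the Mapper's Context object exposes: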
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class GetIDMapReduce {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: GetIDMapReduce <inputpath> <outputpath>");
            System.exit(2);    // exit instead of continuing with bad arguments
        }
        Job job = Job.getInstance(conf, GetIDMapReduce.class.getSimpleName() + "1");
        job.setJarByClass(GetIDMapReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setMapperClass(MyMapper1.class);
        job.setNumReduceTasks(0);    // map-only job: mapper output goes straight to HDFS
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    public static class MyMapper1 extends Mapper<LongWritable, Text, Text, NullWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            String pathname = fileSplit.getPath().getName();          // name of the file this split came from
            JobID jobid = context.getJobID();                         // job ID
            LongWritable keyout = context.getCurrentKey();            // byte offset of the current line
            Text valueout = context.getCurrentValue();                // content of the current line
            String jobname = context.getJobName();                    // job name
            TaskAttemptID taskid = context.getTaskAttemptID();        // task attempt ID
            float progress = context.getProgress();                   // progress of the task
            String jar = context.getJar();                            // job jar copied to the cluster before the job runs
            //String status = context.getStatus();
            String user = context.getUser();                          // current user
            //String[] fileTimestamps = context.getFileTimestamps();
            int numReduceTasks = context.getNumReduceTasks();         // number of reduce tasks
            //Path[] fileClassPaths = context.getFileClassPaths();
            Configuration configuration = context.getConfiguration(); // job configuration
            //RawComparator<?> groupingComparator = context.getGroupingComparator();
            boolean jobSetupCleanupNeeded = context.getJobSetupCleanupNeeded(); // whether job-setup and job-cleanup are needed
            int maxMapAttempts = context.getMaxMapAttempts();         // max number of attempts per map task
            int maxReduceAttempts = context.getMaxReduceAttempts();   // max number of attempts per reduce task
            //@SuppressWarnings("deprecation")
            //Path[] localCacheFiles = context.getLocalCacheFiles();
            //OutputCommitter outputCommitter = context.getOutputCommitter();
            Path workingDirectory = context.getWorkingDirectory();    // working directory
            // NOTE: calling nextKeyValue() here advances the RecordReader, so every
            // other input line is skipped; and because the reader reuses its key/value
            // objects, keyout/valueout now hold the record it advanced to -- both
            // effects are visible in the output below.
            boolean nextKeyValue = context.nextKeyValue();
            //URI[] cacheFiles = context.getCacheFiles();
            URI[] cacheArchives = context.getCacheArchives();         // cache archives set in the Configuration
            Path[] archiveClassPaths = context.getArchiveClassPaths();// archive entries in the classpath, as an array of Path
            boolean profileEnabled = context.getProfileEnabled();     // whether task profiling is enabled
            //String profileParams = context.getProfileParams();
            @SuppressWarnings("deprecation")
            boolean symlink = context.getSymlink();                   // originally whether symlinks should be used; symlinks can no longer be disabled
            //RawComparator<?> sortComparator = context.getSortComparator();
            //int hashCode = context.hashCode();

            context.write(new Text("===================================================================================="), NullWritable.get());
            context.write(new Text("pathname--" + pathname), NullWritable.get());
            context.write(new Text("jobid--" + jobid.toString()), NullWritable.get());
            context.write(new Text("keyout--" + keyout.toString()), NullWritable.get());
            context.write(new Text("valueout--" + valueout), NullWritable.get());
            context.write(new Text("jobname--" + jobname), NullWritable.get());
            context.write(new Text("taskid--" + taskid.toString()), NullWritable.get());
            context.write(new Text("progress--" + progress), NullWritable.get());
            context.write(new Text("jar--" + jar), NullWritable.get());
            //context.write(new Text("status--" + status), NullWritable.get());
            context.write(new Text("user--" + user), NullWritable.get());
            //context.write(new Text("fileTimestamps--" + fileTimestamps), NullWritable.get());
            context.write(new Text("numReduceTasks--" + numReduceTasks), NullWritable.get());
            //context.write(new Text("fileClassPaths--" + fileClassPaths), NullWritable.get());
            context.write(new Text("configuration--" + configuration), NullWritable.get());
            //context.write(new Text("groupingComparator--" + groupingComparator), NullWritable.get());
            context.write(new Text("jobSetupCleanupNeeded--" + jobSetupCleanupNeeded), NullWritable.get());
            context.write(new Text("maxMapAttempts--" + maxMapAttempts), NullWritable.get());
            context.write(new Text("maxReduceAttempts--" + maxReduceAttempts), NullWritable.get());
            //context.write(new Text("localCacheFiles--" + localCacheFiles), NullWritable.get());
            //context.write(new Text("outputCommitter--" + outputCommitter), NullWritable.get());
            context.write(new Text("workingDirectory--" + workingDirectory), NullWritable.get());
            context.write(new Text("nextKeyValue--" + nextKeyValue), NullWritable.get());
            //context.write(new Text("cacheFiles--" + cacheFiles), NullWritable.get());
            context.write(new Text("cacheArchives--" + cacheArchives), NullWritable.get());
            context.write(new Text("archiveClassPaths--" + archiveClassPaths), NullWritable.get());
            context.write(new Text("profileEnabled--" + profileEnabled), NullWritable.get());
            //context.write(new Text("profileParams--" + profileParams), NullWritable.get());
            context.write(new Text("symlink--" + symlink), NullWritable.get());
            //context.write(new Text("sortComparator--" + sortComparator), NullWritable.get());
            //context.write(new Text("hashCode--" + hashCode), NullWritable.get());
        }
    }
}
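The job is launched like any other MapReduce program; a sketch, assuming the class above is packaged into a jar named GetIDMapReduce.jar (the jar name is hypothetical, use whatever your build produces):

hadoop@hadoop:/home/hadoop/blb$ hadoop jar GetIDMapReduce.jar GetIDMapReduce /user/hadoop/libin/input /user/hadoop/libin/out1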
-rw-r--r--   2 hadoop hadoop       0 2016-03-29 17:38 /user/hadoop/libin/out1/_SUCCESS
-rw-r--r--   2 hadoop hadoop    3058 2016-03-29 17:38 /user/hadoop/libin/out1/part-m-00000
-rw-r--r--   2 hadoop hadoop    3045 2016-03-29 17:38 /user/hadoop/libin/out1/part-m-00001
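Because setNumReduceTasks(0) makes this a map-only job, each map task writes its output directly to HDFS. The two small input files yield one split each, hence two map tasks and two part-m files: the pathname-- and taskid-- lines below show that part-m-00000 (task m_000000) read inputpath2.txt and part-m-00001 (task m_000001) read inputpath1.txt.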
====================================================================================
pathname--inputpath2.txt
jobid--job_1446086163035_21583
keyout--9
valueout--spark b
jobname--GetIDMapReduce1
taskid--attempt_1446086163035_21583_m_000000_0
progress--0.13636364
jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jar
user--hadoop
numReduceTasks--0
configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xml
jobSetupCleanupNeeded--true
maxMapAttempts--4
maxReduceAttempts--4
workingDirectory--hdfs://hadoop:9000/user/hadoop
nextKeyValue--true
cacheArchives--null
archiveClassPaths--null
profileEnabled--false
symlink--true
====================================================================================
pathname--inputpath2.txt
jobid--job_1446086163035_21583
keyout--25
valueout--tachyon b
jobname--GetIDMapReduce1
taskid--attempt_1446086163035_21583_m_000000_0
progress--0.37878788
jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jar
user--hadoop
numReduceTasks--0
configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xml
jobSetupCleanupNeeded--true
maxMapAttempts--4
maxReduceAttempts--4
workingDirectory--hdfs://hadoop:9000/user/hadoop
nextKeyValue--true
cacheArchives--null
archiveClassPaths--null
profileEnabled--false
symlink--true
====================================================================================
pathname--inputpath2.txt
jobid--job_1446086163035_21583
keyout--43
valueout--flume b
jobname--GetIDMapReduce1
taskid--attempt_1446086163035_21583_m_000000_0
progress--0.6515151
jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jar
user--hadoop
numReduceTasks--0
configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xml
jobSetupCleanupNeeded--true
maxMapAttempts--4
maxReduceAttempts--4
workingDirectory--hdfs://hadoop:9000/user/hadoop
nextKeyValue--true
cacheArchives--null
archiveClassPaths--null
profileEnabled--false
symlink--true
====================================================================================
pathname--inputpath2.txt
jobid--job_1446086163035_21583
keyout--59
valueout--solr b
jobname--GetIDMapReduce1
taskid--attempt_1446086163035_21583_m_000000_0
progress--0.8939394
jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jar
user--hadoop
numReduceTasks--0
configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xml
jobSetupCleanupNeeded--true
maxMapAttempts--4
maxReduceAttempts--4
workingDirectory--hdfs://hadoop:9000/user/hadoop
nextKeyValue--true
cacheArchives--null
archiveClassPaths--null
profileEnabled--false
symlink--true
hadoop@hadoop:/home/hadoop/blb$
====================================================================================
pathname--inputpath1.txt
jobid--job_1446086163035_21583
keyout--9
valueout--spark a
jobname--GetIDMapReduce1
taskid--attempt_1446086163035_21583_m_000001_0
progress--0.15517241
jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jar
user--hadoop
numReduceTasks--0
configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xml
jobSetupCleanupNeeded--true
maxMapAttempts--4
maxReduceAttempts--4
workingDirectory--hdfs://hadoop:9000/user/hadoop
nextKeyValue--true
cacheArchives--null
archiveClassPaths--null
profileEnabled--false
symlink--true
====================================================================================
pathname--inputpath1.txt
jobid--job_1446086163035_21583
keyout--24
valueout--hbase a
jobname--GetIDMapReduce1
taskid--attempt_1446086163035_21583_m_000001_0
progress--0.41379312
jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jar
user--hadoop
numReduceTasks--0
configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xml
jobSetupCleanupNeeded--true
maxMapAttempts--4
maxReduceAttempts--4
workingDirectory--hdfs://hadoop:9000/user/hadoop
nextKeyValue--true
cacheArchives--null
archiveClassPaths--null
profileEnabled--false
symlink--true
====================================================================================
pathname--inputpath1.txt
jobid--job_1446086163035_21583
keyout--42
valueout--storm a
jobname--GetIDMapReduce1
taskid--attempt_1446086163035_21583_m_000001_0
progress--0.7241379
jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jar
user--hadoop
numReduceTasks--0
configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xml
jobSetupCleanupNeeded--true
maxMapAttempts--4
maxReduceAttempts--4
workingDirectory--hdfs://hadoop:9000/user/hadoop
nextKeyValue--true
cacheArchives--null
archiveClassPaths--null
profileEnabled--false
symlink--true
====================================================================================
pathname--inputpath1.txt
jobid--job_1446086163035_21583
keyout--58
valueout--
jobname--GetIDMapReduce1
taskid--attempt_1446086163035_21583_m_000001_0
progress--1.0
jar--/tmp/hadoop-yarn/staging/hadoop/.staging/job_1446086163035_21583/job.jar
user--hadoop
numReduceTasks--0
configuration--Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml, job.xml
jobSetupCleanupNeeded--true
maxMapAttempts--4
maxReduceAttempts--4
workingDirectory--hdfs://hadoop:9000/user/hadoop
nextKeyValue--false
cacheArchives--null
archiveClassPaths--null
profileEnabled--false
symlink--true
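Note that only every other input line appears in the output (offsets 9, 25, 43, 59 rather than 0, 9, 17, ...). This is because map() itself calls context.nextKeyValue(), which consumes a record, and because the RecordReader reuses its key/value objects, the values printed after that call belong to the record it advanced to, not the one map() was invoked with. The framework's driver loop, paraphrased from Hadoop's Mapper.run() source, makes the effect clear:

    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        try {
            // Each nextKeyValue() call advances the reader; the extra call
            // inside map() above therefore swallows the following record.
            while (context.nextKeyValue()) {
                map(context.getCurrentKey(), context.getCurrentValue(), context);
            }
        } finally {
            cleanup(context);
        }
    }

This also explains the last record of part-m-00001: at end of file the inner nextKeyValue() returns false, the reused key holds the end-of-file offset 58, and the reused value has been cleared to an empty string.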