5.打开Eclipse,选择菜单"Window"-->"Preferences",在左侧找到"Hadoop Map/Reduce",
6.打开菜单"Window"中的"Show View"窗口,选择"Map/Reduce Locations":
7.在打开的"Map/Reduce Locations"面板中,点击小象图标,打开新建配置窗口:
10.打开菜单"Window"中的"Show View"窗口,找到"Project Explorer":
11.在"Project Explorer"面板中找到"DFS Locations",展开下面的菜单就可以连接上HDFS,
12.在"Project Explorer"面板中点击鼠标右键,选择新建,就可以创建"Map/Reduce"项目了:
log4j.rootLogger=debug,stdout,R
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=mapreduce_test.log
log4j.appender.R.MaxFileSize=1MB
log4j.appender.R.MaxBackupIndex=1
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%p %t %c - %m%n
log4j.logger.com.codefutures=DEBUG

1 package com.xuebusi.hadoop.mr.windows; 2 3 import org.apache.hadoop.conf.Configuration; 4 import org.apache.hadoop.fs.Path; 5 import org.apache.hadoop.io.IntWritable; 6 import org.apache.hadoop.io.Text; 7 import org.apache.hadoop.mapreduce.Job; 8 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 9 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 12 public class WordCountDriver { 13 14 public static void main(String[] args) throws Exception { 15 16 Configuration conf = new Configuration(); 17 Job job = Job.getInstance(conf); 18 19 job.setJarByClass(WordCountDriver.class); 20 21 job.setMapperClass(WordCountMapper.class); 22 job.setReducerClass(WordCountReducer.class); 23 24 job.setMapOutputKeyClass(Text.class); 25 job.setMapOutputValueClass(IntWritable.class); 26 27 job.setOutputKeyClass(Text.class); 28 job.setOutputValueClass(IntWritable.class); 29 30 job.setInputFormatClass(TextInputFormat.class); 31 32 FileInputFormat.setInputPaths(job, new Path("c:/wordcount/input")); 33 FileOutputFormat.setOutputPath(job, new Path("c:/wordcount/output")); 34 35 boolean res = job.waitForCompletion(true); 36 System.exit(res ? 0 : 1); 37 38 } 39 40 }

1 package com.xuebusi.hadoop.mr.windows; 2 3 import java.io.IOException; 4 5 import org.apache.hadoop.io.IntWritable; 6 import org.apache.hadoop.io.LongWritable; 7 import org.apache.hadoop.io.Text; 8 import org.apache.hadoop.mapreduce.Mapper; 9 10 public class WordCountMapper extends Mapper{ 11 12 @Override 13 protected void map(LongWritable key, Text value, Context context) 14 throws IOException, InterruptedException { 15 // TODO Auto-generated method stub 16 //super.map(key, value, context); 17 18 String line = value.toString(); 19 String[] words = line.split(" "); 20 21 for (String word : words) { 22 context.write(new Text(word), new IntWritable(1)); 23 } 24 } 25 }

1 package com.xuebusi.hadoop.mr.windows; 2 3 import java.io.IOException; 4 5 import org.apache.hadoop.io.IntWritable; 6 import org.apache.hadoop.io.Text; 7 import org.apache.hadoop.mapreduce.Reducer; 8 9 public class WordCountReducer extends Reducer{ 10 11 @Override 12 protected void reduce(Text key, Iterable values, Context context) 13 throws IOException, InterruptedException { 14 // TODO Auto-generated method stub 15 // super.reduce(arg0, arg1, arg2); 16 17 int count = 0; 18 for (IntWritable value : values) { 19 count += value.get(); 20 } 21 22 context.write(new Text(key), new IntWritable(count)); 23 } 24 25 }
18.在Eclipse中,运行WordCountDriver类中的main方法(右键Run As-->Java Application),可以在控制台看到如下日志信息:

INFO - The url to track the job: http://localhost:8080/
INFO - Running job: job_local1553403857_0001
INFO - OutputCommitter set in config null
INFO - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
INFO - Waiting for map tasks
INFO - Starting task: attempt_local1553403857_0001_m_000000_0
INFO - ProcfsBasedProcessTree currently is supported only on Linux.
INFO - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@5e2c17f7
INFO - Processing split: file:/c:/wordcount/input/words.txt:0+83
DEBUG - Trying map output collector class: org.apache.hadoop.mapred.MapTask$MapOutputBuffer
INFO - (EQUATOR) 0 kvi 26214396(104857584)
INFO - mapreduce.task.io.sort.mb: 100
INFO - soft limit at 83886080
INFO - bufstart = 0; bufvoid = 104857600
INFO - kvstart = 26214396; length = 6553600
INFO - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
INFO -
INFO - Starting flush of map output
INFO - Spilling map output
INFO - bufstart = 0; bufend = 129; bufvoid = 104857600
INFO - kvstart = 26214396(104857584); kvend = 26214352(104857408); length = 45/6553600
INFO - Finished spill 0
INFO - Task:attempt_local1553403857_0001_m_000000_0 is done. And is in the process of committing
INFO - map
INFO - Task 'attempt_local1553403857_0001_m_000000_0' done.
INFO - Finishing task: attempt_local1553403857_0001_m_000000_0
INFO - map task executor complete.
DEBUG - Starting reduce thread pool executor.
DEBUG - Max local threads: 1
DEBUG - Reduce tasks to process: 1
INFO - Waiting for reduce tasks
INFO - Starting task: attempt_local1553403857_0001_r_000000_0
DEBUG - currentIndex 0 0:0
DEBUG - mapreduce.cluster.local.dir for child : /tmp/hadoop-SYJ/mapred/local/localRunner//SYJ/jobcache/job_local1553403857_0001/attempt_local1553403857_0001_r_000000_0
DEBUG - using new api for output committer
INFO - ProcfsBasedProcessTree currently is supported only on Linux.
INFO - Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@41217e67
INFO - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@25071521
INFO - MergerManager: memoryLimit=1287946240, maxSingleShuffleLimit=321986560, mergeThreshold=850044544, ioSortFactor=10, memToMemMergeOutputsThreshold=10
INFO - attempt_local1553403857_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
DEBUG - Got 0 map completion events from 0
DEBUG - GetMapEventsThread about to sleep for 1000
DEBUG - LocalFetcher 1 going to fetch: attempt_local1553403857_0001_m_000000_0
DEBUG - attempt_local1553403857_0001_m_000000_0: Proceeding with shuffle since usedMemory (0) is lesser than memoryLimit (1287946240).CommitMemory is (0)
INFO - localfetcher#1 about to shuffle output of map attempt_local1553403857_0001_m_000000_0 decomp: 155 len: 159 to MEMORY
INFO - Read 155 bytes from map-output for attempt_local1553403857_0001_m_000000_0
INFO - closeInMemoryFile -> map-output of size: 155, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->155
DEBUG - map attempt_local1553403857_0001_m_000000_0 done 1 / 1 copied.
INFO - EventFetcher is interrupted.. Returning
INFO - 1 / 1 copied.
INFO - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
INFO - Merging 1 sorted segments
INFO - Down to the last merge-pass, with 1 segments left of total size: 145 bytes
INFO - Merged 1 segments, 155 bytes to disk to satisfy reduce memory limit
DEBUG - Disk file: /tmp/hadoop-SYJ/mapred/local/localRunner/SYJ/jobcache/job_local1553403857_0001/attempt_local1553403857_0001_r_000000_0/output/map_0.out.merged Length is 159
INFO - Merging 1 files, 159 bytes from disk
INFO - Merging 0 segments, 0 bytes from memory into reduce
INFO - Merging 1 sorted segments
INFO - Down to the last merge-pass, with 1 segments left of total size: 145 bytes
INFO - 1 / 1 copied.
INFO - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
INFO - Task:attempt_local1553403857_0001_r_000000_0 is done. And is in the process of committing
INFO - 1 / 1 copied.
INFO - Task attempt_local1553403857_0001_r_000000_0 is allowed to commit now
INFO - Saved output of task 'attempt_local1553403857_0001_r_000000_0' to file:/c:/wordcount/output/_temporary/0/task_local1553403857_0001_r_000000
INFO - reduce > reduce
INFO - Task 'attempt_local1553403857_0001_r_000000_0' done.
INFO - Finishing task: attempt_local1553403857_0001_r_000000_0
INFO - reduce task executor complete.
DEBUG - Merging data from DeprecatedRawLocalFileStatus{path=file:/c:/wordcount/output/_temporary/0/task_local1553403857_0001_r_000000; isDirectory=true; modification_time=1484061974082; access_time=0; owner=; group=; permission=rwxrwxrwx; isSymlink=false} to file:/c:/wordcount/output
DEBUG - Merging data from DeprecatedRawLocalFileStatus{path=file:/c:/wordcount/output/_temporary/0/task_local1553403857_0001_r_000000/part-r-00000; isDirectory=false; length=62; replication=1; blocksize=33554432; modification_time=1484061974091; access_time=0; owner=; group=; permission=rw-rw-rw-; isSymlink=false} to file:/c:/wordcount/output/part-r-00000
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.fs.FileContext.getAbstractFileSystem(FileContext.java:331)
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:323)
INFO - Job job_local1553403857_0001 running in uber mode : false
INFO - map 100% reduce 100%
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.getTaskCompletionEvents(Job.java:677)
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:323)
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:323)
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.getTaskCompletionEvents(Job.java:677)
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:323)
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:323)
INFO - Job job_local1553403857_0001 completed successfully
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.getCounters(Job.java:765)
INFO - Counters: 33
    File System Counters
        FILE: Number of bytes read=822
        FILE: Number of bytes written=505185
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=4
        Map output records=12
        Map output bytes=129
        Map output materialized bytes=159
        Input split bytes=99
        Combine input records=0
        Combine output records=0
        Reduce input groups=7
        Reduce shuffle bytes=159
        Reduce input records=12
        Reduce output records=7
        Spilled Records=24
        Shuffled Maps =1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=0
        CPU time spent (ms)=0
        Physical memory (bytes) snapshot=0
        Virtual memory (bytes) snapshot=0
        Total committed heap usage (bytes)=457703424
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters
        Bytes Read=83
    File Output Format Counters
        Bytes Written=74
DEBUG - PrivilegedAction as:SYJ (auth:SIMPLE) from:org.apache.hadoop.mapreduce.Job.updateStatus(Job.java:323)