View the HDFS root directory: bin/hadoop dfs -ls /
Create the input directory: bin/hadoop dfs -mkdir /input
The sample input file contains:
hello asiainfo
asiainfo is big
hello big
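Before the job can read it, the sample file has to be uploaded into /input, e.g. with bin/hadoop dfs -put. As a sketch, the same thing can be done through the HDFS FileSystem API; the local file name words.txt is assumed here, since the original does not name the file:

package com.hadooptest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: copy a local file into HDFS, equivalent to
//   bin/hadoop dfs -put words.txt /input
public class UploadSample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();   // reads core-site.xml etc. from the classpath
        FileSystem fs = FileSystem.get(conf);       // the file system configured in fs.defaultFS
        // "words.txt" is an assumed local file name
        fs.copyFromLocalFile(new Path("words.txt"), new Path("/input/words.txt"));
        fs.close();
    }
}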
The data flows through four stages: Map (interface) → sort → aggregate → Reduce (interface).
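To make the flow concrete, this is roughly what each stage produces for the three sample lines above:

map output:        (hello,1) (asiainfo,1) (asiainfo,1) (is,1) (big,1) (hello,1) (big,1)
after sort/group:  (asiainfo,[1,1]) (big,[1,1]) (hello,[1,1]) (is,[1])
reduce output:     asiainfo 2, big 2, hello 2, is 1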
package com.hadooptest;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    // Defines how each line of the input file is split into words
    public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
        private String line = null;

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            line = value.toString();
            String[] array = line.split(" ");               // split each line on spaces
            for (String s : array) {
                context.write(new Text(s), new IntWritable(1)); // emit (word, 1)
            }
        }
    }

    // Defines how the grouped results are aggregated
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();                             // add up the 1s for this word
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] arguments = new GenericOptionsParser(conf, args).getRemainingArgs();
        // The program takes two arguments: an input directory and an output directory.
        // The output directory must not exist beforehand (delete it if it does).
        if (arguments.length != 2) {
            System.out.println("invalid arguments");
            System.exit(2);
        }
        // Create the Job to be scheduled
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);      // set the Mapper
        job.setReducerClass(WordCountReducer.class);    // set the Reducer
        job.setCombinerClass(WordCountReducer.class);   // set the combiner; here identical to the Reducer
        job.setOutputKeyClass(Text.class);              // output key type
        job.setOutputValueClass(IntWritable.class);     // output value type
        FileInputFormat.addInputPath(job, new Path(arguments[0]));   // input directory
        FileOutputFormat.setOutputPath(job, new Path(arguments[1])); // output directory
        // Run the Job and exit with its status
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
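Note on the combiner: because summing counts is associative and commutative, the Reducer class can double as the combiner. With setCombinerClass in place, each map task pre-sums its own output, so, for example, the two (hello,1) pairs emitted from one input split travel across the network as a single (hello,2).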
$ bin/hadoop jar ./wordCount.jar /input /output
$ bin/hadoop dfs -ls /output
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.
Found 2 items
-rw-r--r-- 1 hadoop supergroup 0 2016-07-03 16:52 /output/_SUCCESS
-rw-r--r-- 1 hadoop supergroup 14683 2016-07-03 16:52 /output/part-r-00000
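_SUCCESS is an empty marker file written when the job finishes successfully; part-r-00000 holds the actual output of the (single) reduce task.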
|
$ bin/hadoop dfs -cat /output/part-r-00000
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.
1
asiainfo 2
big 2
hello 2
is 1
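The unlabeled count of 1 on the first line is most likely an empty token: splitting on a single space turns a leading, trailing, or doubled space in the input into an empty string, which is then counted like any other word.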