import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.StringTokenizer;
/**
 * org.apache.hadoop.mapreduce.Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>:
 * input key, input value, output key, output value
 */
public class WordCountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
        // With TextInputFormat, each call receives exactly one line of the input file
        String line = value.toString();
        // StringTokenizer splits on whitespace (spaces and tabs) by default
        StringTokenizer st = new StringTokenizer(line);
        while (st.hasMoreTokens()) {
            String word = st.nextToken();
            // Emit (word, 1) for every token
            context.write(new Text(word), new IntWritable(1));
        }
}
}
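A small optional refinement: `context.write` serializes the key/value pair immediately, so the mapper may reuse one `Text` and one `IntWritable` instance across calls instead of allocating new objects for every token. A minimal sketch of that variant:

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Reused across calls; safe because write() serializes the pair right away
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer st = new StringTokenizer(value.toString());
        while (st.hasMoreTokens()) {
            word.set(st.nextToken());
            context.write(word, ONE);
        }
    }
}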
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>:
 * input key, input value, output key, output value.
 * The input types must match the mapper's output types.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the 1s emitted by the mappers for this word
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        context.write(key, new IntWritable(sum));
}
}
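A note on efficiency: because this reduce is a plain sum (associative and commutative), the same class can double as a combiner, pre-aggregating counts on the map side before the shuffle and cutting network traffic. Registering it takes one extra line in the driver:

// Optional: run the reducer as a map-side combiner to shrink shuffle traffic;
// valid here because integer addition is associative and commutative
job.setCombinerClass(WordCountReducer.class);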
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static void main(String[] args){
        // Create the configuration object
        Configuration conf = new Configuration();
        try {
            // Create the job instance (Configuration conf, String jobName)
            Job job = Job.getInstance(conf, "word count");
            // Set the jar that carries the job classes
            job.setJarByClass(WordCount.class);
            // Set the mapper class
            job.setMapperClass(WordCountMapper.class);
            // Set the reducer class
            job.setReducerClass(WordCountReducer.class);
            // Set the map output key/value types
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            // Set the reduce output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // HDFS input file and output directory (the output directory must not exist yet)
            FileInputFormat.setInputPaths(job, new Path("/test/input/friendship.txt"));
            FileOutputFormat.setOutputPath(job, new Path("/test/output"));
            // Submit the job and wait for completion
            boolean b = job.waitForCompletion(true);
            if (!b) {
                System.out.println("word count job failed!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
}
}
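Hardcoding the paths keeps the demo short; a common variant reads them from the command line instead. A sketch, replacing the two path lines above (it assumes the jar is invoked as `hadoop jar WordCount-jar-with-dependencies.jar <input> <output>`):

// Sketch: take the paths from the program arguments instead of hardcoding them
if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <output path>");
    System.exit(2);
}
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));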
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;
import org.study.mr.count.WordCountMapper;
import org.study.mr.count.WordCountReducer;
import java.util.ArrayList;
import java.util.List;
/**
 * MRUnit tests for WordCountMapper and WordCountReducer.
 */
public class WordCountMRTest {
    // Sample multi-line input, kept for reference; the tests below use single tokens
    public static final String inputTxt = "hello world\n" +
            "hello hadoops\n" +
            "hello sunwei\n";
MapDriver<LongWritable, Text, Text, IntWritable> mapDriver;
ReduceDriver<Text, IntWritable, Text, IntWritable> reduceDriver;
MapReduceDriver<LongWritable, Text, Text, IntWritable, Text, IntWritable> mapReduceDriver;
@Before
public void setUp() throws Exception {
WordCountMapper mapper = new WordCountMapper();
WordCountReducer reducer = new WordCountReducer();
mapDriver = MapDriver.newMapDriver(mapper);
reduceDriver = ReduceDriver.newReduceDriver(reducer);
mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);
}
@Test
public void testMapper() {
mapDriver.withInput(new LongWritable(), new Text("hello"));
mapDriver.withOutput(new Text("hello"), new IntWritable(1));
mapDriver.runTest();
}
@Test
public void testReducer() {
List<IntWritable> values = new ArrayList<IntWritable>();
values.add(new IntWritable(1));
values.add(new IntWritable(1));
reduceDriver.withInput(new Text("hello"), values);
reduceDriver.withOutput(new Text("hello"), new IntWritable(2));
reduceDriver.runTest();
}
    @Test
    public void testMapReducer() {
        mapReduceDriver.withInput(new LongWritable(), new Text("hello"));
        mapReduceDriver.withOutput(new Text("hello"), new IntWritable(1));
        mapReduceDriver.runTest();
    }
}
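The tests run under the standard Maven lifecycle; no cluster is needed, since MRUnit drives the mapper and reducer in-process:

# Run the MRUnit unit tests
mvn test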
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>mr-study</groupId>
    <artifactId>mr-study</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <hadoopVersion>2.8.5</hadoopVersion>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoopVersion}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoopVersion}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoopVersion}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoopVersion}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.mrunit</groupId>
            <artifactId>mrunit</artifactId>
            <version>0.9.0-incubating</version>
            <classifier>hadoop2</classifier>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <finalName>WordCount</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.4</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>org.study.mr.count.WordCount</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <resources>
            <resource>
                <directory>src/main/java</directory>
                <includes>
                    <include>**/*.properties</include>
                </includes>
            </resource>
            <resource>
                <directory>src/main/resources</directory>
            </resource>
            <resource>
                <directory>resources</directory>
            </resource>
        </resources>
    </build>
</project>
# Build (produces target/WordCount-jar-with-dependencies.jar)
mvn clean package
# Submit the job to Hadoop
hadoop jar WordCount-jar-with-dependencies.jar
The input file /test/input/friendship.txt:
On Friendship
And a youth said, "Speak to us of Friendship."
Your friend is your needs answered.
He is your field which you sow with love and reap with thanksgiving.
And he is your board and your fireside.
For you come to him with your hunger, and you seek him for peace.
When your friend speaks his mind you fear not the "nay" in your own mind, nor do you withhold the "ay."
And when he is silent your heart ceases not to listen to his heart;
For without words, in friendship, all thoughts, all desires, all expectations are born and shared, with joy that is unacclaimed.
When you part from your friend, you grieve not;
For that which you love most in him may be clearer in his absence, as the mountain to the climber is clearer from the plain.
And let there be no purpose in friendship save the deepening of the spirit.
For love that seeks aught but the disclosure of its own mystery is not love but a net cast forth: and only the unprofitable is caught.
And let your best be for your friend.
If he must know the ebb of your tide, let him know its flood also.
For what is your friend that you should seek him with hours to kill?
Seek him always with hours to live.
For it is his to fill your need, but not your emptiness.
And in the sweetness of friendship let there be laughter, and sharing of pleasures.
For in the dew of little things the heart finds its morning and is refreshed.
The output lists one count per word:
Friendship 5
... (remaining lines omitted)
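The results land in part files under the output directory; with the default single reducer, the full counts can be read back with:

# Read the job output (file name assumes the default single reducer)
hdfs dfs -cat /test/output/part-r-00000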
# Create the input directory
hdfs dfs -mkdir -p /test/input
# Upload the input file
hdfs dfs -put <local path> /test/input
If the output directory already exists, the job fails and the directory must be deleted first:
# Error: org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory hdfs://master:9000/test/output already exists
# Delete the old output
hdfs dfs -rm -r /test/output
Check that the input file is in place:
hdfs dfs -ls /test/input/
hdfs dfs -cat /test/input/friendship.txt
Exception in thread "main" java.io.IOException: Mkdirs failed to create /var/folders/_p/2ws43j_d0rn32ptggdc089440000gn/T/hadoop-unjar7467945055400525315/META-INF/license
at org.apache.hadoop.util.RunJar.ensureDirectory(RunJar.java:145)
at org.apache.hadoop.util.RunJar.unJar(RunJar.java:110)
at org.apache.hadoop.util.RunJar.unJar(RunJar.java:85)
at org.apache.hadoop.util.RunJar.run(RunJar.java:227)
at org.apache.hadoop.util.RunJar.main(RunJar.java:153)
# On a case-insensitive filesystem (e.g. macOS), the jar's LICENSE file collides
# with the META-INF/license directory when RunJar unpacks the jar; removing the
# LICENSE entries works around this:
zip -d WordCount-jar-with-dependencies.jar META-INF/LICENSE
zip -d WordCount-jar-with-dependencies.jar LICENSE