1. Download hadoop-3.1.4.tar.gz from the official site and unpack it.
Download hadoop.dll and winutils.exe; download link:
https://github.com/ordinaryload/Hadoop-tools
Copy winutils.exe into hadoop-3.1.4/bin, and copy hadoop.dll into the windows/system32 directory.
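Optionally, verify the setup before writing any MapReduce code. The EnvCheck class below is only a minimal sketch for this check (it is not required by the later steps); adjust the hadoop.home.dir path to wherever you unpacked Hadoop.

package com.weitao.mr.wordcount;

import org.apache.hadoop.util.NativeCodeLoader;

public class EnvCheck {
    public static void main(String[] args) {
        // Example unpack location, the same one used by the Driver below; change it to your own path.
        System.setProperty("hadoop.home.dir", "C:\\Users\\asus\\Desktop\\hadoop-3.1.4\\hadoop-3.1.4");
        // true means hadoop.dll was found and loaded; false usually means it is missing from System32 or PATH.
        System.out.println("Native hadoop library loaded: " + NativeCodeLoader.isNativeCodeLoaded());
    }
}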
2. Write the program: create the Mapper, Reducer, and Driver classes as shown in the Bilibili video.
You can also follow this blog post and use its sample code:
https://www.cnblogs.com/xingluo/p/9512961.html
3. Add a NativeIO class. Windows 10 drops the function the bundled NativeIO relies on, so you have to create your own copy.
See this blog post for the procedure: https://blog.csdn.net/weixin_42229056/article/details/82686172
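The usual fix from that post is to copy Hadoop's NativeIO.java into your own source tree under the package org.apache.hadoop.io.nativeio and relax the Windows access check. A rough sketch of the one change that is typically made (assuming the rest of the class is copied verbatim from the Hadoop 3.1.4 source, only the changed method is shown):

// Inside the copied org.apache.hadoop.io.nativeio.NativeIO.Windows class;
// everything else stays exactly as in the Hadoop source.
public static boolean access(String path, AccessRight desiredAccess) throws IOException {
    // The stock implementation calls the native access0() check, which fails here;
    // returning true simply skips the permission check for local runs.
    return true;
}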
4. Run the Driver's main method directly.
driver:
package com.weitao.mr.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Point hadoop.home.dir at the Hadoop directory unpacked in step 1
        System.setProperty("hadoop.home.dir", "C:\\Users\\asus\\Desktop\\hadoop-3.1.4\\hadoop-3.1.4");

        // Get the Job object
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Set the jar by class
        job.setJarByClass(WordCountDriver.class);

        // Wire up the Mapper and Reducer
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // Key/value types emitted by the map phase
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output paths (the output directory must not exist yet)
        FileInputFormat.setInputPaths(job, new Path("C:\\Users\\asus\\Desktop\\words.txt"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\Users\\asus\\Desktop\\output"));

        // Submit the job and wait for it to finish
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
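Note that because the Configuration is left empty (no cluster settings), the job runs with Hadoop's local job runner against the local file system. That is why the winutils.exe and hadoop.dll setup from step 1 is needed even though no cluster is involved, and why plain C:\ paths work for the input and output.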
reducer:
package com.weitao.mr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable v = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts collected for the same key (word)
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        v.set(sum);
        context.write(key, v);
    }
}
mapper:
package com.weitao.mr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    Text k = new Text();
    IntWritable v = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Convert the line to a String and split it into words
        String line = value.toString();
        String[] words = line.split(" ");
        // 2. Emit (word, 1) for every word
        for (String word : words) {
            k.set(word);
            context.write(k, v);
        }
    }
}
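As a quick sanity check of what the three classes do together: if words.txt contains, say,
hello world
hello hadoop
then after the job finishes, the output directory holds a part-r-00000 file with one line per distinct word and its count (tab-separated, sorted by key):
hadoop	1
hello	2
world	1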
pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.weitao.mr</groupId>
    <artifactId>mapreduce_start</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.8.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>3.1.4</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.4.1</version>
        </dependency>
    </dependencies>
    <repositories>
        <repository><id>aliyun</id><url>https://mvnrepository.com/</url></repository>
    </repositories>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <release>8</release>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals><goal>shade</goal></goals>
                        <configuration>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.ti.mr.getSingleInfo.utils.ConsumeHdfs</mainClass>
                                </transformer>
                            </transformers>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>