import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
* @author Huathy
* @date 2023-10-21 21:17
* @description 组装任务
public class WordCountJob {
public static void main(String[] args) throws Exception {
System.out.println("inputPath => " + args[0]);
System.out.println("outputPath => " + args[1]);
String path = args[0];
String path2 = args[1];
// job需要的配置参数
Configuration configuration = new Configuration();
// 创建job
Job job = Job.getInstance(configuration, "wordCountJob");
// 注意:这一行必须设置,否则在集群的时候将无法找到Job类
// 指定输入文件
FileInputFormat.setInputPaths(job, new Path(path));
FileOutputFormat.setOutputPath(job, new Path(path2));
// 指定map相关配置
// 指定reduce
// 提交任务
* @author Huathy
* @date 2023-10-21 21:39
* @description 创建自定义映射类
* 定义输入输出类型
public static class WordMap extends Mapper<LongWritable, Text, Text, LongWritable> {
* 需要实现map函数
* 这个map函数就是可以接受keyIn,valueIn,产生keyOut、ValueOut
* @param k1
* @param v1
* @param context
* @throws IOException
* @throws InterruptedException
protected void map(LongWritable k1, Text v1, Context context) throws IOException, InterruptedException {
// k1表示每行的行首偏移量,v1表示每一行的内容
// 对获取到的每一行数据进行切割,把单词切割出来
String[] words = v1.toString().split("\W");
// 迭代切割的单词数据
for (String word : words) {
// 将迭代的单词封装为的形式
Text k2 = new Text(word);
System.out.println("k2: " + k2.toString());
LongWritable v2 = new LongWritable(1);
// 将输出
context.write(k2, v2);
* @author Huathy
* @date 2023-10-21 22:08
* @description 自定义的reducer类
public static class WordReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
* 针对v2s的数据进行累加求和,并且把最终的数据转为k3,v3输出
* @param k2
* @param v2s
* @param context
* @throws IOException
* @throws InterruptedException
protected void reduce(Text k2, Iterable<LongWritable> v2s, Context context) throws IOException, InterruptedException {
long sum = 0L;
for (LongWritable v2 : v2s) {
sum += v2.get();
// 组装K3,V3
LongWritable v3 = new LongWritable(sum);
System.out.println("k3: " + k2.toString() + " -- v3: " + v3.toString());
context.write(k2, v3);
[root@cent7-1 hadoop-3.2.4]# hadoop jar wc.jar WordCountJob hdfs://cent7-1:9000/hello.txt hdfs://cent7-1:9000/out /home/hadoop-3.2.4/wc.jar
inputPath => hdfs://cent7-1:9000/hello.txt
outputPath => hdfs://cent7-1:9000/out
set jar => /home/hadoop-3.2.4/wc.jar
2023-10-22 15:30:34,183 INFO client.RMProxy: Connecting to ResourceManager at /
2023-10-22 15:30:35,183 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
2023-10-22 15:30:35,342 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/root/.staging/job_1697944187818_0010
2023-10-22 15:30:36,196 INFO input.FileInputFormat: Total input files to process : 1
2023-10-22 15:30:37,320 INFO mapreduce.JobSubmitter: number of splits:1
2023-10-22 15:30:37,694 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1697944187818_0010
2023-10-22 15:30:37,696 INFO mapreduce.JobSubmitter: Executing with tokens: []
2023-10-22 15:30:38,033 INFO conf.Configuration: resource-types.xml not found
2023-10-22 15:30:38,034 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.
2023-10-22 15:30:38,188 INFO impl.YarnClientImpl: Submitted application application_1697944187818_0010
2023-10-22 15:30:38,248 INFO mapreduce.Job: The url to track the job: http://cent7-1:8088/proxy/application_1697944187818_0010/
2023-10-22 15:30:38,249 INFO mapreduce.Job: Running job: job_1697944187818_0010
2023-10-22 15:30:51,749 INFO mapreduce.Job: Job job_1697944187818_0010 running in uber mode : false
2023-10-22 15:30:51,751 INFO mapreduce.Job: map 0% reduce 0%
2023-10-22 15:30:59,254 INFO mapreduce.Job: map 100% reduce 0%
2023-10-22 15:31:08,410 INFO mapreduce.Job: map 100% reduce 100%
2023-10-22 15:31:09,447 INFO mapreduce.Job: Job job_1697944187818_0010 completed successfully
2023-10-22 15:31:09,578 INFO mapreduce.Job: Counters: 54
[root@cent7-1 hadoop-3.2.4]#
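# Start the MapReduce JobHistory server (on Hadoop 3.x the script is deprecated in favour of: bin/mapred --daemon start historyserver)
sbin/mr-jobhistory-daemon.sh start historyserver
# List YARN applications that are submitted, accepted or running
yarn application -list
# Kill a YARN application by its application id
yarn application -kill [application_Id]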
# 查看正在进行的任务列表
[root@cent7-1 hadoop-3.2.4]# yarn application -list
2023-10-22 16:18:38,756 INFO client.RMProxy: Connecting to ResourceManager at /
Total number of applications (application-types: [], states: [SUBMITTED, ACCEPTED, RUNNING] and tags: []):1
Application-Id Application-Name Application-Type User Queue State Final-State Progress Tracking-URL
application_1697961350721_0002 wordCountJob MAPREDUCE root default ACCEPTED UNDEFINED 0% N/A
# 结束任务
[root@cent7-1 hadoop-3.2.4]# yarn application -kill application_1697961350721_0002
2023-10-22 16:18:55,669 INFO client.RMProxy: Connecting to ResourceManager at /
Killing application application_1697961350721_0002
2023-10-22 16:18:56,795 INFO impl.YarnClientImpl: Killed application application_1697961350721_0002