Setting Up a MapReduce Development Environment

1. Download hadoop-3.1.4.tar.gz from the official Hadoop site and unpack it.

Download hadoop.dll and winutils.exe from:

https://github.com/ordinaryload/Hadoop-tools

Copy winutils.exe into hadoop-3.1.4/bin, and copy hadoop.dll into C:\Windows\System32.
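Before moving on, it helps to confirm that both native files landed where Hadoop will look for them. Below is a minimal sketch of such a check, assuming the unpack location used later in the driver (the NativeSetupCheck class name and both paths are my own illustration, not part of the original setup):

import java.nio.file.Files;
import java.nio.file.Paths;

public class NativeSetupCheck {
    public static void main(String[] args) {
        // Adjust this to wherever you unpacked Hadoop (see step 1)
        String hadoopHome = "C:\\Users\\asus\\Desktop\\hadoop-3.1.4\\hadoop-3.1.4";
        System.out.println("winutils.exe present: "
                + Files.exists(Paths.get(hadoopHome, "bin", "winutils.exe")));
        System.out.println("hadoop.dll present: "
                + Files.exists(Paths.get("C:\\Windows\\System32", "hadoop.dll")));
    }
}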

2. Write the program: create the map, reduce, and driver classes (I followed a Bilibili tutorial video).

Alternatively, you can use the sample source code from this blog post:

https://www.cnblogs.com/xingluo/p/9512961.html

3. Add a NativeIO class: Windows 10 no longer supports the native function it relies on, so you need to create the class yourself.

See this blog post for the procedure: https://blog.csdn.net/weixin_42229056/article/details/82686172
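The fix in that post amounts to shadowing Hadoop's own class: copy NativeIO.java from the Hadoop 3.1.4 source tree into your project under the identical package org.apache.hadoop.io.nativeio (classes in src/main/java take precedence over the same class inside the Hadoop jars), then neutralize the Windows permission check. A sketch of the only method that changes (the rest of the file stays exactly as copied, so this fragment alone will not compile):

// src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java,
// inside the NativeIO.Windows inner class:
public static boolean access(String path, AccessRight desiredAccess)
        throws IOException {
    // The original body calls the native access0(), which is what fails
    // on Windows 10; returning true skips the permission check entirely.
    return true;
}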

4. Run the driver's main function directly.
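One gotcha when re-running the job: FileOutputFormat throws an exception if the output directory already exists. A small helper you can call at the top of main() before submitting, sketched here (the OutputCleaner name is mine, not from the original code):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OutputCleaner {
    // Recursively delete a previous run's output directory, if present,
    // so the new job can start cleanly.
    public static void deleteIfExists(Configuration conf, String dir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path(dir);
        if (fs.exists(out)) {
            fs.delete(out, true); // true = recursive
        }
    }
}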

Source code

driver:

package com.weitao.mr.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        System.setProperty("hadoop.home.dir", "C:\\Users\\asus\\Desktop\\hadoop-3.1.4\\hadoop-3.1.4");
        //获取job对象
        Configuration conf=new Configuration();
        Job job=Job.getInstance(conf);
        //设置jar位置
        job.setJarByClass(WordCountDriver.class);
        //关联map和reduce
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        //设置mapper阶段输出数据key和value类型
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        //设置最终数据输出的key和value类型
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        //设置输入路径和输出路径
        FileInputFormat.setInputPaths(job, new Path("C:\\Users\\asus\\Desktop\\words.txt"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\Users\\asus\\Desktop\\output"));
        //提交job
        boolean result=job.waitForCompletion(true);
        System.exit(result?0:1);
    }
}
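Since the reduce step just sums integers, the same class can also serve as a combiner, which pre-aggregates map output and shrinks shuffle traffic. This is optional and not part of the original driver; if you want it, add one line after setReducerClass():

// Optional: reuse the reducer below as a combiner (safe here because
// summing is associative and commutative).
job.setCombinerClass(WordCountReducer.class);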

reducer:

package com.weitao.mr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable v = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum all the counts collected for this key
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        v.set(sum);
        context.write(key, v);
    }
}

mapper:

package com.weitao.mr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    Text k = new Text();
    IntWritable v = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Convert the line to a String and split it into words
        String line = value.toString();
        String[] words = line.split(" ");
        // Emit (word, 1) for every word on the line
        for (String word : words) {
            k.set(word);
            context.write(k, v);
        }
    }
}
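Note that line.split(" ") produces empty tokens when words are separated by tabs or runs of spaces; line.split("\\s+") is a more forgiving choice if the input is not strictly single-space delimited.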

pom.xml



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.weitao.mr</groupId>
    <artifactId>mapreduce_start</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.8.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>3.1.4</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.4.1</version>
        </dependency>
    </dependencies>

    <repositories>
        <repository>
            <id>aliyun</id>
            <url>https://maven.aliyun.com/repository/public</url>
        </repository>
    </repositories>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.weitao.mr.wordcount.WordCountDriver</mainClass>
                                </transformer>
                            </transformers>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

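With the shade plugin bound to the package phase, mvn clean package produces a self-contained jar (target/mapreduce_start-1.0-SNAPSHOT.jar) that could be submitted to a cluster with hadoop jar; for the local Windows run described above, launching the driver's main() from the IDE is all you need.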