mr实现count


<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.cc.pxj.wfy</groupId>
    <artifactId>phoneWcRuoZe</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <hadoop.version>2.6.0-cdh5.16.2</hadoop.version>
    </properties>
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>

    <dependencies>


        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>



        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.17</version>
        </dependency>

    </dependencies>

    <build>
        <pluginManagement>
            <plugins>

                <plugin>
                    <artifactId>maven-clean-plugin</artifactId>
                    <version>3.1.0</version>
                </plugin>

                <plugin>
                    <artifactId>maven-resources-plugin</artifactId>
                    <version>3.0.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.8.0</version>
                </plugin>
                <plugin>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <version>2.22.1</version>
                </plugin>
                <plugin>
                    <artifactId>maven-jar-plugin</artifactId>
                    <version>3.0.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-install-plugin</artifactId>
                    <version>2.5.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-deploy-plugin</artifactId>
                    <version>2.8.2</version>
                </plugin>

                <plugin>
                    <artifactId>maven-site-plugin</artifactId>
                    <version>3.7.1</version>
                </plugin>
                <plugin>
                    <artifactId>maven-project-info-reports-plugin</artifactId>
                    <version>3.0.0</version>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>

java代码

package com.ccj.pxj.homework.count;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper of the count job: splits every input line on commas and emits
 * {@code (token, 1)} for each non-empty token.
 */
public class WcCountMapper extends Mapper<LongWritable,Text, Text, IntWritable> {
    /** Count emitted for every token; the instance is never mutated. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Output key reused across calls so that no new {@link Text} is allocated
     * per token (same reuse idiom as the WordCount example in the Hadoop
     * MapReduce tutorial).
     */
    private final Text outKey = new Text();

    /**
     * Emits one {@code (token, 1)} pair per non-empty comma-separated token.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of input text
     * @param context sink the {@code (token, 1)} pairs are written to
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        for (String word : line.split(",")) {
            // "".split(",") yields [""], and "a,,b" yields an empty middle
            // element; without this guard blank lines and empty fields would
            // be counted as an empty-string word.
            if (word.isEmpty()) {
                continue;
            }
            outKey.set(word);
            context.write(outKey, ONE);
        }
    }
}
package com.ccj.pxj.homework.count;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer of the count job: collapses all counts received for one key into
 * a single total.
 */
public class WcCountReducer extends Reducer<Text, IntWritable,Text,IntWritable> {
    /**
     * Adds up every value received for {@code key} and writes
     * {@code (key, total)}.
     *
     * @param key     the key whose values are being aggregated
     * @param values  the counts received for {@code key}
     * @param context sink the {@code (key, total)} pair is written to
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable count : values) {
            total = total + count.get();
        }
        IntWritable result = new IntWritable(total);
        context.write(key, result);
    }
}
package com.ccj.pxj.homework.count;

import com.ccj.pxj.phone.utils.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver that configures and submits the count job, wiring
 * {@link WcCountMapper} and {@link WcCountReducer} together.
 *
 * <p>The input file and output directory are fixed ({@code data/wc.txt} and
 * {@code wc/count/out}); command-line arguments are not consulted for paths.
 */
public class WcCountDriver implements Tool {
    /** Input file read by the job. */
    private static final String INPUT_PATH = "data/wc.txt";

    /** Directory the job writes its results to. */
    private static final String OUTPUT_PATH = "wc/count/out";

    /** Configuration handed in through {@link #setConf(Configuration)}. */
    private Configuration conf;

    /**
     * Builds the job, submits it and blocks until it finishes.
     *
     * @param args command-line arguments; currently unused
     * @return {@code 0} when the job completed successfully, {@code 1} otherwise
     * @throws Exception anything raised while configuring or running the job
     */
    @Override
    public int run(String[] args) throws Exception {
        // 1. Obtain the Job instance
        Job job = Job.getInstance(getConf());
        // 2. Set the main class used to locate the job jar
        job.setJarByClass(WcCountDriver.class);
        // 3. Set the Mapper class
        job.setMapperClass(WcCountMapper.class);
        // 4. Set the Reducer class that sums the per-token counts
        job.setReducerClass(WcCountReducer.class);

        // 5. Set the map-output and final-output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 6. Set the input path
        // NOTE(review): FileUtils.deleteOutput is a project helper not visible
        // here; presumably it removes a pre-existing output directory so the
        // job can be re-run — confirm.
        FileUtils.deleteOutput(conf, OUTPUT_PATH);
        FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));

        // 7. Set the output path
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /** Stores the configuration used when the job is created. */
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /** Returns the configuration previously stored by {@link #setConf(Configuration)}. */
    @Override
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Command-line entry point: runs the driver through {@link ToolRunner},
     * prints the outcome and exits with the job's result code.
     *
     * @param args command-line arguments forwarded to {@link ToolRunner}
     * @throws Exception anything raised while running the job
     */
    public static void main(String[] args) throws Exception {
        int resultCode = ToolRunner.run(new WcCountDriver(), args);
        if (resultCode == 0) {
            System.out.println("执行成功!");
        } else {
            System.out.println("执行失败!");
        }
        // Propagate the job result as the process exit status; without this
        // the JVM exits with 0 even when the job failed, hiding the failure
        // from shells and schedulers.
        System.exit(resultCode);
    }
}

作者:pxj(潘陈)

你可能感兴趣的:(mr实现count)