MapReduce Experiment: Per-Year Sales Count and Total Sales Amount (Workflow + Code)

Experiment: compute the number of sales transactions and the total sales amount for each year.

Raw data:

The fields are described below:

Field           Type          Description
PROD_ID         int           Product ID
CUST_ID         int           Customer ID
TIME            Date          Sale date
CHANNEL_ID      int           Channel ID
PROMO_ID        int           Promotion ID
QUANTITY_SOLD   int           Quantity sold (units)
AMOUNT_SOLD     float(10,2)   Total sale amount (yuan)
[Figure 1: sample of the data]

Map phase: read the input; k1 is the byte offset and v1 is one line of data. Wrap the quantity sold and the sale amount in a Java bean as v2, and use the year (1998, 1999, 2000 or 2001) as k2.
Shuffle phase: since there are 4 years, use 4 partitions; sorting, combining and grouping use the framework defaults.
Reduce phase: for each key shuffled over, sum the quantity sold and the sale amount of all bean objects in the collection (the new v2) to get v3, i.e. the per-year sales count and total sales amount; k3 is k2, unchanged.
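
To make the Map step concrete, here is a minimal standalone sketch (not part of the actual job); the class name MapSketch and the sample record are made up for illustration and only assume the tab-separated field order described above:

public class MapSketch {
    public static void main(String[] args) {
        // Hypothetical tab-separated record, following the field order of the schema above
        String line = "13\t987\t1998-01-10\t3\t999\t1\t1232.16";

        String[] mes = line.split("\t");
        String year = mes[2].split("[-/]")[0];          // k2: "1998"
        int quantitySold = Integer.parseInt(mes[5]);    // goes into v2.QUANTITY_SOLD
        float amountSold = Float.parseFloat(mes[6]);    // goes into v2.AMOUNT_SOLD

        System.out.println("k2=" + year + ", v2=(" + quantitySold + ", " + amountSold + ")");
    }
}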

[Figure 2: workflow diagram]

Results (one output file per year/partition):

[Output result 1]
[Output result 2]
[Output result 3]
[Output result 4]


Other MapReduce experiments:

WordCount experiment

Salary partitioning experiment

Job-title and salary serialization and sorting experiment


Code for this experiment

sale_bean code:

package lhr.word_count.homework;

import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class sale_bean implements WritableComparable<sale_bean> {
    int QUANTITY_SOLD;
    float AMOUNT_SOLD;

    public void setQUANTITY_SOLD(int QUANTITY_SOLD) {
        this.QUANTITY_SOLD = QUANTITY_SOLD;
    }

    public int getQUANTITY_SOLD() {
        return QUANTITY_SOLD;
    }

    public float getAMOUNT_SOLD() {
        return AMOUNT_SOLD;
    }

    public void setAMOUNT_SOLD(float AMOUNT_SOLD) {
        this.AMOUNT_SOLD = AMOUNT_SOLD;
    }

    @Override
    public String toString() {
        return "销售笔数:" + QUANTITY_SOLD + "\t" + "销售总额:" + AMOUNT_SOLD;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(this.QUANTITY_SOLD);
        dataOutput.writeFloat(this.AMOUNT_SOLD);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.QUANTITY_SOLD = dataInput.readInt();
        this.AMOUNT_SOLD = dataInput.readFloat();
    }

    @Override
    public int compareTo(sale_bean o) {
        // The bean is only used as a value, so its ordering is never consulted;
        // returning 0 simply satisfies the WritableComparable interface.
        return 0;
    }
}

Mapper code:

package lhr.word_count.homework;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

public class sale_Mapper extends Mapper<LongWritable, Text, Text, sale_bean> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        /*
            Read the input: k1 is the byte offset, v1 is one line of data.
            Wrap the quantity sold and the sale amount in a bean object as v2,
            and use the year (1998, 1999, 2000 or 2001) as k2.
         */
        String[] mes = value.toString().split("\t");

        sale_bean bean = new sale_bean();
        bean.setQUANTITY_SOLD(Integer.parseInt(mes[5]));
        bean.setAMOUNT_SOLD(Float.parseFloat(mes[6]));

        context.write(new Text(mes[2].split("[-/]")[0]), bean);
    }
}

Partitioner code:

package lhr.word_count.homework;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class sale_Partitioner extends Partitioner<Text, sale_bean> {
    @Override
    public int getPartition(Text text, sale_bean sale_bean, int i) {
        /*
            There are 4 years, so 4 partitions are used.
         */
        int year = Integer.parseInt(text.toString());
        if (year == 1998) {
            return 0;
        } else if (year == 1999) {
            return 1;
        } else if (year == 2000) {
            return 2;
        } else {
            return 3;
        }
    }
}

Reducer code:

package lhr.word_count.homework;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

public class sale_Reducer extends Reducer<Text, sale_bean, Text, sale_bean> {
    @Override
    protected void reduce(Text key, Iterable<sale_bean> values, Context context) throws IOException, InterruptedException {
        /*
            For each key, sum the quantity sold and the sale amount of all bean
            objects shuffled into the collection (the new v2) to get v3, i.e. the
            per-year sales count and total sales amount; k3 is k2, unchanged.
         */
        int count1 = 0;     // total quantity sold for this year
        float count2 = 0;   // total sales amount for this year

        for (sale_bean value : values) {
            count1 += value.getQUANTITY_SOLD();
            count2 += value.getAMOUNT_SOLD();
        }

        sale_bean sale_bean = new sale_bean();
        sale_bean.setQUANTITY_SOLD(count1);
        sale_bean.setAMOUNT_SOLD(count2);

        context.write(key, sale_bean);
    }
}

Main (driver) code:

package lhr.word_count.homework;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.net.URI;

public class sale_Main extends Configured implements Tool {

    @Override
    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(super.getConf(), "sale");
        job.setJarByClass(sale_Main.class);

        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("file:///D:\\input3"));
//        TextInputFormat.addInputPath(job, new Path("hdfs://hadoop11:8020/sale_count"));

        job.setMapperClass(sale_Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(sale_bean.class);

        // Specify the partitioner class
        job.setPartitionerClass(sale_Partitioner.class);

        job.setReducerClass(sale_Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(sale_bean.class);

        // Use 4 reduce tasks to match the 4 partitions (one output file per year)
        job.setNumReduceTasks(4);

        job.setOutputFormatClass(TextOutputFormat.class);
//        Path path = new Path("hdfs://hadoop11:8020/sale_count_result");
        Path path = new Path("file:///D:\\output3");
        TextOutputFormat.setOutputPath(job, path);

//        FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop11:8020"), super.getConf(), "root");
//        if (fileSystem.exists(path)) {
//            fileSystem.delete(path, true);
//        }

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int run = ToolRunner.run(configuration, new sale_Main(), args);
        System.exit(run);
    }
}
