package com.hadoop.reduce.model;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Order/product record used for the join
 * @author linhaiy
 * @date 2019.05.18
 */
public class OrderInfo implements Writable, Cloneable {
// Order ID
private Integer orderId;
// Order date
private String orderDate;
// Product ID
private String pid;
// Quantity
private Integer amount;
// Product name
private String pname;
// Category
private Integer categoryId;
// Price
private Double price;
/**
 * This field is the key to the join: the object carries fields from both the order file
 * and the product file, so when a single input file is loaded only part of the fields can
 * be filled in; the rest are filled in during the join. The flag records which side this
 * object holds: "0" means order data, "1" means product data.
 */
private String flag;
public OrderInfo() {
}
@Override
public Object clone() throws CloneNotSupportedException {
return super.clone();
}
@Override
public void write(DataOutput output) throws IOException {
output.writeInt(orderId);
output.writeUTF(orderDate);
output.writeUTF(pid);
output.writeInt(amount);
output.writeUTF(pname);
output.writeInt(categoryId);
output.writeDouble(price);
output.writeUTF(flag);
}
@Override
public void readFields(DataInput input) throws IOException {
orderId = input.readInt();
orderDate = input.readUTF();
pid = input.readUTF();
amount = input.readInt();
pname = input.readUTF();
categoryId = input.readInt();
price = input.readDouble();
flag = input.readUTF();
}
public void set(Integer orderId, String orderDate, String pid, Integer amount, String pname, Integer categoryId,
Double price, String flag) {
this.orderId = orderId;
this.orderDate = orderDate;
this.pid = pid;
this.amount = amount;
this.pname = pname;
this.categoryId = categoryId;
this.price = price;
this.flag = flag;
}
public Integer getOrderId() {
return orderId;
}
public void setOrderId(Integer orderId) {
this.orderId = orderId;
}
public String getOrderDate() {
return orderDate;
}
public void setOrderDate(String orderDate) {
this.orderDate = orderDate;
}
public String getPid() {
return pid;
}
public void setPid(String pid) {
this.pid = pid;
}
public Integer getAmount() {
return amount;
}
public void setAmount(Integer amount) {
this.amount = amount;
}
public String getPname() {
return pname;
}
public void setPname(String pname) {
this.pname = pname;
}
public Integer getCategoryId() {
return categoryId;
}
public void setCategoryId(Integer categoryId) {
this.categoryId = categoryId;
}
public Double getPrice() {
return price;
}
public void setPrice(Double price) {
this.price = price;
}
public String getFlag() {
return flag;
}
public void setFlag(String flag) {
this.flag = flag;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("{");
sb.append("\"orderId\":").append(orderId);
sb.append(",\"orderDate\":\"").append(orderDate).append('\"');
sb.append(",\"pid\":").append(pid);
sb.append(",\"amount\":").append(amount);
sb.append(",\"pname\":\"").append(pname).append('\"');
sb.append(",\"categoryId\":").append(categoryId);
sb.append(",\"price\":").append(price);
sb.append(",\"flag\":\"").append(flag).append('\"');
sb.append('}');
return sb.toString();
}
}
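A quick way to see the Writable contract above in action is a serialization round trip: write() and readFields() must handle the fields in exactly the same order. The following standalone sketch is illustrative only (the class name is not part of the project); it serializes an OrderInfo to a byte array and reads it back.
package com.hadoop.reduce.model;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
public class OrderInfoRoundTrip {
    public static void main(String[] args) throws IOException {
        OrderInfo in = new OrderInfo();
        in.set(1001, "20170822", "p1", 3, "", 0, 0.0, "0");
        // Serialize with write()
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bytes));
        // Deserialize with readFields() and verify the round trip
        OrderInfo out = new OrderInfo();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(out); // prints the same fields that were written
    }
}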
package com.hadoop.reduce.mapper;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import com.hadoop.reduce.model.OrderInfo;
import java.io.IOException;
/**
 * MapReduce table join: mapper side
 * @author linhaiy
 * @date 2019.05.18
 */
public class JoinMapper extends Mapper<LongWritable, Text, Text, OrderInfo> {
private Text text = new Text();
private OrderInfo orderInfo = new OrderInfo();
private final static String ORDER_FILE_NAME = "order";
private final static String PRODUCT_FILE_NAME = "product";
private final static String ORDER_FLAG = "0";
private final static String PRODUCT_FLAG = "1";
/**
 * Reads order.txt lines in the format "1001,20170822,p1,3" and product.txt lines in the format "p1,防空火箭,1,20.2"
 * @param key
 * @param value
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// Decode the raw line bytes as GBK
String line = new String(value.getBytes(), 0, value.getLength(), "GBK");
// Skip header lines, which are marked with a leading #
if (line.startsWith("#")) {
return;
}
// Get the input split for this task; InputSplit is the abstract base type and can be cast to FileSplit here
InputSplit inputSplit = context.getInputSplit();
FileSplit fileSplit = (FileSplit) inputSplit;
// The file name ("order..." or "product...") tells us which kind of record this line holds
String fileName = fileSplit.getPath().getName();
// Branch on the file name
String pid = "";
String[] split = line.split(",");
if (fileName.startsWith(ORDER_FILE_NAME)) {
// Order record: order ID, date, product ID, quantity
Integer orderId = Integer.parseInt(split[0]);
String orderDate = split[1];
pid = split[2];
Integer amount = Integer.parseInt(split[3]);
// set(Integer orderId, String orderDate, String pid, Integer amount, String pname, Integer categoryId, Double price, String flag)
orderInfo.set(orderId, orderDate, pid, amount, "", 0, 0.0, ORDER_FLAG);
} else {
// Product record: product ID, product name, category, price
pid = split[0];
String pname = split[1];
Integer categoryId = Integer.parseInt(split[2]);
Double price = Double.valueOf(split[3]);
orderInfo.set(0, "", pid, 0, pname, categoryId, price, PRODUCT_FLAG);
}
text.set(pid);
context.write(text, orderInfo);
}
}
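For the sample lines shown in the javadoc, the mapper emits (pid, OrderInfo) pairs like the following (values rendered with OrderInfo.toString()). Because both records share the key "p1", they arrive in the same reduce() call, which is what makes the join possible:
order.txt line 1001,20170822,p1,3 -> key "p1", value {"orderId":1001,"orderDate":"20170822","pid":"p1","amount":3,"pname":"","categoryId":0,"price":0.0,"flag":"0"}
product.txt line p1,防空火箭,1,20.2 -> key "p1", value {"orderId":0,"orderDate":"","pid":"p1","amount":0,"pname":"防空火箭","categoryId":1,"price":20.2,"flag":"1"}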
package com.hadoop.reduce.reducer;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import com.hadoop.reduce.model.OrderInfo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * MapReduce table join: reducer side
 * @author linhaiy
 * @date 2019.05.18
 */
public class JoinReduce extends Reducer<Text, OrderInfo, OrderInfo, NullWritable> {
private final static String ORDER_FLAG = "0";
private final static String PRODUCT_FLAG = "1";
/**
 * Consumes the mapper output: the key is the product pid, the values are the OrderInfo objects grouped under it
 * @param key
 * @param values
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
@Override
protected void reduce(Text key, Iterable<OrderInfo> values, Context context)
throws IOException, InterruptedException {
// Holds the product record; each pid maps to exactly one product, so a single object is enough
OrderInfo product = new OrderInfo();
// Holds the order records; a product can appear in many orders
List<OrderInfo> list = new ArrayList<>();
// Iterate over the map output for this pid
for (OrderInfo info : values) {
// Decide whether this value came from the order file or the product file
if (ORDER_FLAG.equals(info.getFlag())) {
// Order record; clone it because Hadoop reuses the same value instance across iterations
OrderInfo tmp = new OrderInfo();
try {
tmp = (OrderInfo) info.clone();
} catch (Exception e) {
e.printStackTrace();
}
list.add(tmp);
} else {
// Product record
try {
product = (OrderInfo) info.clone();
} catch (Exception e) {
e.printStackTrace();
}
}
}
// At this point orders and product are fully separated: the orders sit in the list, the product in its own object
// Copy the product fields onto each order
for (OrderInfo tmp : list) {
tmp.setPname(product.getPname());
tmp.setCategoryId(product.getCategoryId());
tmp.setPrice(product.getPrice());
// Emit the joined record
context.write(tmp, NullWritable.get());
}
}
}
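For the same sample data, the reducer copies pname, categoryId and price from the product record onto each buffered order and writes one line per order (the flag stays "0" because it is not overwritten):
{"orderId":1001,"orderDate":"20170822","pid":"p1","amount":3,"pname":"防空火箭","categoryId":1,"price":20.2,"flag":"0"}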
package com.hadoop.reduce.service;
import java.io.IOException;
import javax.annotation.PostConstruct;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import com.hadoop.reduce.bean.StaffProvincePartitioner;
import com.hadoop.reduce.bean.WeiboInputFormat;
import com.hadoop.reduce.mapper.CounterMapper;
import com.hadoop.reduce.mapper.FriendsMapper;
import com.hadoop.reduce.mapper.JoinMapper;
import com.hadoop.reduce.mapper.StaffMap;
import com.hadoop.reduce.mapper.WeatherMap;
import com.hadoop.reduce.mapper.WeiboMapper;
import com.hadoop.reduce.mapper.WordCount;
import com.hadoop.reduce.mapper.WordCountMap;
import com.hadoop.reduce.model.GroupSortModel;
import com.hadoop.reduce.model.OrderInfo;
import com.hadoop.reduce.model.StaffModel;
import com.hadoop.reduce.model.Weibo;
import com.hadoop.reduce.reducer.FriendsReduce;
import com.hadoop.reduce.reducer.JoinReduce;
import com.hadoop.reduce.reducer.StaffReduce;
import com.hadoop.reduce.reducer.WeatherReduce;
import com.hadoop.reduce.reducer.WeiboReduce;
import com.hadoop.reduce.reducer.WordCountReduce;
import com.hadoop.util.GroupSort;
/**
 * Map/Reduce utility class
 * @author linhaiy
 * @date 2019.05.18
 */
@Component
public class ReduceJobsUtils {
@Value("${hdfs.path}")
private String path;
private static String hdfsPath;
/**
 * Build the HDFS configuration
 * @return
 */
public static Configuration getConfiguration() {
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS", hdfsPath);
configuration.set("mapred.job.tracker", hdfsPath);
// For cluster mode, run on YARN:
// configuration.set("mapreduce.framework.name", "yarn");
// This setting lets the main method locate that machine's MR environment:
// configuration.set("yarn.resourcemanager.hostname", "node1");
return configuration;
}
/**
 * MapReduce table join
 * @param jobName
 * @param inputPath
 * @param outputPath
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public static void join(String jobName, String inputPath, String outputPath)
throws IOException, ClassNotFoundException, InterruptedException {
Configuration config = getConfiguration();
Job job = Job.getInstance(config, jobName);
// Set the job's main class; the containing jar is located from this class
job.setJarByClass(OrderInfo.class);
job.setMapperClass(JoinMapper.class);
job.setReducerClass(JoinReduce.class);
// Mapper output key/value types
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(OrderInfo.class);
// Reducer output key/value types
job.setOutputKeyClass(OrderInfo.class);
job.setOutputValueClass(NullWritable.class);
// Input and output locations
FileInputFormat.setInputPaths(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
job.waitForCompletion(true);
}
@PostConstruct
public void getPath() {
hdfsPath = this.path;
}
public static String getHdfsPath() {
return hdfsPath;
}
}
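For readers who want to run the join without the Spring wiring, the same job setup can be packaged as a plain driver. This is a minimal sketch only: the class name, the fs.defaultFS address and the input/output paths below are assumptions, not values from the project.
package com.hadoop.reduce.service;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import com.hadoop.reduce.mapper.JoinMapper;
import com.hadoop.reduce.model.OrderInfo;
import com.hadoop.reduce.reducer.JoinReduce;
public class JoinDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // assumed HDFS address
        Job job = Job.getInstance(conf, "join");
        job.setJarByClass(JoinDriver.class);
        job.setMapperClass(JoinMapper.class);
        job.setReducerClass(JoinReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(OrderInfo.class);
        job.setOutputKeyClass(OrderInfo.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/input/join"));   // assumed dir holding order.txt and product.txt
        FileOutputFormat.setOutputPath(job, new Path("/output/join")); // assumed output dir (must not exist yet)
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}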
package com.hadoop.reduce.service;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.springframework.stereotype.Service;
import com.hadoop.hdfs.service.HdfsService;
/**
 * MapReduce job service
 * @author linhaiy
 * @date 2019.05.18
 */
@Service
public class MapReduceService {
// Default reduce output directory
private static final String OUTPUT_PATH = "/output";
/**
 * MapReduce table join
 * @param jobName
 * @param inputPath
 * @throws Exception
 */
public void join(String jobName, String inputPath) throws Exception {
if (StringUtils.isEmpty(jobName) || StringUtils.isEmpty(inputPath)) {
return;
}
// Output directory = /output/<current job name>
String outputPath = OUTPUT_PATH + "/" + jobName;
if (HdfsService.existFile(outputPath)) {
HdfsService.deleteFile(outputPath);
}
ReduceJobsUtils.join(jobName, inputPath, outputPath);
}
}
package com.hadoop.reduce.controller;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import com.hadoop.reduce.service.MapReduceService;
import com.hadoop.util.Result;
/**
 * MapReduce controller layer
 * @author linhaiy
 * @date 2019.05.18
 */
@RestController
@RequestMapping("/hadoop/reduce")
public class MapReduceAction {
@Autowired
MapReduceService mapReduceService;
/**
 * MapReduce table join
 * @param jobName
 * @param inputPath
 * @return
 * @throws Exception
 */
@RequestMapping(value = "join", method = RequestMethod.POST)
@ResponseBody
public Result join(@RequestParam("jobName") String jobName, @RequestParam("inputPath") String inputPath) throws Exception {
if (StringUtils.isEmpty(jobName) || StringUtils.isEmpty(inputPath)) {
return new Result(Result.FAILURE, "Request parameters are empty");
}
mapReduceService.join(jobName, inputPath);
return new Result(Result.SUCCESS, "Table join completed successfully");
}
}
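With the application running, the join can be triggered over HTTP, for example with POST /hadoop/reduce/join and the parameters jobName=join and inputPath=/input/join (the input path here is illustrative; the directory is expected to contain both the order and the product file, and the joined output lands under /output/<jobName> as built in MapReduceService).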