(1) It must implement the Writable interface.
(2) Deserialization uses reflection to call the no-argument constructor, so an empty constructor is required:
public FlowBean() {
super();
}
(3) Override the serialization method:
@Override
public void write(DataOutput out) throws IOException {
out.writeLong(upFlow);
out.writeLong(downFlow);
out.writeLong(sumFlow);
}
(4) Override the deserialization method:
@Override
public void readFields(DataInput in) throws IOException {
upFlow = in.readLong();
downFlow = in.readLong();
sumFlow = in.readLong();
}
(5) Note that the deserialization order must be exactly the same as the serialization order.
(6) To make the result readable in the output file, override toString(); separating fields with "\t" makes later processing easier.
(7) If the custom bean is to be transmitted as a key, it must also implement the Comparable interface (in practice WritableComparable), because the Shuffle phase of the MapReduce framework requires keys to be sortable. See the sorting case later; a short sketch of the bean being emitted from a mapper also follows this list.
@Override
public int compareTo(FlowBean o) {
// Sort in descending order, largest first
return this.sumFlow > o.getSumFlow() ? -1 : (this.sumFlow < o.getSumFlow() ? 1 : 0);
}
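For context, a minimal sketch of how such a bean is typically emitted from a mapper. The class name FlowCountMapper and the tab-separated input layout are assumptions for illustration, not part of the original notes:
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    private Text k = new Text();
    private FlowBean v = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Assumed input layout: phone \t upFlow \t downFlow
        String[] fields = value.toString().split("\t");
        k.set(fields[0]);                          // phone number as the output key
        v.setUpFlow(Long.parseLong(fields[1]));    // upstream traffic
        v.setDownFlow(Long.parseLong(fields[2]));  // downstream traffic
        v.setSumFlow(v.getUpFlow() + v.getDownFlow());
        context.write(k, v);                       // FlowBean is serialized via write()/readFields()
    }
}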
public class WholeFileInputformat extends FileInputFormat<Text, BytesWritable> {
@Override
protected boolean isSplitable(JobContext context, Path filename) {
return false;
}
@Override
public RecordReader<Text, BytesWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
WholeRecordReader recordReader = new WholeRecordReader();
recordReader.initialize(inputSplit,taskAttemptContext);
return recordReader;
}
}
public class WholeRecordReader extends RecordReader<Text, BytesWritable> {
FileSplit split;
Configuration configuration;
Text k = new Text();
BytesWritable v = new BytesWritable();
boolean isProgress = true;
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
// Initialization
this.split = (FileSplit)inputSplit;
configuration = taskAttemptContext.getConfiguration();
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
// Core business logic
if(isProgress){
byte[] buf = new byte[(int)split.getLength()];
// 1. Get the FileSystem object
Path path = split.getPath();
FileSystem fs = path.getFileSystem(configuration);
// 2. Open the input stream
FSDataInputStream fis = fs.open(path);
// 3. Copy the whole file into the buffer
IOUtils.readFully(fis,buf,0,buf.length);
// 4. Wrap the value
v.set(buf,0,buf.length);
// 5. Wrap the key (the file path)
k.set(path.toString());
// 6. Close resources
IOUtils.closeStream(fis);
isProgress = false;
return true;
}
return false;
}
@Override
public Text getCurrentKey() throws IOException, InterruptedException {
return k;
}
@Override
public BytesWritable getCurrentValue() throws IOException, InterruptedException {
return v;
}
@Override
public float getProgress() throws IOException, InterruptedException {
return 0;
}
@Override
public void close() throws IOException {
}
}
// Set the InputFormat
job.setInputFormatClass(WholeFileInputformat.class);
// Set the OutputFormat
job.setOutputFormatClass(SequenceFileOutputFormat.class);
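For reference, a minimal driver sketch showing where these two settings fit. The class names SequenceFileDriver/SequenceFileMapper/SequenceFileReducer and the args[] paths are assumptions; imports and exception handling are omitted as in the other driver snippets:
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(SequenceFileDriver.class);
job.setMapperClass(SequenceFileMapper.class);
job.setReducerClass(SequenceFileReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(BytesWritable.class);
// Plug in the custom whole-file InputFormat and write a SequenceFile as output
job.setInputFormatClass(WholeFileInputformat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean result = job.waitForCompletion(true);
System.exit(result ? 0 : 1);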
(1) Create a custom class that extends Partitioner and override the getPartition() method:
public class ProvincePartitione extends Partitioner<Text,FlowBean> {
@Override
public int getPartition(Text key, FlowBean value, int numPartitions) {
// key is the phone number
// value is the flow (traffic) information
// Get the first three digits of the phone number
String proPhoneNum = key.toString().substring(0, 3);
int partition = 4;
if("136".equals(proPhoneNum)){
partition = 0;
}else if("137".equals(proPhoneNum)){
partition = 1;
}else if("138".equals(proPhoneNum)){
partition = 2;
}else if("139".equals(proPhoneNum)){
partition = 3;
}
return partition;
}
}
(2) Set the custom Partitioner in the Job driver class:
job.setPartitionerClass(ProvincePartitione.class);
(3) After customizing the Partitioner, set the number of ReduceTasks to match the partitioning logic. (If the number of reduce tasks is smaller than the largest partition number returned, the job fails with an "Illegal partition" error; if it is larger, the extra reducers just produce empty output files; with a single reduce task the partitioner is effectively ignored.)
job.setNumReduceTasks(5);
When a bean object is transmitted as the key, it needs to implement the WritableComparable interface and override compareTo() to be sorted. (Note: for sorting, the key type of both the Mapper and the Reducer must be the bean type; a mapper sketch follows the FlowBean class below.)
public class FlowBean implements WritableComparable<FlowBean> {
private long upFlow;
private long downFlow;
private long sumFlow;
public FlowBean() {
super();
}
public FlowBean(long upFlow, long downFlow) {
this.upFlow = upFlow;
this.downFlow = downFlow;
sumFlow = upFlow + downFlow;
}
@Override
public int compareTo(FlowBean bean) {
int result;
// Core comparison condition
if(sumFlow > bean.getSumFlow()){
result = -1;
}else if (sumFlow < bean.getSumFlow()){
result = 1;
}else {
result = 0;
}
return result;
}
@Override
public void write(DataOutput out) throws IOException {
out.writeLong(upFlow);
out.writeLong(downFlow);
out.writeLong(sumFlow);
}
@Override
public void readFields(DataInput in) throws IOException {
upFlow = in.readLong();
downFlow = in.readLong();
sumFlow = in.readLong();
}
public long getUpFlow() {
return upFlow;
}
public void setUpFlow(long upFlow) {
this.upFlow = upFlow;
}
public long getDownFlow() {
return downFlow;
}
public void setDownFlow(long downFlow) {
this.downFlow = downFlow;
}
public long getSumFlow() {
return sumFlow;
}
public void setSumFlow(long sumFlow) {
this.sumFlow = sumFlow;
}
@Override
public String toString() {
return upFlow + "\t" + downFlow + "\t" + sumFlow;
}
}
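As referenced above, a sketch of a sort mapper that emits FlowBean as the key. The class name FlowSortMapper and the assumption that the input is the tab-separated output of the earlier flow-count job are illustrative:
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowSortMapper extends Mapper<LongWritable, Text, FlowBean, Text> {
    private FlowBean k = new FlowBean();
    private Text v = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Assumed input layout: phone \t upFlow \t downFlow \t sumFlow
        String[] fields = value.toString().split("\t");
        v.set(fields[0]);                         // phone number as the value
        k.setUpFlow(Long.parseLong(fields[1]));
        k.setDownFlow(Long.parseLong(fields[2]));
        k.setSumFlow(Long.parseLong(fields[3]));
        context.write(k, v);                      // the framework sorts by FlowBean.compareTo()
    }
}
The reducer then typically swaps key and value back when writing out, so the final file lists the phone number first again.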
Building on the total-sort example above:
(1) Add a custom partitioner class:
public class ProvincePartitioner extends Partitioner<FlowBean, Text> {
@Override
public int getPartition(FlowBean key, Text value, int numPartitions) {
// 1. Get the first three digits of the phone number
String preNum = value.toString().substring(0, 3);
int partition = 4;
// 2. Set the partition based on the phone number prefix
if ("136".equals(preNum)) {
partition = 0;
}else if ("137".equals(preNum)) {
partition = 1;
}else if ("138".equals(preNum)) {
partition = 2;
}else if ("139".equals(preNum)) {
partition = 3;
}
return partition;
}
}
(2) Register the partitioner in the driver class:
// Load the custom partitioner class
job.setPartitionerClass(ProvincePartitioner.class);
// Set the number of ReduceTasks
job.setNumReduceTasks(5);
6. Custom Combiner
(1) Define a Combiner that extends Reducer and override the reduce() method. (A combiner must not change the final result: summation-style aggregation is safe, but operations like averaging are not.)
public class WordcountCombiner extends Reducer<Text, IntWritable, Text,IntWritable>{
@Override
protected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
// 1. Aggregate the counts
int count = 0;
for(IntWritable v :values){
count += v.get();
}
// 2. Write out
context.write(key, new IntWritable(count));
}
}
(2) Set it in the Job driver class:
job.setCombinerClass(WordcountCombiner.class);
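Side note: when the reduce logic is itself a simple associative aggregation such as word count, the existing reducer class can usually be reused directly as the combiner (WordcountReducer is the assumed name of that reducer):
job.setCombinerClass(WordcountReducer.class);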
(1) Create a custom class that extends WritableComparator.
(2) Override the compare() method:
@Override
public int compare(WritableComparable a, WritableComparable b) {
// The business logic of the comparison goes here; return a negative value, zero, or a positive value
return result;
}
(3) Add a constructor that passes the class of the compared key to the parent class (without this step the comparator cannot instantiate the keys and throws a NullPointerException). A complete sketch follows after this snippet.
protected OrderGroupingComparator() {
super(OrderBean.class, true);
}
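Putting the three steps together, a complete sketch of the grouping comparator. It assumes an OrderBean key with a long orderId field exposed via getOrderId(); that field name is illustrative:
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class OrderGroupingComparator extends WritableComparator {

    protected OrderGroupingComparator() {
        // Pass the key class to the parent so it can instantiate keys for comparison
        super(OrderBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean aBean = (OrderBean) a;
        OrderBean bBean = (OrderBean) b;
        // Keys with the same orderId are grouped into one reduce() call
        return Long.compare(aBean.getOrderId(), bBean.getOrderId());
    }
}
In the driver it is registered with job.setGroupingComparatorClass(OrderGroupingComparator.class);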
Define a custom OutputFormat class:
package outputformat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class FilterOutputFormat extends FileOutputFormat<Text, NullWritable> {
@Override
public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
return new FRecordWriter(job);
}
}
Write the RecordWriter class:
package outputformat;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
public class FRecordWriter extends RecordWriter<Text, NullWritable> {
FSDataOutputStream fosatguigu;
FSDataOutputStream fosother;
public FRecordWriter(TaskAttemptContext job) throws IOException {
// 1. Get the file system
FileSystem fs = FileSystem.get(job.getConfiguration());
// 2. Create the output stream to atguigu.log
fosatguigu = fs.create(new Path("f:/MR_IO/output_atguigu/atguigu.log"));
// 3. Create the output stream to other.log
fosother = fs.create(new Path("f:/MR_IO/output_other/other.log"));
}
@Override
public void write(Text key, NullWritable value) throws IOException, InterruptedException {
// Check whether the key contains "atguigu": if so, write it to atguigu.log, otherwise to other.log
if(key.toString().contains("atguigu")){
fosatguigu.write(key.toString().getBytes());
}else {
fosother.write(key.toString().getBytes());
}
}
@Override
public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
IOUtils.closeStream(fosatguigu);
IOUtils.closeStream(fosother);
}
}
Write the FilterDriver class:
// Register the custom output format with the job
job.setOutputFormatClass(FilterOutputFormat.class);
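A fuller FilterDriver sketch for context. FilterMapper/FilterReducer and the args[] paths are assumptions; imports are omitted as in the other driver snippets:
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(FilterDriver.class);
job.setMapperClass(FilterMapper.class);
job.setReducerClass(FilterReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
// Register the custom OutputFormat
job.setOutputFormatClass(FilterOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
// Although FRecordWriter opens its own files, FileOutputFormat still needs an output
// directory so it can write the _SUCCESS marker there
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean result = job.waitForCompletion(true);
System.exit(result ? 0 : 1);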
// Compression method
private static void compress(String fileName, String method) throws IOException, ClassNotFoundException {
// 1. Open the input stream
FileInputStream fis = new FileInputStream(new File(fileName));
Class classcodec = Class.forName(method);
CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(classcodec, new Configuration());
// 2. Open the output stream, wrapped with the codec
FileOutputStream fos = new FileOutputStream(new File(fileName + codec.getDefaultExtension()));
CompressionOutputStream cos = codec.createOutputStream(fos);
// 3. Copy the stream
IOUtils.copyBytes(fis, cos, 1024*1024, false);
// 4. Close resources
IOUtils.closeStream(cos);
IOUtils.closeStream(fos);
IOUtils.closeStream(fis);
}
// Decompression method
private static void decompress(String fileName) throws IOException {
// 1. Check the compression format (by file extension)
CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
CompressionCodec codec = factory.getCodec(new Path(fileName));
if(codec == null){
System.out.print("can't process!");
return;
}
// 2. Open the input stream, wrapped with the codec
FileInputStream fis = new FileInputStream(new File(fileName));
CompressionInputStream cis = codec.createInputStream(fis);
// 3. Open the output stream
FileOutputStream fos = new FileOutputStream(new File(fileName + ".decoded"));
// 4. Copy the stream
IOUtils.copyBytes(cis, fos, 1024*1024, false);
// 5. Close resources
IOUtils.closeStream(fos);
IOUtils.closeStream(cis);
IOUtils.closeStream(fis);
}
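A usage sketch for the two methods above. The file path is a placeholder, and BZip2Codec is just one of the built-in codecs (its default extension is .bz2):
public static void main(String[] args) throws IOException, ClassNotFoundException {
    // Compress f:/MR_IO/web.log into f:/MR_IO/web.log.bz2
    compress("f:/MR_IO/web.log", "org.apache.hadoop.io.compress.BZip2Codec");
    // Decompress it again; the codec is detected from the file extension
    decompress("f:/MR_IO/web.log.bz2");
}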