Custom Partitioner



package com.ccse.hadoop.partitioner;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

import com.ccse.hadoop.mapreduce.KpiWritable;

public class MobileApp {

	public static final String INPUT_PATH = "hdfs://chaoren1:9000/mobile/mobile.dat";
	public static final String OUTPUT_PATH = "hdfs://chaoren1:9000/mobileout";
	
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
		fileSystem.delete(new Path(OUTPUT_PATH), true);
		
		Job job = new Job(conf, MobileApp.class.getSimpleName());
		job.setJarByClass(MobileApp.class);
		
		FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
		
		job.setMapperClass(MyMapper.class);
		job.setPartitionerClass(HashPartitioner.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(KpiWritable.class);
		
		job.setReducerClass(MyReducer.class);
		job.setNumReduceTasks(2);    // run the job with two reduce tasks
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(KpiWritable.class);
		
		FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
		
		job.waitForCompletion(true);
	}
	
	public static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable> {
		private Text mapperKey = new Text();

		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// each line is a tab-separated traffic record; field 1 is the phone number,
			// fields 5-8 are the four traffic counters
			String[] kpis = value.toString().split("\t");
			mapperKey.set(kpis[1]);
			context.write(mapperKey, new KpiWritable(Long.parseLong(kpis[5]), Long.parseLong(kpis[6]),
					Long.parseLong(kpis[7]), Long.parseLong(kpis[8])));
		}
	}
	
	public static class MyReducer extends Reducer<Text, KpiWritable, Text, KpiWritable> {
		@Override
		protected void reduce(Text key, Iterable<KpiWritable> values, Context context)
				throws IOException, InterruptedException {
			long upPackNum = 0L;
			long downPackNum = 0L;
			long upPayLoad = 0L;
			long downPayLoad = 0L;
			// sum the four traffic counters for this phone number
			for (KpiWritable kpi : values) {
				upPackNum += kpi.getUpPackNum();
				downPackNum += kpi.getDownPackNum();
				upPayLoad += kpi.getUpPayLoad();
				downPayLoad += kpi.getDownPayLoad();
			}
			context.write(key, new KpiWritable(upPackNum, downPackNum, upPayLoad, downPayLoad));
		}
	}

}
The first version, shown above, sets the default HashPartitioner explicitly. With job.setNumReduceTasks(2) it produces two output files (part-r-00000 and part-r-00001), and keys are spread between them according to their hash codes.
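
For reference, HashPartitioner chooses the target reducer purely from the key's hash code. The minimal sketch below shows that logic, written against the Text/KpiWritable types used in this job (it mirrors what org.apache.hadoop.mapreduce.lib.partition.HashPartitioner does):

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

import com.ccse.hadoop.mapreduce.KpiWritable;

// Sketch: the reducer index depends only on the key's hashCode, so the
// phone-number keys end up spread across both output files.
public class HashPartitionerSketch extends Partitioner<Text, KpiWritable> {

	@Override
	public int getPartition(Text key, KpiWritable value, int numReduceTasks) {
		// mask the sign bit so the result is non-negative, then take the remainder
		return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
	}
}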


package com.ccse.hadoop.partitioner;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.ccse.hadoop.mapreduce.KpiWritable;

public class MobileApp {

	public static final String INPUT_PATH = "hdfs://chaoren1:9000/mobile/mobile.dat";
	public static final String OUTPUT_PATH = "hdfs://chaoren1:9000/mobileout";
	
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
		fileSystem.delete(new Path(OUTPUT_PATH), true);
		
		Job job = new Job(conf, MobileApp.class.getSimpleName());
		job.setJarByClass(MobileApp.class);
		
		FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
		
		job.setMapperClass(MyMapper.class);
		job.setPartitionerClass(MyPartitioner.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(KpiWritable.class);
		
		job.setReducerClass(MyReducer.class);
		job.setNumReduceTasks(2);    // run the job with two reduce tasks
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(KpiWritable.class);
		
		FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
		
		job.waitForCompletion(true);
	}
	
	public static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable> {
		private Text mapperKey = new Text();

		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// each line is a tab-separated traffic record; field 1 is the phone number,
			// fields 5-8 are the four traffic counters
			String[] kpis = value.toString().split("\t");
			mapperKey.set(kpis[1]);
			context.write(mapperKey, new KpiWritable(Long.parseLong(kpis[5]), Long.parseLong(kpis[6]),
					Long.parseLong(kpis[7]), Long.parseLong(kpis[8])));
		}
	}
	
	public static class MyReducer extends Reducer<Text, KpiWritable, Text, KpiWritable> {
		@Override
		protected void reduce(Text key, Iterable<KpiWritable> values, Context context)
				throws IOException, InterruptedException {
			long upPackNum = 0L;
			long downPackNum = 0L;
			long upPayLoad = 0L;
			long downPayLoad = 0L;
			// sum the four traffic counters for this phone number
			for (KpiWritable kpi : values) {
				upPackNum += kpi.getUpPackNum();
				downPackNum += kpi.getDownPackNum();
				upPayLoad += kpi.getUpPayLoad();
				downPayLoad += kpi.getDownPayLoad();
			}
			context.write(key, new KpiWritable(upPackNum, downPackNum, upPayLoad, downPayLoad));
		}
	}
	
	public static class MyPartitioner extends Partitioner<Text, KpiWritable> {

		@Override
		public int getPartition(Text key, KpiWritable value, int numPartitions) {
			// an 11-character key is a mobile phone number and goes to reducer 0;
			// every other key goes to reducer 1
			return key.toString().length() == 11 ? 0 : 1;
		}
	}

}

The second version plugs in the custom MyPartitioner. Records whose key is a mobile phone number (11 characters) are routed to reducer 0 and collected in one output file (part-r-00000), while all other records are routed to reducer 1 and end up in the other file (part-r-00001).
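
KpiWritable (imported from com.ccse.hadoop.mapreduce) is not shown in this post. The sketch below is a hypothetical reconstruction based only on how it is used above: a plain Writable carrying four long counters, with a no-arg constructor for deserialization. The real class may differ in field names or serialization order.

package com.ccse.hadoop.mapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

// Hypothetical sketch of KpiWritable: four long traffic counters serialized in a fixed order.
public class KpiWritable implements Writable {

	private long upPackNum;
	private long downPackNum;
	private long upPayLoad;
	private long downPayLoad;

	public KpiWritable() {
		// no-arg constructor required by Hadoop when deserializing
	}

	public KpiWritable(long upPackNum, long downPackNum, long upPayLoad, long downPayLoad) {
		this.upPackNum = upPackNum;
		this.downPackNum = downPackNum;
		this.upPayLoad = upPayLoad;
		this.downPayLoad = downPayLoad;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeLong(upPackNum);
		out.writeLong(downPackNum);
		out.writeLong(upPayLoad);
		out.writeLong(downPayLoad);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		upPackNum = in.readLong();
		downPackNum = in.readLong();
		upPayLoad = in.readLong();
		downPayLoad = in.readLong();
	}

	public long getUpPackNum() { return upPackNum; }

	public long getDownPackNum() { return downPackNum; }

	public long getUpPayLoad() { return upPayLoad; }

	public long getDownPayLoad() { return downPayLoad; }

	@Override
	public String toString() {
		// this is what TextOutputFormat writes into the output files
		return upPackNum + "\t" + downPackNum + "\t" + upPayLoad + "\t" + downPayLoad;
	}
}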


