05 MapReduce应用案例02

6、統計每個月份中,最高的三個溫度。

輸入格式:年月日 空格 時分秒 TAB 溫度(數值後帶單位字母 c,例如 34c)

inputfile:

1949-10-01 14:21:02    34c
1949-10-02 14:01:02    36c
1950-01-01 11:21:02    32c
1950-10-01 12:21:02    37c
1951-12-01 12:21:02    23c
1950-10-02 12:21:02    41c
1950-10-03 12:21:02    27c
1951-07-01 12:21:02    45c
1951-07-02 12:21:02    46c
1951-07-03 12:21:03    47c

 
 

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Driver, mapper and reducer for the "top three temperatures per month" job.
 *
 * <p>Input lines are read through {@link KeyValueTextInputFormat}: the text
 * before the first TAB ("yyyy-MM-dd HH:mm:ss") is the key and the text after
 * it (a number followed by a one-character unit, e.g. "34c") is the value.
 * The job relies on {@code MyPartitioner}, {@code MySort} and {@code MyGroup}
 * so that each reduce call receives one (year, month) group with its
 * temperatures already in descending order.
 */
public class WRunner {
	/** Maximum number of records written per (year, month) group. */
	private static final int TOP_N = 3;

	/**
	 * Configures and runs the job, then exits with status 0 on success and 1
	 * on failure.
	 *
	 * @param args unused; input and output paths are hard-coded
	 * @throws Exception if the job cannot be configured or submitted
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf = new Configuration();

		Job job = Job.getInstance(conf);
		
		job.setJobName("weather");
		job.setJarByClass(WRunner.class);
		
		job.setMapperClass(WMapper.class);
		job.setReducerClass(WReducer.class);
		
		job.setMapOutputKeyClass(MyKey.class);
		job.setMapOutputValueClass(DoubleWritable.class);
		// Declare the reducer's output types explicitly instead of relying on the defaults.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);
		
		job.setPartitionerClass(MyPartitioner.class);
		job.setSortComparatorClass(MySort.class);
		job.setGroupingComparatorClass(MyGroup.class);
		
		job.setInputFormatClass(KeyValueTextInputFormat.class);
		
		job.setNumReduceTasks(3);
		
		Path in = new Path("/home/jinzhao/mrtest/input");
		FileInputFormat.setInputPaths(job, in);
		
		// The output directory must not exist when the job starts, so remove any previous run.
		Path out = new Path("/home/jinzhao/mrtest/output");
		FileSystem fs = FileSystem.get(conf);
		if (fs.exists(out))
			fs.delete(out, true);
		FileOutputFormat.setOutputPath(job, out);
		
		// Report the job result to the caller instead of always exiting with 0.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
	
	/**
	 * Parses one input record into a {@code MyKey} (year, month, day,
	 * temperature) and the temperature as value. Records whose timestamp or
	 * temperature cannot be parsed are reported on stderr and skipped.
	 */
	static class WMapper extends Mapper<Text, Text, MyKey, DoubleWritable>{
		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		
		@Override
		protected void map(Text key, Text value, Context context)
				throws IOException, InterruptedException {
			String h = value.toString().trim();
			if (h.isEmpty()) {
				// Nothing after the TAB: there is no temperature to parse.
				System.err.println("Skipping record without temperature: " + key);
				return;
			}
			try {
				Date date = sdf.parse(key.toString());
				Calendar c = Calendar.getInstance();
				c.setTime(date);
				int year = c.get(Calendar.YEAR);
				// Calendar.MONTH is 0-based; the reducer adds 1 when printing.
				int month = c.get(Calendar.MONTH);
				// Calendar.DAY_OF_MONTH is already 1-based.
				int day = c.get(Calendar.DAY_OF_MONTH);
				
				// Drop the trailing unit character ("34c" -> "34").
				double hot = Double.parseDouble(h.substring(0, h.length()-1));
				
				context.write(new MyKey(year, month, day, hot), new DoubleWritable(hot));
			} catch (ParseException | NumberFormatException e) {
				// A single malformed line should not fail the whole job.
				System.err.println("Skipping malformed record: " + key + "\t" + value);
				e.printStackTrace();
			}
		}
		
	}
	
	/**
	 * Writes at most {@link #TOP_N} lines per (year, month) group in the form
	 * {@code year TAB month TAB day TAB temperature}. The values arrive in the
	 * order produced by the job's sort comparator, so the first ones are the
	 * hottest.
	 */
	static class WReducer extends Reducer<MyKey, DoubleWritable, Text, NullWritable>{

		@Override
		protected void reduce(MyKey key, Iterable<DoubleWritable> values, Context context)
				throws IOException, InterruptedException {
			int emitted = 0;
			for(DoubleWritable v : values){
				// Hadoop refreshes the key object as the value iterator advances,
				// so key.getDay() belongs to the current value.
				// Month is stored 0-based (+1 for display); day is already 1-based.
				String msg = key.getYear() + "\t" + (key.getMonth() + 1) + "\t" + key.getDay() + "\t" + v.get();
				context.write(new Text(msg), NullWritable.get());
				if (++emitted == TOP_N)
					break;
			}
		}

	}
}
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Composite map-output key (year, month, day, temperature) that Hadoop
 * serializes between the map and reduce phases.
 */
public class MyKey implements WritableComparable<MyKey> {
	private int year;
	private int month;
	private int day;
	private double hot;

	/** No-argument constructor; fields are filled in later by {@link #readFields}. */
	public MyKey(){
	}

	public MyKey(int year, int month, int day, double hot){
		setYear(year);
		setMonth(month);
		setDay(day);
		setHot(hot);
	}

	public int getYear() {
		return this.year;
	}

	public void setYear(int year) {
		this.year = year;
	}

	public int getMonth() {
		return this.month;
	}

	public void setMonth(int month) {
		this.month = month;
	}

	public int getDay() {
		return this.day;
	}

	public void setDay(int day) {
		this.day = day;
	}

	public double getHot() {
		return this.hot;
	}

	public void setHot(double hot) {
		this.hot = hot;
	}

	/**
	 * Reads the fields in the same order {@link #write} emits them:
	 * year, month, temperature, day.
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		year = in.readInt();
		month = in.readInt();
		hot = in.readDouble();
		day = in.readInt();
	}

	/** Writes year, month, temperature and day, in that order. */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(year);
		out.writeInt(month);
		out.writeDouble(hot);
		out.writeInt(day);
	}

	/**
	 * Natural ordering used when this object acts as a key: ascending by
	 * year, then month, then temperature. The day is not compared.
	 */
	@Override
	public int compareTo(MyKey other) {
		int byYear = Integer.compare(year, other.getYear());
		if (byYear != 0) {
			return byYear;
		}
		int byMonth = Integer.compare(month, other.getMonth());
		if (byMonth != 0) {
			return byMonth;
		}
		return Double.compare(hot, other.getHot());
	}
}

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: keys with the same year and month compare as equal,
 * so they are handed to a single reduce call.
 */
public class MyGroup extends WritableComparator{
	public MyGroup(){
		// true: let the comparator create MyKey instances to deserialize into.
		super(MyKey.class, true);
	}

	public int compare (WritableComparable a, WritableComparable b){
		MyKey left = (MyKey)a;
		MyKey right = (MyKey)b;

		int byYear = Integer.compare(left.getYear(), right.getYear());
		return byYear != 0
				? byYear
				: Integer.compare(left.getMonth(), right.getMonth());
	}
}

package hadoop.wheather;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator: ascending by year, then ascending by month, then
 * descending by temperature, so the hottest records of a month come first.
 */
public class MySort extends WritableComparator{
	public MySort(){
		// true: let the comparator create MyKey instances to deserialize into.
		super(MyKey.class, true);
	}

	public int compare (WritableComparable a, WritableComparable b){
		MyKey left = (MyKey)a;
		MyKey right = (MyKey)b;

		int byYear = Integer.compare(left.getYear(), right.getYear());
		if (byYear != 0) {
			return byYear;
		}

		int byMonth = Integer.compare(left.getMonth(), right.getMonth());
		if (byMonth != 0) {
			return byMonth;
		}

		// Operands swapped to get descending temperature order.
		return Double.compare(right.getHot(), left.getHot());
	}
}

package hadoop.wheather;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partitioner that assigns records to reduce tasks by year, so all records
 * of one year are handled by the same reduce task.
 */
public class MyPartitioner extends Partitioner<MyKey, DoubleWritable>{
	/** Year that maps to partition 0. */
	private static final int BASE_YEAR = 1949;

	/**
	 * Returns the partition index for the given key.
	 *
	 * @param key            map-output key; only its year is used
	 * @param value          map-output value (ignored)
	 * @param numReduceTasks number of partitions
	 * @return an index in {@code [0, numReduceTasks)}
	 */
	@Override
	public int getPartition(MyKey key, DoubleWritable value, int numReduceTasks) {
		int partition = (key.getYear() - BASE_YEAR) % numReduceTasks;
		// Java's % keeps the sign of the dividend, so years before BASE_YEAR
		// would otherwise produce a negative (illegal) partition index.
		return partition < 0 ? partition + numReduceTasks : partition;
	}
	
}

7、社交網路的朋友推薦算法

格式:用戶 TAB 朋友1 空格 朋友2 空格 ...

inputfile:

小明    老王 如花 林志玲
老王    小明 凤姐
如花    小明 李刚 凤姐
林志玲    小明 李刚 凤姐 郭美美
李刚    如花 凤姐 林志玲
郭美美    凤姐 林志玲
凤姐    如花 老王 林志玲 郭美美

第一次輸出:

格式:用戶1 空格 用戶2 TAB 次數(兩人的共同好友數;已經是直接好友的配對不會輸出)

第二次輸出:

格式:用戶 TAB 推薦1,次數 空格 推薦2,次數 空格...(依共同好友數由多到少排列)

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Friend-recommendation example made of two chained MapReduce jobs.
 *
 * <p>Job 1 ({@link FofMapper} / {@link FofReducer}) counts, for every pair of
 * users who are not direct friends, how many friends they have in common.
 * Job 2 ({@link SortMapper} / {@link SortReducer}) reads that output and
 * writes, per user, the list of recommended users with their counts.
 */
public class Friends {

	/**
	 * For a line {@code user TAB f1 f2 ...} emits {@code (user, fi) -> 0} to
	 * mark a direct friendship and {@code (fi, fj) -> 1} for every pair of
	 * that user's friends (they share {@code user} as a common friend).
	 */
	static class FofMapper extends Mapper<Text, Text, Fof, IntWritable>{

		@Override
		protected void map(Text key, Text value, Context context)
				throws IOException, InterruptedException {
			String user = key.toString();
			String line = value.toString().trim();
			if (line.isEmpty())
				return; // no friends listed: nothing to emit, and avoids an empty-name pair
			// " +" tolerates repeated spaces, which would otherwise yield empty names.
			String[] friends = line.split(" +");
			for (int i = 0; i < friends.length; ++i){
				context.write(new Fof(user, friends[i]), new IntWritable(0));
				for (int j = i + 1; j < friends.length; ++j)
					context.write(new Fof(friends[i], friends[j]), new IntWritable(1));
			}
		}
	}
	
	/**
	 * Sums the 1-markers of a pair and writes {@code pair -> count}, unless a
	 * 0-marker shows the two users are already direct friends.
	 */
	static class FofReducer extends Reducer<Fof, IntWritable, Fof, IntWritable>{

		@Override
		protected void reduce(Fof key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable i : values){
				if (i.get() == 0)
					return; // already direct friends: not a recommendation
				sum += i.get();
			}
			context.write(key, new IntWritable(sum));
		}
		
	}
	
	/**
	 * Runs job 1 and, if it succeeds, job 2 on job 1's output. Exits with
	 * status 1 when either job fails or an exception is thrown; returns
	 * normally on success.
	 *
	 * @param args unused; all paths are hard-coded
	 */
	public static void main(String[] args){
		boolean succeeded = false;
		try {
			Configuration conf = new Configuration();
			
			Job job = Job.getInstance(conf);
			job.setJarByClass(Friends.class);
			job.setJobName("friend-I");
			job.setMapperClass(FofMapper.class);
			job.setReducerClass(FofReducer.class);
			job.setMapOutputKeyClass(Fof.class);
			job.setMapOutputValueClass(IntWritable.class);
			// Declare the reducer's output types explicitly instead of relying on the defaults.
			job.setOutputKeyClass(Fof.class);
			job.setOutputValueClass(IntWritable.class);
			
			job.setInputFormatClass(KeyValueTextInputFormat.class);
			
			Path in = new Path("/home/jinzhao/mrtest/input");
			FileInputFormat.setInputPaths(job, in);
			
			// Remove output left over from a previous run.
			Path out = new Path("/home/jinzhao/mrtest/output");
			FileSystem fs = FileSystem.get(conf);
			if (fs.exists(out))
				fs.delete(out, true);
			FileOutputFormat.setOutputPath(job,  out);
			
			if ( job.waitForCompletion(true)){
				Job job2 = Job.getInstance(conf);
				job2.setJarByClass(Friends.class);
				job2.setJobName("friend-II");
				job2.setMapperClass(SortMapper.class);
				job2.setReducerClass(SortReducer.class);
				job2.setMapOutputKeyClass(User.class);
				job2.setMapOutputValueClass(User.class);
				job2.setOutputKeyClass(Text.class);
				job2.setOutputValueClass(Text.class);
				
				job2.setInputFormatClass(KeyValueTextInputFormat.class);
				
				job2.setSortComparatorClass(FSort.class);
				job2.setGroupingComparatorClass(FGroup.class);
				
				// Job 2 consumes the directory job 1 just wrote.
				Path in2 = new Path("/home/jinzhao/mrtest/output");
				FileInputFormat.setInputPaths(job2, in2);
				
				Path out2 = new Path("/home/jinzhao/mrtest/output2");
				if (fs.exists(out2))
					fs.delete(out2, true);
				FileOutputFormat.setOutputPath(job2,  out2);
				succeeded = job2.waitForCompletion(true);
			}
		} catch (Exception e){
			e.printStackTrace();
		}

		// Surface failures to the caller instead of always exiting with 0.
		if (!succeeded)
			System.exit(1);
	}
	
	/**
	 * Reads job 1's {@code userA userB TAB count} lines and emits the pair in
	 * both directions so each user receives the other as a candidate.
	 */
	static class SortMapper extends Mapper<Text, Text, User, User>{

		@Override
		protected void map(Text key, Text value, Context context)
				throws IOException, InterruptedException {
			String[] friends = key.toString().split(" ");
			int count = Integer.parseInt(value.toString());
			context.write(new User(friends[0], count), new User(friends[1], count));
			context.write(new User(friends[1], count), new User(friends[0], count));
		}
		
	}
	
	/**
	 * Writes {@code user TAB name1,count1 name2,count2 ...} for each user,
	 * keeping the candidates in the order the values arrive.
	 */
	static class SortReducer extends Reducer<User, User, Text, Text>{

		@Override
		protected void reduce(User key, Iterable<User> values, Context context)
				throws IOException, InterruptedException {
			StringBuilder sb = new StringBuilder();
			for (User i : values)
				sb.append(i.getUsername()).append(',').append(i.getCount()).append(' ');
			
			context.write(new Text(key.getUsername()), new Text(sb.toString().trim()));
		}
		
	}
}

import org.apache.hadoop.io.Text;

/**
 * Text key holding an unordered pair of user names, stored in a canonical
 * order so that (a, b) and (b, a) produce the same key.
 */
public class Fof extends Text{
	public Fof(){
		super();
	}

	public Fof(String a, String b){
		super(getFof(a, b));
	}

	/**
	 * Joins the two names with a single space, placing the one that sorts
	 * first (by {@link String#compareTo}) in front; equal names give "b a".
	 */
	public static String getFof(String a, String b){
		boolean aFirst = a.compareTo(b) < 0;
		return aFirst ? a + " " + b : b + " " + a;
	}
}

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;


/**
 * Writable (user name, count) pair used as both key and value in the second
 * friend-recommendation job.
 */
public class User implements WritableComparable<User>{
	private String username;
	private int count;
	
	/** No-argument constructor; fields are filled in later by {@link #readFields}. */
	public User(){}

	public User(String username, int count){
		this.username = username;
		this.count = count;
	}

	public String getUsername() {
		return username;
	}

	public void setUsername(String username) {
		this.username = username;
	}

	public int getCount() {
		return count;
	}

	public void setCount(int count) {
		this.count = count;
	}

	/** Writes the user name (modified UTF-8) followed by the count. */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(username);
		out.writeInt(count);
	}

	/** Reads the fields in the order {@link #write} emits them. */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.username = in.readUTF();
		this.count = in.readInt();
	}

	/**
	 * Orders by user name, then by ascending count.
	 */
	@Override
	public int compareTo(User arg0) {
		int c1 = this.username.compareTo(arg0.username);
		if (c1 != 0)
			return c1;
		// Integer.compare instead of subtraction: a difference can overflow int.
		return Integer.compare(this.count, arg0.getCount());
	}

	/**
	 * Hash of the user name only. A hash-based partitioner then sends every
	 * record of one user to the same reduce task regardless of count; the
	 * identity hash inherited from Object would scatter them.
	 */
	@Override
	public int hashCode() {
		return username == null ? 0 : username.hashCode();
	}

	/** Two users are equal when both name and count match (consistent with compareTo). */
	@Override
	public boolean equals(Object obj) {
		if (this == obj)
			return true;
		if (!(obj instanceof User))
			return false;
		User other = (User)obj;
		boolean sameName = username == null ? other.username == null : username.equals(other.username);
		return sameName && count == other.count;
	}

}

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: User keys with the same user name compare as equal,
 * whatever their count, so they are reduced together.
 */
public class FGroup extends WritableComparator{
	public FGroup(){
		// true: let the comparator create User instances to deserialize into.
		super(User.class, true);
	}

	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		User left = (User)a;
		User right = (User)b;
		String leftName = left.getUsername();
		String rightName = right.getUsername();
		return leftName.compareTo(rightName);
	}
}

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator: ascending by user name, then descending by count, so the
 * candidates with the highest count come first for each user.
 */
public class FSort extends WritableComparator{
	public FSort(){
		// true: let the comparator create User instances to deserialize into.
		super(User.class, true);
	}

	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		User u1 = (User)a;
		User u2 = (User)b;
		
		int c1 = u1.getUsername().compareTo(u2.getUsername());
		if (c1 != 0)
			return c1;
		// Operands swapped for descending order; Integer.compare avoids the
		// overflow that subtracting two ints can cause.
		return Integer.compare(u2.getCount(), u1.getCount());
	}
}





你可能感兴趣的:(05 MapReduce应用案例02)