Input format: date SPACE time TAB temperature. The job below finds, for each month of each year, the three highest temperatures (a classic secondary-sort example), writing one output file per year.
Sample input file:
1949-10-01 14:21:02 34c
1949-10-02 14:01:02 36c
1950-01-01 11:21:02 32c
1950-10-01 12:21:02 37c
1951-12-01 12:21:02 23c
1950-10-02 12:21:02 41c
1950-10-03 12:21:02 27c
1951-07-01 12:21:02 45c
1951-07-02 12:21:02 46c
1951-07-03 12:21:03 47c
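The runner below reads this data with KeyValueTextInputFormat, which splits every line on the first TAB: the key is the timestamp ("1949-10-01 14:21:02") and the value is the temperature ("34c"). Here is a minimal stand-alone sketch of the parsing the mapper relies on (the class name ParseDemo is only for illustration):

import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

public class ParseDemo {
    public static void main(String[] args) throws Exception {
        // One sample record, exactly as KeyValueTextInputFormat splits it on the first TAB.
        String key = "1949-10-01 14:21:02";   // date and time
        String value = "34c";                 // temperature with trailing unit

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date date = sdf.parse(key);
        Calendar c = Calendar.getInstance();
        c.setTime(date);

        // Calendar.MONTH is 0-based, Calendar.DAY_OF_MONTH is 1-based.
        int year = c.get(Calendar.YEAR);
        int month = c.get(Calendar.MONTH) + 1;
        int day = c.get(Calendar.DAY_OF_MONTH);

        // Strip the trailing 'c' to get the numeric temperature.
        double hot = Double.parseDouble(value.substring(0, value.length() - 1));

        System.out.println(year + "-" + month + "-" + day + " -> " + hot);  // 1949-10-1 -> 34.0
    }
}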
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WRunner {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("weather");
        job.setJarByClass(WRunner.class);

        job.setMapperClass(WMapper.class);
        job.setReducerClass(WReducer.class);
        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(DoubleWritable.class);

        // Secondary sort: custom partitioner, sort comparator and grouping comparator.
        job.setPartitionerClass(MyPartitioner.class);
        job.setSortComparatorClass(MySort.class);
        job.setGroupingComparatorClass(MyGroup.class);

        // KeyValueTextInputFormat splits each line on the first TAB:
        // key = "yyyy-MM-dd HH:mm:ss", value = temperature such as "34c".
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setNumReduceTasks(3);

        Path in = new Path("/home/jinzhao/mrtest/input");
        FileInputFormat.setInputPaths(job, in);

        Path out = new Path("/home/jinzhao/mrtest/output");
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);

        job.waitForCompletion(true);
    }

    static class WMapper extends Mapper<Text, Text, MyKey, DoubleWritable> {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            try {
                Date date = sdf.parse(key.toString());
                Calendar c = Calendar.getInstance();
                c.setTime(date);
                int year = c.get(Calendar.YEAR);
                int month = c.get(Calendar.MONTH);      // 0-based
                int day = c.get(Calendar.DAY_OF_MONTH); // 1-based
                String h = value.toString().trim();
                double hot = Double.parseDouble(h.substring(0, h.length() - 1)); // drop trailing 'c'
                context.write(new MyKey(year, month, day, hot), new DoubleWritable(hot));
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
    }

    static class WReducer extends Reducer<MyKey, DoubleWritable, Text, NullWritable> {
        @Override
        protected void reduce(MyKey key, Iterable<DoubleWritable> values, Context context)
                throws IOException, InterruptedException {
            // Each group holds one (year, month); values arrive sorted by temperature descending,
            // so the first three records are the three hottest readings of that month.
            int i = 0;
            for (DoubleWritable v : values) {
                ++i;
                String msg = key.getYear() + "\t" + (key.getMonth() + 1) + "\t"
                        + key.getDay() + "\t" + v.get();
                context.write(new Text(msg), NullWritable.get());
                if (i == 3)
                    break;
            }
        }
    }
}
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Composite key (year, month, day, temperature) that is serialized and shuffled between tasks.
 */
public class MyKey implements WritableComparable<MyKey> {

    private int year;
    private int month;
    private int day;
    private double hot;

    public MyKey() {
        super();
    }

    public MyKey(int year, int month, int day, double hot) {
        this.year = year;
        this.month = month;
        this.day = day;
        this.hot = hot;
    }

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getMonth() {
        return month;
    }

    public void setMonth(int month) {
        this.month = month;
    }

    public int getDay() {
        return day;
    }

    public void setDay(int day) {
        this.day = day;
    }

    public double getHot() {
        return hot;
    }

    public void setHot(double hot) {
        this.hot = hot;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(month);
        out.writeInt(day);
        out.writeDouble(hot);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Fields must be read in exactly the order they were written.
        this.year = in.readInt();
        this.month = in.readInt();
        this.day = in.readInt();
        this.hot = in.readDouble();
    }

    /**
     * Default ordering when the object is used as a key: year, then month, then temperature.
     * (MySort overrides this ordering during the shuffle.)
     */
    @Override
    public int compareTo(MyKey o) {
        int r1 = Integer.compare(this.year, o.getYear());
        if (r1 != 0)
            return r1;
        int r2 = Integer.compare(this.month, o.getMonth());
        if (r2 != 0)
            return r2;
        return Double.compare(this.hot, o.getHot());
    }
}
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: MyKeys with the same year and month form one group,
 * i.e. they are passed to a single reduce() call.
 */
public class MyGroup extends WritableComparator {

    public MyGroup() {
        super(MyKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        MyKey k1 = (MyKey) a;
        MyKey k2 = (MyKey) b;
        int r1 = Integer.compare(k1.getYear(), k2.getYear());
        if (r1 != 0)
            return r1;
        return Integer.compare(k1.getMonth(), k2.getMonth());
    }
}
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator: order keys by year and month ascending, then by temperature descending,
 * so the hottest records of each month come first within their group.
 */
public class MySort extends WritableComparator {

    public MySort() {
        super(MyKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        MyKey k1 = (MyKey) a;
        MyKey k2 = (MyKey) b;
        int r1 = Integer.compare(k1.getYear(), k2.getYear());
        if (r1 != 0)
            return r1;
        int r2 = Integer.compare(k1.getMonth(), k2.getMonth());
        if (r2 != 0)
            return r2;
        return -Double.compare(k1.getHot(), k2.getHot());
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partitioner: each partition is processed by one reduce task; records are
 * partitioned by year, so 1949, 1950 and 1951 go to reducers 0, 1 and 2.
 */
public class MyPartitioner extends Partitioner<MyKey, DoubleWritable> {

    @Override
    public int getPartition(MyKey key, DoubleWritable value, int numReduceTasks) {
        return (key.getYear() - 1949) % numReduceTasks;
    }
}
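For the sample input above, with three reduce tasks the result should come out roughly as follows (one tab-separated file per year; within every month the three hottest readings, hottest first):

part-r-00000 (1949):
1949	10	2	36.0
1949	10	1	34.0

part-r-00001 (1950):
1950	1	1	32.0
1950	10	2	41.0
1950	10	1	37.0
1950	10	3	27.0

part-r-00002 (1951):
1951	7	3	47.0
1951	7	2	46.0
1951	7	1	45.0
1951	12	1	23.0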
Friend-of-friend recommendation: for every user, suggest people who are not yet direct friends, ranked by the number of common friends. Input format: user TAB friend1 SPACE friend2 SPACE ...
Sample input file:
小明 老王 如花 林志玲
老王 小明 凤姐
如花 小明 李刚 凤姐
林志玲 小明 李刚 凤姐 郭美美
李刚 如花 凤姐 林志玲
郭美美 凤姐 林志玲
凤姐 如花 老王 林志玲 郭美美
Output of the first job:
Format: user1 SPACE user2 TAB count (the number of common friends of a pair that is not yet directly connected)
Output of the second job:
Format: user TAB recommendation1 SPACE recommendation2 SPACE ... (recommendations sorted by count, highest first)
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Friends {

    /**
     * Job 1 mapper: emit every (user, friend) pair with flag 0 (already friends) and
     * every pair of that user's friends with flag 1 (they share this user as a common friend).
     */
    static class FofMapper extends Mapper<Text, Text, Fof, IntWritable> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            String user = key.toString();
            String[] friends = value.toString().split(" ");
            for (int i = 0; i < friends.length; ++i) {
                context.write(new Fof(user, friends[i]), new IntWritable(0));
                for (int j = i + 1; j < friends.length; ++j)
                    context.write(new Fof(friends[i], friends[j]), new IntWritable(1));
            }
        }
    }

    /**
     * Job 1 reducer: a pair that ever carries flag 0 is already connected and is dropped;
     * otherwise the flags are summed to get the number of common friends.
     */
    static class FofReducer extends Reducer<Fof, IntWritable, Fof, IntWritable> {
        @Override
        protected void reduce(Fof key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            boolean flag = true;
            for (IntWritable i : values) {
                if (i.get() == 0) {
                    flag = false;
                    break;
                } else {
                    sum = sum + i.get();
                }
            }
            if (flag)
                context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();

            Job job = Job.getInstance(conf);
            job.setJarByClass(Friends.class);
            job.setJobName("friend-I");
            job.setMapperClass(FofMapper.class);
            job.setReducerClass(FofReducer.class);
            job.setMapOutputKeyClass(Fof.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setInputFormatClass(KeyValueTextInputFormat.class);

            Path in = new Path("/home/jinzhao/mrtest/input");
            FileInputFormat.setInputPaths(job, in);
            Path out = new Path("/home/jinzhao/mrtest/output");
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(out))
                fs.delete(out, true);
            FileOutputFormat.setOutputPath(job, out);

            if (job.waitForCompletion(true)) {
                // Job 2 reads job 1's output and groups/sorts the candidates per user.
                // It relies on the default single reducer, so all records of a user land together.
                Job job2 = Job.getInstance(conf);
                job2.setJarByClass(Friends.class);
                job2.setJobName("friend-II");
                job2.setMapperClass(SortMapper.class);
                job2.setReducerClass(SortReducer.class);
                job2.setMapOutputKeyClass(User.class);
                job2.setMapOutputValueClass(User.class);
                job2.setInputFormatClass(KeyValueTextInputFormat.class);
                job2.setSortComparatorClass(FSort.class);
                job2.setGroupingComparatorClass(FGroup.class);

                Path in2 = new Path("/home/jinzhao/mrtest/output");
                FileInputFormat.setInputPaths(job2, in2);
                Path out2 = new Path("/home/jinzhao/mrtest/output2");
                if (fs.exists(out2))
                    fs.delete(out2, true);
                FileOutputFormat.setOutputPath(job2, out2);

                job2.waitForCompletion(true);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Job 2 mapper: emit each pair in both directions so every user collects all of its candidates. */
    static class SortMapper extends Mapper<Text, Text, User, User> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] friends = key.toString().split(" ");
            int count = Integer.parseInt(value.toString());
            context.write(new User(friends[0], count), new User(friends[1], count));
            context.write(new User(friends[1], count), new User(friends[0], count));
        }
    }

    /** Job 2 reducer: values arrive sorted by count descending; concatenate them into one line. */
    static class SortReducer extends Reducer<User, User, Text, Text> {
        @Override
        protected void reduce(User key, Iterable<User> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (User i : values)
                sb.append(i.getUsername() + "," + i.getCount() + " ");
            context.write(new Text(key.getUsername()), new Text(sb.toString().trim()));
        }
    }
}
import org.apache.hadoop.io.Text;

/**
 * A pair of users stored in canonical (sorted) order, so that "A B" and "B A"
 * become the same key during the shuffle.
 */
public class Fof extends Text {

    public Fof() {
        super();
    }

    public Fof(String a, String b) {
        super(getFof(a, b));
    }

    public static String getFof(String a, String b) {
        int r = a.compareTo(b);
        if (r < 0)
            return a + " " + b;
        else
            return b + " " + a;
    }
}
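A small illustrative check of the canonical ordering (the class name FofDemo exists only for this sketch): whichever way round the two users are passed in, the same key string comes out, which is what lets FofReducer see the flag-0 and flag-1 records of a pair together. For example, 小明 and 凤姐 are not direct friends in the sample data but share three common friends (老王, 如花, 林志玲), so job 1 should output the line 凤姐 小明 TAB 3.

public class FofDemo {
    public static void main(String[] args) {
        // For the input line "老王 TAB 小明 凤姐" FofMapper emits:
        //   Fof("老王", "小明") -> 0   (existing friendship)
        //   Fof("老王", "凤姐") -> 0   (existing friendship)
        //   Fof("小明", "凤姐") -> 1   (they share 老王 as a common friend)
        System.out.println(Fof.getFof("小明", "凤姐"));  // prints the pair in canonical order
        System.out.println(Fof.getFof("凤姐", "小明"));  // prints exactly the same string
    }
}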
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Key/value type of the second job: a user name plus the common-friend count.
 */
public class User implements WritableComparable<User> {

    private String username;
    private int count;

    public User() {
    }

    public User(String username, int count) {
        this.username = username;
        this.count = count;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(username);
        out.writeInt(count);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.username = in.readUTF();
        this.count = in.readInt();
    }

    @Override
    public int compareTo(User o) {
        int c1 = this.username.compareTo(o.username);
        if (c1 != 0)
            return c1;
        return Integer.compare(this.count, o.count);
    }
}
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator for job 2: all User keys with the same username go to one reduce() call.
 */
public class FGroup extends WritableComparator {

    public FGroup() {
        super(User.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        User u1 = (User) a;
        User u2 = (User) b;
        return u1.getUsername().compareTo(u2.getUsername());
    }
}
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator for job 2: by username ascending, then by common-friend count descending,
 * so the strongest recommendations appear first in each user's list.
 */
public class FSort extends WritableComparator {

    public FSort() {
        super(User.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        User u1 = (User) a;
        User u2 = (User) b;
        int c1 = u1.getUsername().compareTo(u2.getUsername());
        if (c1 != 0)
            return c1;
        return Integer.compare(u2.getCount(), u1.getCount());
    }
}