mapreduce的实际应用

上一篇 | 下一篇:hadoop mapreduce…
需求:
1、求出评分最高的电影
2、求出评论次数最多的电影

package com.yinwx.movie;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * A Hadoop Writable holding one movie rating record.
 * Created by 18573 on 2018/11/8.
 */
public class MovieBean implements WritableComparable{

private String movie;
private int rate;
private int timeStamp;
private int uid;
private int coment;

public MovieBean() {
}

public MovieBean(String movie, int rate, int timeStamp, int uid,int coment) {
this.movie = movie;
this.rate = rate;
this.timeStamp = timeStamp;
this.uid = uid;
this.coment = coment;
}

public void set(String movie, String rate, String timeStamp, String uid) {
this.movie = movie;
this.rate = Integer.parseInt(rate);
this.timeStamp = Integer.parseInt(timeStamp);
this.uid = Integer.parseInt(uid);
}

public String getMovie() {
return movie;
}

public void setMovie(String movie) {
this.movie = movie;
}

public int getRate() {
return rate;
}

public void setRate(int rate) {
this.rate = rate;
}

public int getTimeStamp() {
return timeStamp;
}

public void setTimeStamp(int timeStamp) {
this.timeStamp = timeStamp;
}

public int getUid() {
return uid;
}

public void setUid(int uid) {
this.uid = uid;
}

public int getComent() {
return coment;
}

public void setComent(int coment) {
this.coment = coment;
}

public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeUTF(movie);
dataOutput.writeInt(rate);
dataOutput.writeInt(timeStamp);
dataOutput.writeInt(uid);
dataOutput.writeInt(coment);

}

public void readFields(DataInput dataInput) throws IOException {
this.movie = dataInput.readUTF();
this.rate = dataInput.readInt();
this.timeStamp = dataInput.readInt();
this.uid = dataInput.readInt();
this.coment = dataInput.readInt();

}

public int compareTo(MovieBean o) {

//return (int)(this.getTimeStamp()-o.getTimeStamp());
return o.getComent()-this.getComent();
}

@Override
public String toString() {
return “\t” + rate + “\t” + coment;
}
}

package com.yinwx.movie;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

/**
 * MapReduce driver: aggregates total rating and review count per movie,
 * then outputs movies ordered by review count (descending).
 * Created by 18573 on 2018/11/8.
 */
    public class MovieDriver {

private static class MovieMapper extends Mapper{

Text k = new Text();
MovieBean v = new MovieBean();

@Override
protected void map(LongWritable key,Text value, Context context) throws IOException, InterruptedException {

String line = value.toString().replace(""","").replace("{","").replace("}",""); //获取一行
String[] arr = line.split(",");
String moviecode = arr[0].split("?[1];
String rate = arr[1].split("?[1];
String timeStamp = arr[2].split("?[1];
String uid = arr[3].split("?[1];
k.set(moviecode);
v.set(moviecode,rate,timeStamp,uid);
context.write(k,v);

}
}

private static class MovieReducer extends Reducer{

TreeMap treeMap = new TreeMap();

@Override
protected void reduce(Text key, Iterable values, Context context)
throws IOException, InterruptedException {

int rate = 0;
int comment = 0;
for(MovieBean movie:values){
rate += movie.getRate();
comment++;
}
Text text = new Text(key.toString());
MovieBean bean = new MovieBean();
bean.setRate(rate);
bean.setComent(comment);

treeMap.put(bean,text);

}

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
Set> entrySet = treeMap.entrySet();
for(Map.Entry ent :entrySet){
context.write(ent.getValue(), ent.getKey());
}
}
}

public static void main(String[] args) throws Exception{

Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(MovieDriver.class);
job.setMapperClass(MovieMapper.class);
job.setReducerClass(MovieReducer.class);

job.setMapOutputKeyClass(Text.class);
job.setOutputValueClass(MovieBean.class);

job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

//告诉框架,我们要处理的数据文件在那个路劲下
FileInputFormat.setInputPaths(job, new Path(“D://movie/input”));

Path out = new Path(“D:/movie/output”);
FileSystem fileSystem = FileSystem.get(conf);
if (fileSystem.exists(out)) {
fileSystem.delete(out, true);
}
//告诉框架,我们的处理结果要输出到什么地方
FileOutputFormat.setOutputPath(job, out);

boolean res = job.waitForCompletion(true);

System.exit(res?0:1);

}
}

你可能感兴趣的:(hadoop)