JSON (JavaScript Object Notation) is a lightweight data-interchange format.
Sample data, movie.txt (one JSON record per line):
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"2"}
{"movie":"1193","rate":"3","timeStamp":"978300760","uid":"3"}
{"movie":"661","rate":"1","timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":"2","timeStamp":"978300760","uid":"2"}
{"movie":"661","rate":"4","timeStamp":"978302109","uid":"3"}
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"3"}
{"movie":"661","rate":"1","timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":"3","timeStamp":"978300760","uid":"2"}
{"movie":"661","rate":"4","timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"3"}
{"movie":"661","rate":"1","timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":"2","timeStamp":"978300760","uid":"2"}
{"movie":"661","rate":"4","timeStamp":"978302109","uid":"5"}
The corresponding JavaBean class. Note that rate and timeStamp are declared as numeric types even though the JSON values are quoted strings; both Gson and fastjson coerce numeric strings when binding:

public class MovieBean {
    private String movie;
    private double rate;
    private long timeStamp;
    private String uid;
    // getters, setters, and toString omitted
}
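The snippets below call getters and setters on MovieBean and print it directly, so the class is assumed to also carry standard accessors and a toString. A minimal sketch of those omitted members (names assumed to match the fields):

public String getMovie() { return movie; }
public void setMovie(String movie) { this.movie = movie; }
public double getRate() { return rate; }
public void setRate(double rate) { this.rate = rate; }
public long getTimeStamp() { return timeStamp; }
public void setTimeStamp(long timeStamp) { this.timeStamp = timeStamp; }
public String getUid() { return uid; }
public void setUid(String uid) { this.uid = uid; }

@Override
public String toString() {
    return "MovieBean{movie='" + movie + "', rate=" + rate
            + ", timeStamp=" + timeStamp + ", uid='" + uid + "'}";
}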
Add the JSON parsing libraries to pom.xml:
<!-- fastjson (Alibaba) -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.79</version>
</dependency>
<!-- gson (Google) -->
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.8.9</version>
</dependency>
Parsing code, demonstrating both libraries on each line:

import com.alibaba.fastjson.JSON;
import com.google.gson.Gson;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

public class Test {
    public static void main(String[] args) throws IOException {
        // Locate movie.txt under resources
        String path = Test.class.getClassLoader().getResource("movie.txt").getPath();
        BufferedReader br = new BufferedReader(new FileReader(path));
        Gson gson = new Gson();
        String line = null;
        while ((line = br.readLine()) != null) {
            // e.g. {"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}

            // fastjson: bind the JSON string to an object
            MovieBean movieBean = JSON.parseObject(line, MovieBean.class);
            System.out.println(movieBean);
            // fastjson: serialize the object back to a JSON string
            String s1 = JSON.toJSONString(movieBean);
            System.out.println(s1);

            // Gson: bind the JSON string to an object
            MovieBean mb = gson.fromJson(line, MovieBean.class);
            System.out.println(mb);
            // Gson: serialize the object back to a JSON string
            String s2 = gson.toJson(mb);
            System.out.println(s2);
        }
        br.close();
    }
}
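Assuming the toString sketched earlier, the first input line prints roughly:

MovieBean{movie='1193', rate=5.0, timeStamp=978300760, uid='1'}
{"movie":"1193","rate":5.0,"timeStamp":978300760,"uid":"1"}
MovieBean{movie='1193', rate=5.0, timeStamp=978300760, uid='1'}
{"movie":"1193","rate":5.0,"timeStamp":978300760,"uid":"1"}

Note that the quoted "5" in the input has become the number 5.0 after the round trip, because the bean declares rate as a double.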
First job: compute the average rating of each movie.
import com.google.gson.Gson;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MovieAvgRate {
    /*
      K1: byte offset where the line starts
      V1: one line of input
      K2: movie id
      V2: rating
    */
    private static class MovieMapper extends Mapper<LongWritable, Text, Text, DoubleWritable> {
        Gson gs = new Gson();
        Text k2 = new Text();
        DoubleWritable v2 = new DoubleWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context) {
            try {
                // Bind each line to a MovieBean
                MovieBean movieBean = gs.fromJson(value.toString(), MovieBean.class);
                String movie = movieBean.getMovie();
                double rate = movieBean.getRate();
                k2.set(movie);
                v2.set(rate);
                context.write(k2, v2);
            } catch (Exception e) {
                // Log and skip malformed lines instead of failing the task
                System.out.println("bad line: " + value.toString());
            }
        }
    }

    private static class MovieReduce extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
        DoubleWritable v3 = new DoubleWritable();

        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
            double sum = 0;
            int count = 0;
            for (DoubleWritable value : values) {
                sum += value.get();
                count++;
            }
            v3.set(sum / count);
            context.write(key, v3);
        }
    }

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        // Create the job
        Job job = Job.getInstance(conf, "movie");
        // Mapper class
        job.setMapperClass(MovieMapper.class);
        // Reducer class
        job.setReducerClass(MovieReduce.class);
        // Map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        // Reduce output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // Input path
        FileInputFormat.setInputPaths(job, new Path("d:\\work\\abc\\movie.txt"));
        // Output path
        FileOutputFormat.setOutputPath(job, new Path("d:\\work\\abc\\out_put3"));
        // Submit and wait for completion
        job.waitForCompletion(true);
    }
}
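For the sample file, movie 1193 receives ratings 5, 3, 2, 5, 3, 5, 2 (sum 25 over 7 ratings) and movie 661 receives 3, 1, 4, 1, 4, 1, 4 (sum 18 over 7), so part-r-00000 contains, with averages shown here to three decimals:

1193	3.571
661	2.571

(The job itself prints the full double-precision value.)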
In the next two jobs the mapper emits the whole MovieBean as the map output value, so Hadoop has to serialize it between the map and reduce phases. For that, MovieBean must implement Hadoop's Writable interface:
import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class MovieBean implements Writable {
    private String movie;
    private double rate;
    private long timeStamp;
    private String uid;

    // getters, setters, and toString omitted

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        // Serialize the fields in a fixed order
        dataOutput.writeUTF(movie);
        dataOutput.writeDouble(rate);
        dataOutput.writeLong(timeStamp);
        dataOutput.writeUTF(uid);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        // Deserialize in exactly the same order as write()
        movie = dataInput.readUTF();
        rate = dataInput.readDouble();
        timeStamp = dataInput.readLong();
        uid = dataInput.readUTF();
    }
}
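Before wiring the bean into a job, a quick local round trip through a byte buffer is a simple way to check that write() and readFields() agree. A minimal sketch (class name hypothetical; assumes the setters and toString shown earlier):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class WritableRoundTrip {
    public static void main(String[] args) throws IOException {
        MovieBean in = new MovieBean();
        in.setMovie("1193");
        in.setRate(5.0);
        in.setTimeStamp(978300760L);
        in.setUid("1");

        // Serialize with write() into an in-memory buffer
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        in.write(new DataOutputStream(bos));

        // Deserialize with readFields() from the same bytes
        MovieBean out = new MovieBean();
        out.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println(out); // should print the same field values as 'in'
    }
}

Second job: for each movie, compute both the average rating and the number of distinct users who rated it. The mapper now sends the whole MovieBean to the reducer, which is exactly where the Writable implementation is exercised.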
import com.google.gson.Gson;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

public class Movie {
    private static class MovieMapper extends Mapper<LongWritable, Text, Text, MovieBean> {
        Gson gs = new Gson();
        Text k2 = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            try {
                MovieBean movieBean = gs.fromJson(value.toString(), MovieBean.class);
                // Key by movie id; the whole bean is the map output value
                String movie = movieBean.getMovie();
                k2.set(movie);
                context.write(k2, movieBean);
            } catch (Exception e) {
                e.printStackTrace();
                // System.out.println("bad line: " + value.toString());
            }
        }
    }

    private static class MovieReducer extends Reducer<Text, MovieBean, Text, Text> {
        Text v3 = new Text();

        @Override
        protected void reduce(Text key, Iterable<MovieBean> values, Context context) throws IOException, InterruptedException {
            // Sum of all ratings for this movie
            double rateSum = 0.0;
            // Number of ratings
            int count = 0;
            // A user may rate the same movie more than once; collecting the
            // uids in a set de-duplicates them, so the set size is the number
            // of distinct users
            Set<String> set = new HashSet<>();
            for (MovieBean mb : values) {
                rateSum += mb.getRate();
                count++;
                set.add(mb.getUid());
            }
            v3.set("avgRate:" + rateSum / count + " userCount:" + set.size());
            context.write(key, v3);
        }
    }

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        // Create the job
        Job job = Job.getInstance(conf, "movie2");
        // Mapper class
        job.setMapperClass(MovieMapper.class);
        // Reducer class
        job.setReducerClass(MovieReducer.class);
        // Map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MovieBean.class);
        // Reduce output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Input path
        FileInputFormat.setInputPaths(job, new Path("d:\\work\\abc\\movie.txt"));
        // Output path
        FileOutputFormat.setOutputPath(job, new Path("d:\\work\\abc\\out_put4"));
        // Submit and wait for completion
        job.waitForCompletion(true);
    }
}
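For the sample data this job writes, with averages shown to three decimals (the actual output prints the full double):

1193	avgRate:3.571 userCount:3
661	avgRate:2.571 userCount:4

Movie 1193 was rated by users {1, 2, 3} and movie 661 by users {1, 2, 3, 5}, hence the counts.

Third job: regroup the records by user and, for each user, emit that user's ratings as JSON lines sorted from highest to lowest. The reducer must buffer the beans in a list before sorting, and this is why it copies every value into a fresh MovieBean: Hadoop reuses one value instance across the whole iteration, so adding the iterated object itself would leave the list full of references to the last record.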
package com.doit.demo08;

import com.doit.demo05.MovieBean;
import com.google.gson.Gson;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;

public class Movie {
    private static class MovieMapper extends Mapper<LongWritable, Text, Text, MovieBean> {
        Gson gs = new Gson();
        Text k2 = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            try {
                MovieBean movieBean = gs.fromJson(value.toString(), MovieBean.class);
                // Key by user id so each reduce call sees one user's ratings
                String uid = movieBean.getUid();
                k2.set(uid);
                context.write(k2, movieBean);
            } catch (Exception e) {
                e.printStackTrace();
                // System.out.println("bad line: " + value.toString());
            }
        }
    }

    private static class MovieReducer extends Reducer<Text, MovieBean, NullWritable, Text> {
        Gson gs = new Gson();
        Text v3 = new Text();

        @Override
        protected void reduce(Text key, Iterable<MovieBean> values, Context context) throws IOException, InterruptedException {
            ArrayList<MovieBean> list = new ArrayList<>();
            // Hadoop reuses the same MovieBean instance on every iteration,
            // so copy the fields into a fresh object before buffering it
            for (MovieBean value : values) {
                MovieBean mb = new MovieBean();
                mb.setUid(value.getUid());
                mb.setMovie(value.getMovie());
                mb.setRate(value.getRate());
                mb.setTimeStamp(value.getTimeStamp());
                list.add(mb);
            }
            // Sort this user's ratings from highest to lowest
            Collections.sort(list, new Comparator<MovieBean>() {
                @Override
                public int compare(MovieBean o1, MovieBean o2) {
                    return Double.compare(o2.getRate(), o1.getRate());
                }
            });
            for (MovieBean movieBean : list) {
                v3.set(gs.toJson(movieBean));
                context.write(NullWritable.get(), v3);
            }
        }
    }

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        // Create the job
        Job job = Job.getInstance(conf, "movie2");
        // Mapper class
        job.setMapperClass(MovieMapper.class);
        // Reducer class
        job.setReducerClass(MovieReducer.class);
        // Map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MovieBean.class);
        // Reduce output types
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        // Input path
        FileInputFormat.setInputPaths(job, new Path("d:\\work\\abc\\movie.txt"));
        // Output path
        FileOutputFormat.setOutputPath(job, new Path("d:\\work\\abc\\out_put7"));
        // Submit and wait for completion
        job.waitForCompletion(true);
    }
}
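For example, user 1 has ratings 5 (movie 1193) and 1, 1, 4, 1 (movie 661), so that user's block of output looks roughly like this (ties keep their encounter order, since Collections.sort is stable):

{"movie":"1193","rate":5.0,"timeStamp":978300760,"uid":"1"}
{"movie":"661","rate":4.0,"timeStamp":978302109,"uid":"1"}
{"movie":"661","rate":1.0,"timeStamp":978302109,"uid":"1"}
{"movie":"661","rate":1.0,"timeStamp":978302109,"uid":"1"}
{"movie":"661","rate":1.0,"timeStamp":978302109,"uid":"1"}

On Java 8+, the anonymous Comparator can be shortened to list.sort(Comparator.comparingDouble(MovieBean::getRate).reversed()).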