MapReduce案例11——影评分析3(特定电影不同年龄段平均评分)

题目:

现有如下三份数据:
1、users.dat    数据格式为:  2::M::56::16::70072
对应字段为:UserID BigInt, Gender String, Age Int, Occupation String, Zipcode String
对应字段中文解释:用户id,性别,年龄,职业,邮政编码

2、movies.dat		数据格式为: 2::Jumanji (1995)::Adventure|Children's|Fantasy
对应字段为:MovieID BigInt, Title String, Genres String
对应字段中文解释:电影ID,电影名字,电影类型

3、ratings.dat		数据格式为:  1::1193::5::978300760
对应字段为:UserID BigInt, MovieID BigInt, Rating Double, Timestamped String
对应字段中文解释:用户ID,电影ID,评分,评分时间戳

用户ID,电影ID,评分,评分时间戳,性别,年龄,职业,邮政编码,电影名字,电影类型
userid, movieId, rate, ts, gender, age, occupation, zipcode, movieName, movieType
(3)求movieid = 2116这部电影各年龄段(因为年龄就只有7个,就按这个7个分就好了)的平均影评(年龄段,评分)

分析:以影评分析2中生成的联合数据表作为数据源;在map阶段只输出movieid为2116的记录,并以年龄作为key进行分组;在reduce阶段计算各年龄段的平均评分:

代码:

/**
 * @author: lpj   
 * @date: 2018年3月16日 下午7:16:47
 * @Description:
 */
package lpj.filmCritic;

import java.io.IOException;
import java.text.DecimalFormat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce job that computes, for one fixed movie, the average rating given by each age group.
 *
 * <p>The input is the joined ratings/users/movies file. Each line holds one record whose fields
 * are separated by {@code ::} in this order:
 * {@code userid, movieId, rate, ts, gender, age, occupation, zipcode, movieName, movieType}.
 * Each output line has the form {@code age \t averageRate \t movieId}.
 */
public class AgeGroupRateMR {

	/** Id of the movie whose ratings are aggregated. */
	private static final String TARGET_MOVIE_ID = "2116";

	/** Separator between the fields of one joined input record. */
	private static final String FIELD_SEPARATOR = "::";

	/** Input path used when no command-line arguments are supplied. */
	private static final String DEFAULT_INPUT = "d:/a/totalFilmInfos.txt";

	/** Output directory used when fewer than two command-line arguments are supplied. */
	private static final String DEFAULT_OUTPUT = "d:/a/homework11_3";

	/**
	 * Configures and runs the job, then exits with status 0 on success and 1 on failure.
	 *
	 * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
	 *             directory; the built-in defaults are used for whichever is missing
	 * @throws Exception if the file system cannot be accessed or the job cannot be submitted
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// To run against a cluster instead of the local default, add "hdfs-site.xml" as a
		// configuration resource and set the HADOOP_USER_NAME system property before this point.
		FileSystem fs = FileSystem.get(conf);

		Job job = Job.getInstance(conf);
		job.setJarByClass(AgeGroupRateMR.class);
		job.setMapperClass(AgeGroupRateMR_Mapper.class);
		job.setReducerClass(AgeGroupRateMR_Reducer.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		Path inputPath = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
		Path outputPath = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);
		// Remove any previous output directory so the job can be re-run without manual cleanup.
		if (fs.exists(outputPath)) {
			fs.delete(outputPath, true);
		}

		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);
		boolean isdone = job.waitForCompletion(true);
		System.exit(isdone ? 0 : 1);
	}

	/**
	 * Emits {@code (age, rate \t movieId)} for every record of the target movie and drops
	 * all other records.
	 */
	public static class AgeGroupRateMR_Mapper extends Mapper<LongWritable, Text, Text, Text> {
		// Reused across map() calls to avoid allocating new Text objects per record.
		Text kout = new Text();
		Text valueout = new Text();

		/**
		 * Parses one joined record and writes it out if it belongs to the target movie.
		 *
		 * @param key     input key supplied by the framework (not used)
		 * @param value   one line of the joined file
		 * @param context sink for the {@code (age, rate \t movieId)} pair
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String[] reads = value.toString().trim().split(FIELD_SEPARATOR);
			// Fields 1 (movieId), 2 (rate) and 5 (age) are needed; skip blank or truncated lines
			// instead of failing the whole task with an ArrayIndexOutOfBoundsException.
			if (reads.length < 6) {
				return;
			}
			String movieid = reads[1];
			String rate = reads[2];
			String age = reads[5];
			if (TARGET_MOVIE_ID.equals(movieid)) {
				kout.set(age);
				valueout.set(rate + "\t" + movieid);
				context.write(kout, valueout);
			}
		}
	}

	/**
	 * Averages the ratings received for one age group and writes
	 * {@code (age, averageRate \t movieId)}.
	 */
	public static class AgeGroupRateMR_Reducer extends Reducer<Text, Text, Text, Text> {
		// Not used by reduce(); kept so the class keeps the same members as before.
		Text kout = new Text();
		Text valueout = new Text();

		/** Formats the average with at most one fraction digit; built once per reducer instance. */
		private final DecimalFormat rateFormat = new DecimalFormat("#.#");

		/**
		 * Computes the mean of all ratings for one age group.
		 *
		 * @param key     the age group
		 * @param values  {@code rate \t movieId} strings emitted by the mapper
		 * @param context sink for the {@code (age, averageRate \t movieId)} pair
		 * @throws NumberFormatException if a rating is not a valid number
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			double totalRate = 0;
			int rateNum = 0;
			String movieid = "";
			for (Text text : values) {
				String[] reads = text.toString().split("\t");
				// The rating is declared as a double in the data description, so parse it as one;
				// integer-valued ratings still sum exactly.
				totalRate += Double.parseDouble(reads[0]);
				rateNum++;
				// Carried through to the output only as a sanity check of the map-side filter.
				movieid = reads[1];
			}
			double avgRate = totalRate / rateNum;
			valueout.set(rateFormat.format(avgRate) + "\t" + movieid);
			context.write(key, valueout);
		}

	}

}

运行结果:

1	3.3	2116
18	3.4	2116
25	3.4	2116
35	3.2	2116
45	2.8	2116
50	3.3	2116
56	3.5	2116

你可能感兴趣的:(MapReduce)