需求分析
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that emits one (column 1, column 11) pair per comma-separated input record.
 *
 * <p>The original author's notes label column 1 as the month and column 11 as the quantity,
 * and head this section "1.1 distribution of the count and sales of passenger
 * (non-commercial) and commercial (other) vehicles".
 *
 * <p>Records with fewer than 12 columns or a blank column 11 are skipped. Records whose
 * column 1 or column 11 is not a valid integer are skipped and counted in the
 * {@code CountMap / MALFORMED_RECORDS} counter.
 *
 * <p>NOTE(review): this class emits {@code IntWritable} keys, while the {@code App1} driver
 * in this file declares {@code Text} map-output keys and wires a {@code Text}-keyed combiner
 * and reducer. Confirm which mapper App1 is really meant to run.
 */
public class CountMap extends Mapper<LongWritable, Text, IntWritable, LongWritable> {
    /** Column holding the month, per the original author's note. */
    private static final int MONTH_COLUMN = 1;
    /** Column holding the quantity, per the original author's note. */
    private static final int QUANTITY_COLUMN = 11;

    // Reused across calls so no new Writable is allocated per record.
    private final IntWritable intWritable = new IntWritable();
    private final LongWritable longWritable = new LongWritable();

    /**
     * Parses one input line and writes (column 1, column 11) to the context.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one comma-separated input line
     * @param context task context that receives the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().trim().split(",");
        if (fields.length <= QUANTITY_COLUMN) {
            return;
        }
        String quantityText = fields[QUANTITY_COLUMN].trim();
        if (quantityText.isEmpty()) {
            return;
        }

        int month;
        long quantity;
        try {
            // Parse the trimmed text so the emptiness check and the parse see the same value.
            month = Integer.parseInt(fields[MONTH_COLUMN].trim());
            quantity = Long.parseLong(quantityText);
        } catch (NumberFormatException e) {
            // Only malformed numbers are tolerated; I/O and interrupt failures must propagate.
            context.getCounter("CountMap", "MALFORMED_RECORDS").increment(1L);
            return;
        }

        intWritable.set(month);
        longWritable.set(quantity);
        context.write(intWritable, longWritable);
    }
}
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.logging.Logger;
/**
 * Combiner for the Hadoop_MR project (created 2022-06-21 23:28) that sums the
 * {@code LongWritable} values of each {@code Text} key and re-emits the key with the sum.
 *
 * <p>The constructor, {@code setup} and {@code cleanup} only log a message; the original
 * author used them to observe when the framework calls each of them.
 */
public class CountCombine extends Reducer<Text, LongWritable, Text, LongWritable> {
    private static final Logger LOGGER = Logger.getLogger(CountCombine.class.getName());

    // Reused output value so no new Writable is allocated per key.
    private final LongWritable res = new LongWritable();

    /** Logs that an instance was created. */
    public CountCombine() {
        LOGGER.info("CountCombine的构造方法,是单例吗?"); // original author's answer to the logged question: yes
    }

    /** Logs that {@code setup} ran. */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        LOGGER.info("CountCombine的setup执行时机"); // original author's note: once, at the start
    }

    /** Logs that {@code cleanup} ran. */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        LOGGER.info("CountCombine的cleanup执行时机"); // original author's note: once, at the end
    }

    /**
     * Sums all values of {@code key}, logs the result and writes (key, sum).
     *
     * @param key     the key being combined
     * @param values  the values collected for the key
     * @param context task context that receives the combined pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Primitive accumulator: avoids the deprecated Long constructor and re-boxing on every addition.
        long sum = 0L;
        for (LongWritable val : values) {
            sum += val.get();
        }
        res.set(sum);
        LOGGER.info("combine合并:" + key.toString() + ":" + res.get());
        context.write(key, res);
    }
}
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
/**
 * Reducer for the Hadoop_MR project (created 2022-06-21 23:34) that totals the values of
 * every key and, in {@code cleanup}, writes each key with its total and its share of the
 * grand total.
 *
 * <p>Nothing is written from {@code reduce}; totals are held in memory until
 * {@code cleanup}. Output lines follow the iteration order of a {@code HashMap}.
 */
public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
    // Named after this class; the logger was previously registered under CountCombine's name.
    private static final Logger LOGGER = Logger.getLogger(CountReduce.class.getName());

    /** Per-key totals collected by {@code reduce}. */
    Map<String, Long> map = new HashMap<>();
    /** Sum of all per-key totals seen by this instance. */
    double all = 0;

    /** Logs that an instance was created. */
    public CountReduce() {
        LOGGER.info("CountReduce的构造方法,是单例吗?");
    }

    /** Logs that {@code setup} ran. */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        LOGGER.info("CountReduce的setup执行时机");
    }

    /**
     * Sums the values of {@code key}, adds the sum to the grand total and remembers it.
     *
     * @param key     the key being reduced
     * @param values  the values collected for the key
     * @param context task context (not written to here)
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable val : values) {
            sum += val.get();
        }
        all += sum;
        map.put(key.toString(), sum);
        LOGGER.info("CountReduce的reduce:" + key.toString() + ":" + sum);
    }

    /**
     * Writes one line per remembered key: the key, then its total and its fraction of the
     * grand total separated by a tab.
     *
     * @param context task context that receives the output pairs
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        LOGGER.info("CountReduce的cleanup执行时机");
        for (Map.Entry<String, Long> entry : map.entrySet()) {
            String key = entry.getKey();
            long value = entry.getValue();
            double percent = value / all;
            LOGGER.info("CountReduce的cleanup:" + key + ":" + value + "\t" + percent);
            context.write(new Text(key), new Text(value + "\t" + percent));
        }
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the Hadoop_MR job (created 2022-06-21 23:45) whose title reads, in translation,
 * "distribution of the count and sales of passenger (non-commercial) and commercial (other)
 * vehicles".
 *
 * <p>Usage: {@code App1 <input path> <output path>}. Unless exactly two arguments are given,
 * the hard-coded local development paths below are used.
 */
public class App1 {
    /** Fallback input directory used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "E:\\HadoopMRData\\input";
    /** Fallback output directory used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "E:\\HadoopMRData\\output";

    /**
     * Configures the job, submits it, waits for completion and exits with 0 on success or 1
     * on failure.
     *
     * @param args optional {@code [input, output]} paths
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Build the Windows-style fallback paths only when they are actually needed.
        // NOTE(review): Hadoop's Path is expected to reject "E:\..." on non-Windows hosts, which
        // would previously have failed the run even when real paths were passed - confirm.
        boolean pathsGiven = args != null && args.length == 2;
        Path input = new Path(pathsGiven ? args[0] : DEFAULT_INPUT);
        Path output = new Path(pathsGiven ? args[1] : DEFAULT_OUTPUT);

        Configuration conf = new Configuration();
        // Left disabled by the original author: setting fs.defaultFS to hdfs://node1:8020, and
        // deleting an existing output directory through FileSystem before submitting.

        Job job = Job.getInstance(conf, "通过统计乘用车辆(非营运)和商用车辆(其他)的数量和销售额分布");
        job.setJarByClass(App1.class);

        job.setMapperClass(CountMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setCombinerClass(CountCombine.class);

        job.setReducerClass(CountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // NOTE(review): the original author left job.setNumReduceTasks(2) disabled. CountReduce
        // derives ratios from totals it accumulates itself, so presumably a single reduce task
        // is required - confirm before raising the reducer count.

        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that emits one (column 1, column 11) pair per comma-separated input record.
 *
 * <p>The original author's note labels column 1 as the month and column 11 as the quantity.
 *
 * <p>Records with fewer than 12 columns or a blank column 11 are skipped. Records whose
 * column 1 or column 11 is not a valid integer are skipped and counted in the
 * {@code CountMap / MALFORMED_RECORDS} counter.
 */
public class CountMap extends Mapper<LongWritable, Text, IntWritable, LongWritable> {
    /** Column holding the month, per the original author's note. */
    private static final int MONTH_COLUMN = 1;
    /** Column holding the quantity, per the original author's note. */
    private static final int QUANTITY_COLUMN = 11;

    // Reused across calls so no new Writable is allocated per record.
    private final IntWritable intWritable = new IntWritable();
    private final LongWritable longWritable = new LongWritable();

    /**
     * Parses one input line and writes (column 1, column 11) to the context.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one comma-separated input line
     * @param context task context that receives the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().trim().split(",");
        if (fields.length <= QUANTITY_COLUMN) {
            return;
        }
        String quantityText = fields[QUANTITY_COLUMN].trim();
        if (quantityText.isEmpty()) {
            return;
        }

        int month;
        long quantity;
        try {
            // Parse the trimmed text so the emptiness check and the parse see the same value.
            month = Integer.parseInt(fields[MONTH_COLUMN].trim());
            quantity = Long.parseLong(quantityText);
        } catch (NumberFormatException e) {
            // Only malformed numbers are tolerated; I/O and interrupt failures must propagate.
            context.getCounter("CountMap", "MALFORMED_RECORDS").increment(1L);
            return;
        }

        intWritable.set(month);
        longWritable.set(quantity);
        context.write(intWritable, longWritable);
    }
}
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Combiner for the Hadoop_MR project (created 2022-06-21 23:28) that sums the
 * {@code LongWritable} values of each {@code IntWritable} key and re-emits the key with
 * the sum.
 */
public class CountCombine extends Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
    // Reused output value so no new Writable is allocated per key.
    private final LongWritable res = new LongWritable();

    /**
     * Sums all values of {@code key} and writes (key, sum).
     *
     * @param key     the key being combined
     * @param values  the values collected for the key
     * @param context task context that receives the combined pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Primitive accumulator: avoids the deprecated Long constructor and re-boxing on every addition.
        long sum = 0L;
        for (LongWritable val : values) {
            sum += val.get();
        }
        res.set(sum);
        context.write(key, res);
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
/**
 * Reducer for the Hadoop_MR project (created 2022-06-21 23:34) that totals the values of
 * every integer key and, in {@code cleanup}, writes each key with its total and its share
 * of the grand total.
 *
 * <p>The driver that uses this reducer is titled (in translation) "ratio of monthly car
 * sales ... sorted by month", so totals are kept in a {@code TreeMap} to guarantee that
 * {@code cleanup} writes them in ascending key order.
 */
public class CountReduce extends Reducer<IntWritable, LongWritable, IntWritable, Text> {
    /** Per-key totals, ordered by key. */
    private final Map<Integer, Long> map = new TreeMap<Integer, Long>();
    /** Sum of all per-key totals seen by this instance (original note: total sales count). */
    private long all = 0L;
    /** Holds the current ratio while its output line is formatted. */
    private final DoubleWritable doubleWritable = new DoubleWritable();

    /**
     * Sums the values of {@code key}, adds the sum to the grand total and remembers it.
     *
     * @param key     the key being reduced
     * @param values  the values collected for the key
     * @param context task context (not written to here)
     */
    @Override
    protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long sum = 0L;
        for (LongWritable val : values) {
            sum += val.get();
        }
        all += sum;
        map.put(key.get(), sum);
    }

    /**
     * Writes one line per remembered key, in ascending key order: the key, then its total
     * and its fraction of the grand total separated by a tab.
     *
     * @param context task context that receives the output pairs
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<Integer, Long> entry : map.entrySet()) {
            long value = entry.getValue();
            double percent = value / (double) all;
            doubleWritable.set(percent);
            context.write(new IntWritable(entry.getKey()), new Text(value + "\t" + doubleWritable));
        }
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the Hadoop_MR job (created 2022-06-21 23:45) whose title reads, in translation,
 * "ratio of car sales for each month of 2013 in Shanxi Province, sorted by month".
 *
 * <p>Usage: {@code App2 <input path> <output path>}. Unless exactly two arguments are given,
 * the hard-coded local development paths below are used.
 */
public class App2 {
    /** Fallback input directory used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "E:\\HadoopMRData\\input";
    /** Fallback output directory used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "E:\\HadoopMRData\\output";

    /**
     * Configures the job, submits it, waits for completion and exits with 0 on success or 1
     * on failure.
     *
     * @param args optional {@code [input, output]} paths
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Build the Windows-style fallback paths only when they are actually needed.
        // NOTE(review): Hadoop's Path is expected to reject "E:\..." on non-Windows hosts, which
        // would previously have failed the run even when real paths were passed - confirm.
        boolean pathsGiven = args != null && args.length == 2;
        Path input = new Path(pathsGiven ? args[0] : DEFAULT_INPUT);
        Path output = new Path(pathsGiven ? args[1] : DEFAULT_OUTPUT);

        Configuration conf = new Configuration();
        // Left disabled by the original author: setting fs.defaultFS to hdfs://node1:8020, and
        // deleting an existing output directory through FileSystem before submitting.

        Job job = Job.getInstance(conf, "统计山西省2013年每个月的汽车销售数量的比例,按月份排序");
        job.setJarByClass(App2.class);

        job.setMapperClass(CountMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setCombinerClass(CountCombine.class);

        job.setReducerClass(CountReduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        // NOTE(review): the original author left job.setNumReduceTasks(2) disabled. CountReduce
        // derives ratios from totals it accumulates itself, so presumably a single reduce task
        // is required - confirm before raising the reducer count.

        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that emits (column 38, 1) for each comma-separated record whose column 38 is
 * exactly "男性" (male) or "女性" (female). All other records are ignored.
 */
public class CountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
    // Original author's note: the number of map tasks is decided by the input splits, and
    // within one task the call order is setup, map (once per record), cleanup.
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] columns = value.toString().trim().split(",");
        if (columns.length <= 38) {
            return; // record too short to carry column 38
        }

        String gender = columns[38];
        boolean recognised = "男性".equals(gender) || "女性".equals(gender);
        if (!recognised) {
            return;
        }

        Text outKey = new Text(gender);
        LongWritable one = new LongWritable(1);
        context.write(outKey, one);
    }
}
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Combiner for the Hadoop_MR project (created 2022-06-21 23:28): collapses the values of
 * one {@code Text} key into their sum and forwards (key, sum).
 */
public class CountCombine extends Reducer<Text, LongWritable, Text, LongWritable> {
    // Output holder shared by every call to reduce.
    private LongWritable res = new LongWritable();

    /**
     * Adds up {@code values} and writes the total under the unchanged {@code key}.
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long total = 0L;
        for (LongWritable partial : values) {
            total = total + partial.get();
        }
        res.set(total);
        context.write(key, res);
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Reducer for the Hadoop_MR project (created 2022-06-21 23:34). It records the total of
 * every key while reducing, then in {@code cleanup} writes each key with
 * "total TAB total/grandTotal". Output follows {@code HashMap} iteration order.
 */
public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
    private Map<String, Long> map = new HashMap<String, Long>();
    private long all = 0L; // grand total (original note: total sales count)
    private DoubleWritable doubleWritable = new DoubleWritable(); // ratio holder

    /** Totals the values of {@code key} and stores the result; nothing is written here. */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long keyTotal = sumOf(values);
        all += keyTotal;
        map.put(key.toString(), keyTotal);
    }

    /** Writes one (key, "total TAB ratio") pair for every stored key. */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, Long> entry : map.entrySet()) {
            long count = entry.getValue();
            doubleWritable.set(count / (double) all);
            Text outValue = new Text(count + "\t" + doubleWritable);
            context.write(new Text(entry.getKey()), outValue);
        }
    }

    /** Returns the sum of the given values. */
    private static long sumOf(Iterable<LongWritable> values) {
        long total = 0L;
        for (LongWritable item : values) {
            total += item.get();
        }
        return total;
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the Hadoop_MR job (created 2022-06-21 23:45) whose title reads, in translation,
 * "ratio of male to female car buyers and their choice of car colour".
 *
 * <p>Usage: {@code App3 <input path> <output path>}. Unless exactly two arguments are given,
 * the hard-coded local development paths below are used.
 */
public class App3 {
    /** Fallback input directory used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "E:\\HadoopMRData\\input";
    /** Fallback output directory used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "E:\\HadoopMRData\\output";

    /**
     * Configures the job, submits it, waits for completion and exits with 0 on success or 1
     * on failure.
     *
     * @param args optional {@code [input, output]} paths
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Build the Windows-style fallback paths only when they are actually needed.
        // NOTE(review): Hadoop's Path is expected to reject "E:\..." on non-Windows hosts, which
        // would previously have failed the run even when real paths were passed - confirm.
        boolean pathsGiven = args != null && args.length == 2;
        Path input = new Path(pathsGiven ? args[0] : DEFAULT_INPUT);
        Path output = new Path(pathsGiven ? args[1] : DEFAULT_OUTPUT);

        Configuration conf = new Configuration();
        // Left disabled by the original author: setting fs.defaultFS to hdfs://node1:8020, and
        // deleting an existing output directory through FileSystem before submitting.

        Job job = Job.getInstance(conf, "统计买车的男女比例及男女对车的颜色的选择");
        job.setJarByClass(App3.class);

        job.setMapperClass(CountMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setCombinerClass(CountCombine.class);

        job.setReducerClass(CountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // NOTE(review): the original author left job.setNumReduceTasks(2) disabled. CountReduce
        // derives ratios from totals it accumulates itself, so presumably a single reduce task
        // is required - confirm before raising the reducer count.

        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that emits ("col10 TAB col6 TAB col9", 1) for each comma-separated record in which
 * columns 10, 6 and 9 are all non-empty. The original author's note labels them as
 * ownership (10), model (6) and type (9).
 */
public class CountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
    // Original author's note: the number of map tasks is decided by the input splits, and
    // within one task the call order is setup, map (once per record), cleanup.
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] columns = value.toString().trim().split(",");
        if (columns.length <= 10) {
            return; // record too short to carry column 10
        }

        String ownership = columns[10];
        String model = columns[6];
        String type = columns[9];
        if ("".equals(ownership) || "".equals(model) || "".equals(type)) {
            return;
        }

        StringBuilder compositeKey = new StringBuilder();
        compositeKey.append(ownership).append("\t").append(model).append("\t").append(type);
        context.write(new Text(compositeKey.toString()), new LongWritable(1));
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Reducer for the Hadoop_MR project (created 2022-06-21 23:34). While reducing it stores
 * the total of each key; in {@code cleanup} it writes every key together with
 * "total TAB total/grandTotal". Output follows {@code HashMap} iteration order.
 */
public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
    private Map<String, Long> map = new HashMap<String, Long>();
    private long all = 0L; // grand total (original note: total sales count)
    private DoubleWritable doubleWritable = new DoubleWritable(); // ratio holder

    /** Accumulates the values of {@code key}; the result is stored, not written. */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long subtotal = 0L;
        for (LongWritable current : values) {
            subtotal = subtotal + current.get();
        }
        map.put(key.toString(), subtotal);
        all = all + subtotal;
    }

    /** Emits (key, "total TAB ratio") for every key stored during reduce. */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        final double grandTotal = (double) all;
        for (Map.Entry<String, Long> row : map.entrySet()) {
            long count = row.getValue();
            double share = count / grandTotal;
            doubleWritable.set(share);
            String line = count + "\t" + doubleWritable;
            context.write(new Text(row.getKey()), new Text(line));
        }
    }
}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the Hadoop_MR job (created 2022-06-21 23:45) whose title reads, in translation,
 * "statistics on vehicle ownership, model and type".
 *
 * <p>Usage: {@code App4 <input path> <output path>}. Unless exactly two arguments are given,
 * the hard-coded local development paths below are used.
 */
public class App4 {
    /** Fallback input directory used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "E:\\HadoopMRData\\input";
    /** Fallback output directory used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "E:\\HadoopMRData\\output";

    /**
     * Configures the job, submits it, waits for completion and exits with 0 on success or 1
     * on failure.
     *
     * @param args optional {@code [input, output]} paths
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Build the Windows-style fallback paths only when they are actually needed.
        // NOTE(review): Hadoop's Path is expected to reject "E:\..." on non-Windows hosts, which
        // would previously have failed the run even when real paths were passed - confirm.
        boolean pathsGiven = args != null && args.length == 2;
        Path input = new Path(pathsGiven ? args[0] : DEFAULT_INPUT);
        Path output = new Path(pathsGiven ? args[1] : DEFAULT_OUTPUT);

        Configuration conf = new Configuration();
        // Left disabled by the original author: setting fs.defaultFS to hdfs://node1:8020, and
        // deleting an existing output directory through FileSystem before submitting.

        Job job = Job.getInstance(conf, "统计的车的所有权、型号和类型");
        job.setJarByClass(App4.class);

        job.setMapperClass(CountMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // NOTE(review): Count10Group is not defined in the visible part of this file; its
        // grouping rule has to be checked where it is declared.
        job.setGroupingComparatorClass(Count10Group.class);

        job.setReducerClass(CountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}