userlogin.java
package userlogin;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class userlogin {
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
Job userloginJob = Job.getInstance(conf,"word count");
//Important: specify the jar that contains this job
userloginJob.setJarByClass(userlogin.class);
//Set which class provides the mapper logic for this job
userloginJob.setMapperClass(userloginMapper.class);
//Set which class provides the reducer logic for this job
userloginJob.setReducerClass(userloginReducer.class);
//Set the key/value types of the map output
userloginJob.setMapOutputKeyClass(Text.class);
userloginJob.setMapOutputValueClass(IntWritable.class);
//Set the key/value types of the final output
userloginJob.setOutputKeyClass(Text.class);
userloginJob.setOutputValueClass(IntWritable.class);
//Set the path where the text data to be processed is stored
FileInputFormat.setInputPaths(userloginJob, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(userloginJob, new Path(otherArgs[1]));
//Submit the job to the Hadoop cluster
userloginJob.waitForCompletion(true);
}
}
userloginMapper.java
package userlogin;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class userloginMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
@Override
protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
//Get one line of text and convert it to a String
String line = value.toString();
//Split this line into fields on the comma
String[] words=line.split(",");
// words[0] = Nehru
// words[1] = 2016-01-01
//Emit <user name, 1> (the commented loop below is the per-word version from word count)
// for(String word:words){
// context.write(new Text(word), new IntWritable(1));
// }
context.write(new Text(words[0]), new IntWritable(1));
}
}
userloginReducer.java
package userlogin;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class userloginReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
/*
* The reduce method is called by the reduce task process.
*
* The reduce task groups the large number of kv pairs handed over by the shuffle phase;
* kv pairs that share the same key are grouped together,
* and our custom reduce method is then called once per group.
* For example:
* the "hello" group triggers one call to reduce, and the "tom" group triggers another.
* Parameters passed on each call:
* key: the key shared by the group
* values: an iterator over all values in the group
*/
@Override
protected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
//Define a counter for this user's logins
int count = 0;
//Iterate over all values in this group and accumulate them
for(IntWritable value:values){
count+=value.get();
}
//Emit the total login count for this user
context.write(key, new IntWritable(count));
}
}
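To make the data flow concrete, here is a minimal local sketch (plain Java, no Hadoop involved) that mimics the same split-and-count logic on a few hypothetical input lines in the name,date format shown in the mapper comments; the class name and sample records are illustrative only.
import java.util.Map;
import java.util.TreeMap;

public class UserLoginCountSketch {
    public static void main(String[] args) {
        // Hypothetical input lines in the same "name,date" format the mapper expects
        String[] lines = {"Nehru,2016-01-01", "Dane,2016-01-01", "Nehru,2016-01-02"};
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        for (String line : lines) {
            // Same parsing as userloginMapper: the user name is the field before the comma
            String name = line.split(",")[0];
            // Same accumulation as userloginReducer: add 1 per login record
            Integer old = counts.get(name);
            counts.put(name, old == null ? 1 : old + 1);
        }
        // Prints {Dane=1, Nehru=2}: the number of login records per user
        System.out.println(counts);
    }
}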
--------------------------------------------------------------------------------------
userloginsort.java
package userloginsort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class userloginsort {
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
Job userloginJob = Job.getInstance(conf,"word count");
//Important: specify the jar that contains this job
userloginJob.setJarByClass(userloginsort.class);
//Set which class provides the mapper logic for this job
userloginJob.setMapperClass(userloginsortMapper.class);
//Set which class provides the reducer logic for this job
userloginJob.setReducerClass(userloginsortReducer.class);
//Set the key/value types of the map output
userloginJob.setMapOutputKeyClass(IntWritable.class);
userloginJob.setMapOutputValueClass(Text.class);
//Set the key/value types of the final output
userloginJob.setOutputKeyClass(Text.class);
userloginJob.setOutputValueClass(IntWritable.class);
//Set the path where the text data to be processed is stored
FileInputFormat.setInputPaths(userloginJob, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(userloginJob, new Path(otherArgs[1]));
//Submit the job to the Hadoop cluster
userloginJob.waitForCompletion(true);
}
}
userloginsortMapper.java
package userloginsort;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class userloginsortMapper extends Mapper<LongWritable, Text, IntWritable, Text>{
@Override
protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
//Get one line of text and convert it to a String
String line = value.toString();
//Split this line into fields on the tab that separates name and count
String[] words=line.split("\t");
// words[0] = the user name, words[1] = that user's login count
int logcount = Integer.parseInt(words[1]);
//Emit <login count, user name> so the shuffle sorts the records by count
context.write(new IntWritable(logcount), new Text(words[0]));
}
}
userloginsortReducer.java
package userloginsort;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class userloginsortReducer extends Reducer<IntWritable, Text, Text, IntWritable>{
/*
* The reduce method is called by the reduce task process.
*
* The reduce task groups the large number of kv pairs handed over by the shuffle phase;
* kv pairs that share the same key are grouped together,
* and our custom reduce method is then called once per group.
* For example:
* the "hello" group triggers one call to reduce, and the "tom" group triggers another.
* Parameters passed on each call:
* key: the key shared by the group
* values: an iterator over all values in the group
*/
@Override
protected void reduce(IntWritable key, Iterable<Text> values,Context context) throws IOException, InterruptedException {
//Iterate over every user name that shares this login count
for(Text value:values){
//Swap key and value back so each output line is <user name, login count>
context.write(value, key);
}
}
}
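The sort job is designed to run on the output of the userlogin job: TextOutputFormat writes each record as the name, a tab, then the count; the mapper above inverts that into <count, name>, the shuffle delivers the IntWritable keys in ascending order, and the reducer swaps the pair back. Below is a small local sketch of that inversion on one hypothetical output line of the first job.
public class UserLoginSortSketch {
    public static void main(String[] args) {
        // Hypothetical line from the userlogin job's output (TextOutputFormat: key, tab, value)
        String line = "Nehru\t2";
        String[] words = line.split("\t");
        // Same parsing as userloginsortMapper: the login count becomes the map output key
        int logcount = Integer.parseInt(words[1]);
        String name = words[0];
        // The map output pair is <2, Nehru>; after the shuffle sorts all counts,
        // the reducer writes the pair back as "Nehru<TAB>2", now ordered by count.
        System.out.println("<" + logcount + ", " + name + ">");
    }
}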
---------------------------------------------------------------
score.java
package score;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class score {
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
Job scoreJob = Job.getInstance(conf,"word count");
//Important: specify the jar that contains this job
scoreJob.setJarByClass(score.class);
//Set which class provides the mapper logic for this job
scoreJob.setMapperClass(scoreMapper.class);
//Set which class provides the reducer logic for this job
scoreJob.setReducerClass(scoreReducer.class);
//Set the key/value types of the map output
scoreJob.setMapOutputKeyClass(Text.class);
scoreJob.setMapOutputValueClass(IntWritable.class);
//Set the key/value types of the final output
scoreJob.setOutputKeyClass(Text.class);
scoreJob.setOutputValueClass(IntWritable.class);
//Set the path where the text data to be processed is stored
FileInputFormat.setInputPaths(scoreJob, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(scoreJob, new Path(otherArgs[1]));
//Submit the job to the Hadoop cluster
scoreJob.waitForCompletion(true);
}
}
scoreMapper.java
package score;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class scoreMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
@Override
protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
//Get one line of text and convert it to a String
String line = value.toString();
//Split this line into fields on the space, e.g. "chinese 73"
String[] words=line.split(" ");
//Parse the score as an integer
// double scor = Double.parseDouble(words[1]);
int scor = Integer.parseInt(words[1]);
context.write(new Text(words[0]), new IntWritable(scor));
}
}
scoreReducer.java
package score;
import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class scoreReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
/*
* Parameters passed on each call:
* key: the key shared by the group
* values: an iterator over all values in the group
*/
@Override
protected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
//Track the largest score seen for this key
// int count = 0;
// int sum = 0;
int mmax=0;
int temp;
//Iterate over all values in this group and keep the maximum
for(IntWritable value:values){
// sum += value.get();
// count++;
temp = value.get();
if (mmax < temp) {
mmax = temp;
}
}
//The commented lines show the average variant instead of the maximum:
// float avg = (float)1.0*sum/count;
// context.write(key, new FloatWritable(avg));
//Emit the max score for this key
context.write(key, new IntWritable(mmax));
}
}
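The commented-out lines in scoreReducer point at a second variant that reports the average instead of the maximum. Here is a sketch of what that could look like as its own reducer class (the name scoreAvgReducer is my own, not part of the project); the driver would also need scoreJob.setOutputValueClass(FloatWritable.class) for it.
package score;

import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class scoreAvgReducer extends Reducer<Text, IntWritable, Text, FloatWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        int count = 0;
        for (IntWritable value : values) {
            sum += value.get();   // accumulate every score for this subject
            count++;              // number of score records seen
        }
        // Cast to float before dividing so the average keeps its fractional part
        context.write(key, new FloatWritable((float) sum / count));
    }
}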
------------------------------------------------------------------------------
scoresort.java
package scoresort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class scoresort {
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
Job userloginJob = Job.getInstance(conf,"word count");
//重要:指定本job所在的jar包
userloginJob.setJarByClass(scoresort.class);
//设置wordCountJob所用的mapper逻辑类为哪个类
userloginJob.setMapperClass(scoresortMapper.class);
//设置wordCountJob所用的reducer逻辑类为哪个类
userloginJob.setReducerClass(scoresortReducer.class);
//设置map阶段输出的kv数据类型
userloginJob.setMapOutputKeyClass(FloatWritable.class);
userloginJob.setMapOutputValueClass(Text.class);
//设置最终输出的kv数据类型
userloginJob.setOutputKeyClass(Text.class);
userloginJob.setOutputValueClass(FloatWritable.class);
//设置要处理的文本数据所存放的路径
FileInputFormat.setInputPaths(userloginJob, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(userloginJob, new Path(otherArgs[1]));
//提交job给hadoop集群
userloginJob.waitForCompletion(true);
}
}
scoresortMapper.java
package scoresort;
import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
//import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class scoresortMapper extends Mapper<LongWritable, Text, FloatWritable, Text>{
@Override
protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
//Get one line of text and convert it to a String
String line = value.toString();
//Split this line into fields on the tab that separates name and score
String[] words=line.split("\t");
// int score = Integer.parseInt(words[1]);
float score = Float.parseFloat(words[1]);
context.write(new FloatWritable(score), new Text(words[0]));
}
}
scoresortReducer.java
package scoresort;
import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
//import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class scoresortReducer extends Reducer<FloatWritable, Text, Text, FloatWritable>{
/*
* The reduce method is called by the reduce task process.
*
* The reduce task groups the large number of kv pairs handed over by the shuffle phase;
* kv pairs that share the same key are grouped together,
* and our custom reduce method is then called once per group.
* For example:
* the "hello" group triggers one call to reduce, and the "tom" group triggers another.
* Parameters passed on each call:
* key: the key shared by the group
* values: an iterator over all values in the group
*/
@Override
protected void reduce(FloatWritable key, Iterable<Text> values,Context context) throws IOException, InterruptedException {
//Iterate over every name that shares this score
for(Text value:values){
//Swap key and value back so each output line is <name, score>
context.write(value, key);
}
}
}
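One caveat on the result: the shuffle sorts the FloatWritable map keys in ascending order, so the output file lists the lowest score first. If a highest-first ranking is wanted, a sort comparator can be registered on the job, e.g. scoresortJob.setSortComparatorClass(DescendingFloatComparator.class). A minimal sketch of such a comparator (the class name is my own):
package scoresort;

import org.apache.hadoop.io.FloatWritable;

public class DescendingFloatComparator extends FloatWritable.Comparator {
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Negate the default ascending byte-level comparison so larger scores come first
        return -super.compare(b1, s1, l1, b2, s2, l2);
    }
}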
Here I pack the monthlogin project directory into a tar archive:
tar -czvf monthlogin.tar.gz monthlogin
I end with this picture.