Hadoop -- UserLogin Statistical data

Hadoop -- UserLogin Statistical data_第1张图片

Hadoop -- UserLogin Statistical data_第2张图片

userlogin.java

package userlogin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import userlogin.userloginMapper;
import userlogin.userloginReducer;
import userlogin.userloginMapper;
import userlogin.userloginReducer;
import userlogin.userlogin;
import userlogin.userlogin;

public class userlogin {

	public static void main(String[] args) throws Exception{
		// TODO Auto-generated method stub
		// TODO Auto-generated method stub
				Configuration conf = new Configuration();
				String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
				Job userloginJob = Job.getInstance(conf,"word count");
				
				//重要:指定本job所在的jar包
				userloginJob.setJarByClass(userlogin.class);
				
				//设置wordCountJob所用的mapper逻辑类为哪个类
				userloginJob.setMapperClass(userloginMapper.class);
				//设置wordCountJob所用的reducer逻辑类为哪个类
				userloginJob.setReducerClass(userloginReducer.class);
				
				//设置map阶段输出的kv数据类型
				userloginJob.setMapOutputKeyClass(Text.class);
				userloginJob.setMapOutputValueClass(IntWritable.class);
				
				//设置最终输出的kv数据类型
				userloginJob.setOutputKeyClass(Text.class);
				userloginJob.setOutputValueClass(IntWritable.class);
				
				//设置要处理的文本数据所存放的路径
				FileInputFormat.setInputPaths(userloginJob, new Path(otherArgs[0]));
				FileOutputFormat.setOutputPath(userloginJob, new Path(otherArgs[1]));
				
				//提交job给hadoop集群
				userloginJob.waitForCompletion(true);
	}
}

Hadoop -- UserLogin Statistical data_第3张图片

userloginMapper.java

package userlogin;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;

public class userloginMapper extends Mapper{
	@Override
	protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
		//拿到一行文本内容,转换成String 类型
		String line = value.toString();
		//将这行文本切分成单词
		String[] words=line.split(",");
		
//		word[0] = Nehru
//		word[1] = 2016-01-01
		
		//输出<单词,1>
//		for(String word:words){
//			context.write(new Text(word), new IntWritable(1));
//		}
		context.write(new Text(words[0]), new IntWritable(1));
	}
}

Hadoop -- UserLogin Statistical data_第4张图片

userloginReducer.java

package userlogin;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;

public class userloginReducer extends Reducer{
	@Override
	/*
	 * reduce方法提供给reduce task进程来调用
	 * 
	 * reduce task会将shuffle阶段分发过来的大量kv数据对进行聚合,聚合的机制是相同key的kv对聚合为一组
	 * 然后reduce task对每一组聚合kv调用一次我们自定义的reduce方法
	 * 比如:
	 *  hello组会调用一次reduce方法进行处理,tom组也会调用一次reduce方法进行处理
	 *  调用时传递的参数:
	 *  		key:一组kv中的key
	 *  		values:一组kv中所有value的迭代器
	 */
	protected void reduce(Text key, Iterable values,Context context) throws IOException, InterruptedException {
		//定义一个计数器
		int count = 0;
		//通过value这个迭代器,遍历这一组kv中所有的value,进行累加
		for(IntWritable value:values){
			count+=value.get();
		}
		
		//输出这个单词的统计结果
		context.write(key, new IntWritable(count));
	}
}

--------------------------------------------------------------------------------------

userloginsort.java

package userloginsort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import userloginsort.userloginsortMapper;
import userloginsort.userloginsortReducer;
import userloginsort.userloginsortMapper;
import userloginsort.userloginsortReducer;
import userloginsort.userloginsort;
import userloginsort.userloginsort;

public class userloginsort {

	public static void main(String[] args) throws Exception{
		// TODO Auto-generated method stub
		// TODO Auto-generated method stub
				Configuration conf = new Configuration();
				String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
				Job userloginJob = Job.getInstance(conf,"word count");
				
				//重要:指定本job所在的jar包
				userloginJob.setJarByClass(userloginsort.class);
				
				//设置wordCountJob所用的mapper逻辑类为哪个类
				userloginJob.setMapperClass(userloginsortMapper.class);
				//设置wordCountJob所用的reducer逻辑类为哪个类
				userloginJob.setReducerClass(userloginsortReducer.class);
				
				//设置map阶段输出的kv数据类型
				userloginJob.setMapOutputKeyClass(IntWritable.class);
				userloginJob.setMapOutputValueClass(Text.class);
				
				//设置最终输出的kv数据类型
				userloginJob.setOutputKeyClass(Text.class);
				userloginJob.setOutputValueClass(IntWritable.class);
				
				//设置要处理的文本数据所存放的路径
				FileInputFormat.setInputPaths(userloginJob, new Path(otherArgs[0]));
				FileOutputFormat.setOutputPath(userloginJob, new Path(otherArgs[1]));
				
				//提交job给hadoop集群
				userloginJob.waitForCompletion(true);
	}
}

userloginsortMapper.java

package userloginsort;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;

public class userloginsortMapper extends Mapper{
	@Override
	protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
		//拿到一行文本内容,转换成String 类型
		String line = value.toString();
		//将这行文本切分成单词
		String[] words=line.split("\t");
		int logcount = Integer.parseInt(words[1]);
		
//		word[0] = Nehru
//		word[1] = 2016-01-01
		
		//输出<单词,1>
//		for(String word:words){
//			context.write(new Text(word), new IntWritable(1));
//		}
		context.write(new IntWritable(logcount), new Text(words[0]));
	}
}

userloginsortReducer.java

package userloginsort;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;

public class userloginsortReducer extends Reducer{
	@Override
	/*
	 * reduce方法提供给reduce task进程来调用
	 * 
	 * reduce task会将shuffle阶段分发过来的大量kv数据对进行聚合,聚合的机制是相同key的kv对聚合为一组
	 * 然后reduce task对每一组聚合kv调用一次我们自定义的reduce方法
	 * 比如:
	 *  hello组会调用一次reduce方法进行处理,tom组也会调用一次reduce方法进行处理
	 *  调用时传递的参数:
	 *  		key:一组kv中的key
	 *  		values:一组kv中所有value的迭代器
	 */
	protected void reduce(IntWritable key, Iterable values,Context context) throws IOException, InterruptedException {
		//定义一个计数器
//		int count = 0;
		//通过value这个迭代器,遍历这一组kv中所有的value,进行累加
		for(Text value:values){
//			count+=value.get();
//			context.write(key, new IntWritable(count));
			context.write(value, key);
		}
		
		//输出这个单词的统计结果
		
	}
}

---------------------------------------------------------------

score.java

package score;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import score.score;
import score.scoreMapper;
import score.scoreReducer;

public class score {

	public static void main(String[] args) throws Exception{

			Configuration conf = new Configuration();
			String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
			Job scoreJob = Job.getInstance(conf,"word count");
			
			//重要:指定本job所在的jar包
			scoreJob.setJarByClass(score.class);
			
			//设置wordCountJob所用的mapper逻辑类为哪个类
			scoreJob.setMapperClass(scoreMapper.class);
			//设置wordCountJob所用的reducer逻辑类为哪个类
			scoreJob.setReducerClass(scoreReducer.class);
			
			//设置map阶段输出的kv数据类型
			scoreJob.setMapOutputKeyClass(Text.class);
			scoreJob.setMapOutputValueClass(IntWritable.class);
			
			//设置最终输出的kv数据类型
			scoreJob.setOutputKeyClass(Text.class);
			scoreJob.setOutputValueClass(IntWritable.class);
			
			//设置要处理的文本数据所存放的路径
			FileInputFormat.setInputPaths(scoreJob, new Path(otherArgs[0]));
			FileOutputFormat.setOutputPath(scoreJob, new Path(otherArgs[1]));
			
			//提交job给hadoop集群
			scoreJob.waitForCompletion(true);
	}
}

scoreMapper.java

package score;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;

public class scoreMapper extends Mapper{
	@Override
	protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
		//拿到一行文本内容,转换成String 类型
		String line = value.toString();
		//将这行文本切分成单词
		String[] words=line.split(" ");
		
		//transfer integer
		// chinese 73
		
//		double scor = Double.parseDouble(words[1]);
		int scor = Integer.parseInt(words[1]);
		context.write(new Text(words[0]), new IntWritable(scor));
	}
}

scoreReducer.java

package score;

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;

//public class scoreReducer extends Reducer{
public class scoreReducer extends Reducer{
	@Override
	/*
	 *  调用时传递的参数:
	 *  		key:一组kv中的key
	 *  		values:一组kv中所有value的迭代器
	 */
	protected void reduce(Text key, Iterable values,Context context) throws IOException, InterruptedException {
		//定义一个计数器
		int count = 0;
		int sum = 0;
		int mmax=0;
		int temp;
		
		//通过value这个迭代器,遍历这一组kv中所有的value,进行累加
		for(IntWritable value:values){
//			sum += value.get();
//			count++;
			temp = value.get();
			if (mmax < temp) {
				mmax = temp;
			}
		}
//		float avg = (float)1.0*sum/count;
		// 1.0 double

		//输出统计结果
//		context.write(key, new FloatWritable(avg));
		context.write(key, new IntWritable(mmax));
	}
}

------------------------------------------------------------------------------

scoresort.java

package scoresort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import scoresort.scoresortMapper;
import scoresort.scoresortReducer;
import scoresort.scoresortMapper;
import scoresort.scoresortReducer;
import scoresort.scoresort;
import scoresort.scoresort;

public class scoresort {

	public static void main(String[] args) throws Exception{
		// TODO Auto-generated method stub
		// TODO Auto-generated method stub
				Configuration conf = new Configuration();
				String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
				Job userloginJob = Job.getInstance(conf,"word count");
				
				//重要:指定本job所在的jar包
				userloginJob.setJarByClass(scoresort.class);
				
				//设置wordCountJob所用的mapper逻辑类为哪个类
				userloginJob.setMapperClass(scoresortMapper.class);
				//设置wordCountJob所用的reducer逻辑类为哪个类
				userloginJob.setReducerClass(scoresortReducer.class);
				
				//设置map阶段输出的kv数据类型
				userloginJob.setMapOutputKeyClass(FloatWritable.class);
				userloginJob.setMapOutputValueClass(Text.class);
				
				//设置最终输出的kv数据类型
				userloginJob.setOutputKeyClass(Text.class);
				userloginJob.setOutputValueClass(FloatWritable.class);
				
				//设置要处理的文本数据所存放的路径
				FileInputFormat.setInputPaths(userloginJob, new Path(otherArgs[0]));
				FileOutputFormat.setOutputPath(userloginJob, new Path(otherArgs[1]));
				
				//提交job给hadoop集群
				userloginJob.waitForCompletion(true);
	}
}

scoresortMapper.java

package scoresort;

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
//import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;

public class scoresortMapper extends Mapper{
	@Override
	protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
		//拿到一行文本内容,转换成String 类型
		String line = value.toString();
		//将这行文本切分成单词
		String[] words=line.split("\t");
		
//		int logcount = Integer.parseInt(words[1]);
		float logcount = Float.parseFloat(words[1]);

		context.write(new FloatWritable(logcount), new Text(words[0]));
	}
}

scoresortReducer.java

package scoresort;

import java.io.IOException;

import org.apache.hadoop.io.FloatWritable;
//import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;

public class scoresortReducer extends Reducer{
	@Override
	/*
	 * reduce方法提供给reduce task进程来调用
	 * 
	 * reduce task会将shuffle阶段分发过来的大量kv数据对进行聚合,聚合的机制是相同key的kv对聚合为一组
	 * 然后reduce task对每一组聚合kv调用一次我们自定义的reduce方法
	 * 比如:
	 *  hello组会调用一次reduce方法进行处理,tom组也会调用一次reduce方法进行处理
	 *  调用时传递的参数:
	 *  		key:一组kv中的key
	 *  		values:一组kv中所有value的迭代器
	 */
	protected void reduce(FloatWritable key, Iterable values,Context context) throws IOException, InterruptedException {
		//定义一个计数器
//		int count = 0;
		//通过value这个迭代器,遍历这一组kv中所有的value,进行累加
		for(Text value:values){

			context.write(value, key);
		}
		
		//输出这个单词的统计结果
		
	}
}

Here I tar it to a file.

tar -czvf monthlogin.tar.gz monthlogin

Hadoop -- UserLogin Statistical data_第5张图片

End with this picture.

你可能感兴趣的:(hadoop,大数据,分布式,学习方法)