In Hadoop 1.x, MapReduce had to handle both computation and resource scheduling, so the two were tightly coupled; after YARN was added, MapReduce is only responsible for computation, while YARN takes over resource management and scheduling.
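From the client's point of view, the split shows up as plain configuration: a job is simply pointed at YARN instead of the local runner. A minimal sketch, assuming the ResourceManager runs on bigdata1 (the property names are standard Hadoop ones; the hostname is only this cluster's example):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class YarnSubmitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // submit to YARN instead of running locally; under 1.x the JobTracker
        // did both scheduling and MapReduce bookkeeping
        conf.set("mapreduce.framework.name", "yarn");
        // host of the ResourceManager (bigdata1 is an assumption for this cluster)
        conf.set("yarn.resourcemanager.hostname", "bigdata1");
        Job job = Job.getInstance(conf, "yarn-demo");
        // ...set mapper/reducer/input/output as in the Sort job below,
        // then call job.waitForCompletion(true)
    }
}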
scp -r /usr/local/jdk1.8.0_192
scp -r /etc/profile
scp -r /etc/hosts
systemctl stop firewalld.service
systemctl disable firewalld.service
source /etc/profile
// run in all sessions (on every node)
ssh-keygen -t rsa
// in all sessions, press Enter three times to accept the defaults
ssh-copy-id bigdata1
123456    // the node's login password, entered at the ssh-copy-id prompt
ssh-copy-id bigdata2
ssh-copy-id bigdata3
ssh-copy-id bigdata4
ssh-copy-id bigdata5
put <path> <filename>
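If this refers to uploading files into HDFS (hdfs dfs -put <local path> <HDFS path>), the same upload can be done from Java through the FileSystem API. A minimal sketch, assuming core-site.xml is on the classpath; the two paths are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsPutSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // equivalent of: hdfs dfs -put /tmp/demo.txt /demo.txt (placeholder paths)
        fs.copyFromLocalFile(new Path("/tmp/demo.txt"), new Path("/demo.txt"));
        fs.close();
    }
}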
6. Distribute to the other hosts
scp -r /usr/local/hadoop-2.8.4 bigdata1:/usr/local
scp -r /etc/profile bigdata1:/etc/profile
export HADOOP_HOME=/usr/local/hadoop-2.8.4
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
Hadoop
// First start the JournalNode on each of the worker nodes
hadoop-daemon.sh start journalnode
// On one of the master (NameNode) hosts, run:
hdfs namenode -format
hdfs zkfc -formatZK
// The second NameNode is not formatted again; once the first NameNode is running,
// sync its metadata to the second one instead:
hdfs namenode -bootstrapStandby
// Check the log for "successfully"
hadoop-daemon.sh start namenode    // run on every NameNode (master) host
hadoop-daemon.sh start zkfc    // start ZKFC, which registers with ZooKeeper and elects the active NameNode
// start one on each ZooKeeper node; each time, another node can be seen coming online
hadoop-daemon.sh start datanode
start-yarn.sh
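Once all the daemons are up, a quick sanity check from code is to list the HDFS root directory; if the active NameNode is reachable this prints its top-level paths. A minimal sketch, assuming the cluster's core-site.xml and hdfs-site.xml are on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsCheckSketch {
    public static void main(String[] args) throws Exception {
        // with HA configured, fs.defaultFS should point at the nameservice URI
        FileSystem fs = FileSystem.get(new Configuration());
        for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println(status.getPath());
        }
        fs.close();
    }
}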
vi /etc/sysconfig/network-scripts/ifcfg-ens33
// hadoop-env.sh lives under /usr/local/hadoop-2.8.4/etc/hadoop; set JAVA_HOME in it:
export JAVA_HOME=/usr/local/jdk1.8.0_192
start-dfs.sh
stop-dfs.sh
package com.iweb.test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class Sort {
static class SoMapper extends Mapper<LongWritable, Text, Flowbeen, Text>{
Flowbeen k= new Flowbeen();
Text v =new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String [] split = value.toString().split("\t");
long sum_up = Long.parseLong(split [split.length-3]);
long sum_down = Long.parseLong(split[split.length-2]);
k.set(sum_up,sum_down);
v.set(split[1]);
context.write(k,v);
}
}
static class SoReduce extends Reducer<Flowbeen, Text, Text, Flowbeen>{
@Override
protected void reduce(Flowbeen key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
for (Text text : values) {
context.write(text,key);
}
}
}
public static void main(String[] args) throws Exception{
Configuration conf =new Configuration();
Job job =Job.getInstance(conf);
Path src =null;
Path dst =null;
if(args.length==2){
src =new Path(args[0]);
dst = new Path(args [1]);
}else {
src =new Path("E:\\test\\sort\\in");
dst =new Path("E:\\test\\sort\\out");
conf.set("mapreduce.job.jar","F:\\workSpace\\java\\sort\\target\\sort-1.0-SNAPSHOT-jar-with-dependencies.jar");
}
FileSystem fs = FileSystem.get(conf);
if (fs.exists(dst)){
fs.delete(dst,true);
}
job.setMapperClass(SoMapper.class);
job.setReducerClass(SoReduce.class);
// map output: Flowbeen key (sorted by its compareTo), Text value (phone number)
job.setMapOutputKeyClass(Flowbeen.class);
job.setMapOutputValueClass(Text.class);
// final (reduce) output: Text key, Flowbeen value
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Flowbeen.class);
FileInputFormat.addInputPath(job,src);
FileOutputFormat.setOutputPath(job,dst);
System.exit(job.waitForCompletion(true)?0:1);
}
}
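As a worked example of the map and reduce logic, take the sample record noted in the bean below, 1 13736230513 192.196.100.1 www.atguigu.com 2481 24681 200 (tab-separated in the real input): split[length-3] = 2481 is the upstream traffic, split[length-2] = 24681 is the downstream traffic, and split[1] = 13736230513 is the phone number used as the value. Because Flowbeen is the map output key, records reach the reducer already sorted by compareTo, i.e. by total traffic in descending order, and the reducer then emits lines of the form 13736230513 2481 24681 27162 (27162 = 2481 + 24681).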
//bean
package com.iweb.test;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class Flowbeen implements WritableComparable<Flowbeen> {
//1 13736230513 192.196.100.1 www.atguigu.com 2481 24681 200
private long up;
private long down;
private long sum;
public Flowbeen() {
super();
}
public Flowbeen(long up, long down) {
super();
this.up = up;
this.down = down;
this.sum = up+down;
}
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeLong(up);
dataOutput.writeLong(down);
dataOutput.writeLong(sum);
}
@Override
public void readFields(DataInput dataInput) throws IOException {
this.up = dataInput.readLong();
this.down = dataInput.readLong();
this.sum = dataInput.readLong();
}
@Override
public String toString() {
return up +
"\t" + down +
"\t" + sum ;
}
public long getUp() {
return up;
}
public void setUp(long up) {
this.up = up;
}
public long getDown() {
return down;
}
public void setDown(long down) {
this.down = down;
}
public long getSum() {
return sum;
}
public void setSum(long sum) {
this.sum = sum;
}
public void set(long upFlow, long downFlow) {
this.up = upFlow;
this.down = downFlow;
this.sum = upFlow + downFlow;
}
@Override
public int compareTo(Flowbeen o) {
int result;
if(sum>o.getSum()){
result = -1;
}else if (sum<o.getSum()){
result = 1;
}else {
result = 0;
}
return result;
}
}
Once the spill files are merged into one large file, the shuffle phase is over; the ReduceTask then runs its logic: it takes each group of key-value pairs from the file and calls the UDF (the user-defined reduce method).
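The loop that drives this is easy to picture: the framework advances from one key group to the next and hands each group to reduce(). The sketch below overrides run() only to make that loop visible; it mirrors the shape of the default Reducer.run() (the Text/LongWritable types are placeholders):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class GroupLoopReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        try {
            while (context.nextKey()) {
                // hand one whole group (one key plus all of its values) to the UDF
                reduce(context.getCurrentKey(), context.getValues(), context);
            }
        } finally {
            cleanup(context);
        }
    }
}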
// Define a custom OutputFormat class
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class FilterOutputFormat extends FileOutputFormat<Text, NullWritable>{
@Override
public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
// Create a RecordWriter
return new FilterRecordWriter(job);
}
}
// Implement the RecordWriter
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
public class FilterRecordWriter extends RecordWriter<Text, NullWritable> {
FSDataOutputStream atguiguOut = null;
FSDataOutputStream otherOut = null;
public FilterRecordWriter(TaskAttemptContext job) {
// 1. Get the file system
FileSystem fs;
try {
fs = FileSystem.get(job.getConfiguration());
// 2. Create the output file paths
Path atguiguPath = new Path("e:/atguigu.log");
Path otherPath = new Path("e:/other.log");
// 3. Create the output streams
atguiguOut = fs.create(atguiguPath);
otherOut = fs.create(otherPath);
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void write(Text key, NullWritable value) throws IOException, InterruptedException {
// Records containing "atguigu" go to one file, everything else to the other
if (key.toString().contains("atguigu")) {
atguiguOut.write(key.toString().getBytes());
} else {
otherOut.write(key.toString().getBytes());
}
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
// Close the resources
IOUtils.closeStream(atguiguOut);
IOUtils.closeStream(otherOut);
}
}
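To actually use this OutputFormat, the driver registers it on the Job. A minimal sketch of that wiring, with the mapper/reducer setup omitted and the argument paths as placeholders; FileOutputFormat.setOutputPath is still required even though FilterRecordWriter opens its own files, because FileOutputFormat uses the directory for the _SUCCESS marker:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FilterDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(FilterDriver.class);
        // set the job's own Mapper/Reducer here (omitted in this sketch)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // plug in the custom OutputFormat defined above
        job.setOutputFormatClass(FilterOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // still needed: FileOutputFormat writes the _SUCCESS marker into this directory
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}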