MapReduce运行模式

MapReduce运行模式

1、本地模式 OR 集群模式

// 设置为local时,运行模式为本地模式
config.set("mapreduce.framework.name", "local");

// 设置为yarn时,运行模式为集群模式
config.set("mapreduce.framework.name", "yarn");

2、数据文件的输入输出路径

// 设置输入输出为hdfs路径
config.set("fs.defaultFS", "hdfs://node01:9000");

// 设置输入输出为本地路径
config.set("fs.defaultFS", "file:///");

提示:集群模式下,文件路径必须是hdfs路径

 3、文件的输出路径为hdfs时,可能出现AccessControlException: Permission denied,报错信息如下

Caused by: org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.security.AccessControlException: Permission denied: user=node01, access=WRITE, inode="":suh:supergroup:rwxr-xr-x 

 解决方案:

  1、在系统环境变量或JVM系统属性中添加 HADOOP_USER_NAME,值为运行Hadoop的Linux用户名(该用户需对hdfs有读写权限)。修改后,需重启eclipse(推荐使用的方案)

  2、MapReduce的驱动程序中添加如下设置:

// HADOOP_USER_NAME对应的用户需对hdfs有读写权限
// 注意:Hadoop从环境变量或JVM系统属性中读取该值,而不是从Configuration中读取,且必须在首次访问FileSystem之前设置
System.setProperty("HADOOP_USER_NAME", "root");

 WordCount完整代码

package com.theone.pureone.mymapreducer;
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Classic word-count MapReduce job: splits every input line on whitespace and
 * writes one {@code word<TAB>count} record per distinct word.
 *
 * <p>The driver reads from a local directory and writes to HDFS while running
 * the job with the local job runner.
 *
 * @author Pureone
 */
public class WordCountMR {

	/**
	 * Emits {@code (word, 1)} for every whitespace-separated token of a line.
	 *
	 * <p>Type parameters, in order:
	 * <ul>
	 * <li>{@code LongWritable}: input key. No input format is set on the job, so
	 * the default {@code TextInputFormat} applies and the key is the byte offset
	 * of the line within the file (not a line number).</li>
	 * <li>{@code Text}: input value, the text of one line.</li>
	 * <li>{@code Text}: map output key, a single word.</li>
	 * <li>{@code IntWritable}: map output value, always 1.</li>
	 * </ul>
	 */
	public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

		/** Shared constant; the framework serializes it on every write, so reuse is safe. */
		private static final IntWritable ONE = new IntWritable(1);

		/** Reused output key to avoid allocating one object per token. */
		private final Text word = new Text();

		/**
		 * Tokenizes one line and emits each non-empty token with a count of 1.
		 *
		 * @param key     position of the line in the input (unused)
		 * @param value   the line of text
		 * @param context sink for the {@code (word, 1)} pairs
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			for (String token : value.toString().split("\\s+")) {
				// split() yields an empty token for blank lines and for lines that
				// start with whitespace; those are not words and must not be counted.
				if (token.isEmpty()) {
					continue;
				}
				word.set(token);
				context.write(word, ONE);
			}
		}
	}

	/**
	 * Sums the counts emitted for each word.
	 *
	 * <p>Type parameters, in order:
	 * <ul>
	 * <li>{@code Text}: key received from the mapper (a word).</li>
	 * <li>{@code IntWritable}: value received from the mapper (a partial count).</li>
	 * <li>{@code Text}: reducer output key (the word).</li>
	 * <li>{@code IntWritable}: reducer output value (the total count).</li>
	 * </ul>
	 */
	public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

		/** Reused output value to avoid allocating one object per key. */
		private final IntWritable sum = new IntWritable();

		/**
		 * Adds up all counts for {@code key} and writes the total.
		 *
		 * @param key     the word
		 * @param value   all counts emitted for the word
		 * @param context sink for the {@code (word, total)} pair
		 * @throws IOException          if writing to the context fails
		 * @throws InterruptedException if the task is interrupted
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> value, Context context)
				throws IOException, InterruptedException {
			int count = 0;
			for (IntWritable partial : value) {
				count += partial.get();
			}
			sum.set(count);
			context.write(key, sum);
		}
	}

	/**
	 * Driver: configures the job, clears a stale output directory, submits the
	 * job and exits with status 0 on success or 1 on failure.
	 *
	 * @param args command-line arguments (unused)
	 * @throws IOException            if the file system or job submission fails
	 * @throws ClassNotFoundException if a job class cannot be loaded
	 * @throws InterruptedException   if waiting for the job is interrupted
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		// Must be set before the first FileSystem access: Hadoop reads the user
		// name from the environment / JVM system properties, not from Configuration.
		System.setProperty("HADOOP_USER_NAME", "root");

		Configuration config = new Configuration();
		// Unqualified paths resolve against HDFS.
		config.set("fs.defaultFS", "hdfs://node01:9000");
		// "local" runs the job in-process; "yarn" submits it to the cluster.
		config.set("mapreduce.framework.name", "local");
		// For cluster mode, also point the client at the ResourceManager, e.g.
		// yarn.resourcemanager.hostname = node01.

		// Pass the configuration to the job; Job.getInstance() without arguments
		// would build a fresh Configuration and silently ignore the settings above.
		Job job = Job.getInstance(config);
		// Driver class, used to locate the job jar.
		job.setJarByClass(WordCountMR.class);
		// Mapper and reducer implementations.
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);
		// Key/value types emitted by the mapper.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		// Key/value types emitted by the reducer.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		// The input lives on the local disk, so it carries an explicit file://
		// scheme; otherwise it would be resolved against the HDFS default above.
		Path inputPath = new Path("file:///F:/input");
		Path outputPath = new Path("hdfs://node01:9000/first_path/output");

		// The job refuses to start if the output directory already exists.
		// Resolve the file system from the path itself so the check always
		// targets the file system the output is written to.
		FileSystem fileSystem = outputPath.getFileSystem(config);
		if (fileSystem.exists(outputPath)) {
			fileSystem.delete(outputPath, true);
		}

		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);

		// Submit and block until the job finishes, printing progress.
		boolean completion = job.waitForCompletion(true);
		System.exit(completion ? 0 : 1);
	}
}

 

你可能感兴趣的:(hadoop)