# Configure the following in ~/.bash_profile
export HADOOP_HOME=/Users/brycezou/program/hadoop-2.7.1
export PATH=$HADOOP_HOME/bin/:$PATH
Note: on Ubuntu, you must also add the following two lines
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
Otherwise, you will see warnings like:
……/libhadoop.so.1.0.0 which might have disabled stack guard.
The VM will try to fix the stack guard now.
3) Run hadoop version to verify that the installation succeeded.
a.txt, with contents: bla bla ha ha
b.txt, with contents: bla wa wa haha ha
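For example, the two files can be created from a terminal as follows (paths assume the ~/test/wc_input directory used in the next step):
mkdir -p ~/test/wc_input
echo "bla bla ha ha" > ~/test/wc_input/a.txt
echo "bla wa wa haha ha" > ~/test/wc_input/b.txt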
5) Run the following command in a terminal (it is a single command; the formatting is adjusted here to show the arguments clearly; note that the output directory must not already exist, or the job will fail)
hadoop jar
~/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar
wordcount
~/test/wc_input
~/test/wc_output
6) Two files are generated under ~/test/wc_output/; the file part-r-00000 contains:
bla 3
ha 3
haha 1
wa 2
This is the word-frequency result: bla, for example, appears twice in a.txt and once in b.txt, for a total of 3.
1) Configure the Maven environment variables in ~/.bash_profile
export MVN_HOME=/Users/brycezou/program/apache-maven-3.0.3
export PATH=$MVN_HOME/bin:$PATH
2) Create an empty project
3) Add a Maven module
4) Add a package
5) Edit and complete pom.xml
6) Add a Java class
7) Under File–>Project Structure–>Project Settings, on the Project tab, set the Project SDK
8) Under File–>Project Structure–>Project Settings, on the Artifacts tab, configure JAR packaging: Add–>JAR–>From modules with dependencies–>set the Main Class, select "extract to the target JAR"–>set the output path–>check "Build on make"–>OK (an equivalent pom.xml-based setup is sketched after the dependency list below)
9) Build and run
1) Add the Hadoop dependencies to pom.xml
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.1</version>
    </dependency>
</dependencies>
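If you prefer not to configure the artifact through the IDE (step 8 above), the Main-Class can also be recorded in the JAR manifest by Maven itself. Below is a minimal sketch using the standard maven-jar-plugin; this build section is my own addition, not part of the original project, and assumes the package and class name defined in the next step:
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <configuration>
                <archive>
                    <manifest>
                        <!-- assumed entry point: the class created in step 2 below -->
                        <mainClass>brycezou.mapreduce.wordcount</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
    </plugins>
</build>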
2) Add the wordcount.java class file
package brycezou.mapreduce;
/**
* Created by brycezou on 16/11/8.
*/
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class wordcount
{
    // Mapper: split each input line into tokens and emit (word, 1) for every token
    public static class TextSplitMapper extends Mapper<Object, Text, Text, IntWritable>
    {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws
                IOException, InterruptedException
        {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens())
            {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer: sum all the 1s emitted for each word
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable>
    {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws
                IOException, InterruptedException
        {
            int sum = 0;
            for (IntWritable val : values)
            {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception
    {
        if (args.length != 2)
        {
            System.err.println("Usage: wordcount <input path> <output path>");
            System.exit(-1);
        }
        Job job = Job.getInstance();
        job.setJarByClass(wordcount.class);
        job.setMapperClass(TextSplitMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // exit with 0 on success, 1 on failure
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
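For reference, Hadoop's bundled WordCount example additionally registers the reducer as a combiner, which pre-aggregates counts on the map side and cuts shuffle traffic. If you want the same behavior, one extra line in main() is enough (my addition, not part of the code above):
// optional: IntSumReducer is safe to use as a combiner because
// integer addition is associative and commutative
job.setCombinerClass(IntSumReducer.class);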
3) Run the packaged JAR
hadoop jar wordcount_module.jar wordcount wc_input/ wc_output/
On a Mac, an error like the following may appear
Exception in thread "main" java.io.IOException: Mkdirs failed to create /var/folders/_9/d7bt14qd16l7jxkhf7kr2pqm0000gn/T/hadoop-unjar4581432203972016000/META-INF/license
Running the following command first fixes the problem (macOS uses a case-insensitive file system by default, so when Hadoop unpacks the JAR, the META-INF/LICENSE file collides with the META-INF/license directory):
zip -d wordcount_module.jar META-INF/LICENSE
Re-running the job, however, still fails
hadoop jar wordcount_module.jar wordcount wc_input/ wc_output/
The error details are
Exception in thread "main" java.lang.ClassNotFoundException: wordcount
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:274)
at org.apache.hadoop.util.RunJar.run(RunJar.java:214)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
The fix is to qualify the class name with its full package name; the short, unqualified name only works for Hadoop's own bundled wordcount example, which is what misled me here. Again a single command, formatted for clarity:
hadoop jar
wordcount_module.jar
brycezou.mapreduce.wordcount
wc_input/
wc_output/
Done. Congratulations, you have finished this article!