MapReduce编程

不想放本地了，直接扔这里来好了。

环境变量

# Install locations of the JDK and Hadoop used by the commands in these notes.
JAVA_HOME=/opt/jdk1.8.0_241
HADOOP_HOME=/opt/hadoop-2.7.3
# Prepend the JDK bin and the Hadoop bin/sbin directories to the existing PATH.
PATH="${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${PATH}"
export JAVA_HOME HADOOP_HOME PATH

Ruby

# Remove /ncdc_out, the directory the streaming job below writes to.
hdfs dfs -rm -r -f /ncdc_out
# Local check on sample.txt: the mapper alone, then map | sort | reduce.
cat sample.txt | ./max_temp_map.rb
cat sample.txt | ./max_temp_map.rb | sort | ./max_temp_reduce.rb
# Submit the Ruby mapper and reducer as a Hadoop Streaming job over /ncdc.
hadoop jar \
  /opt/hadoop-2.7.3/share/hadoop/tools/lib/hadoop-streaming-2.7.3.jar \
  -input /ncdc \
  -output /ncdc_out \
  -mapper max_temp_map.rb \
  -reducer max_temp_reduce.rb \
  -file max_temp_map.rb \
  -file max_temp_reduce.rb
# Show the contents of the part-00000 file in the output directory.
hdfs dfs -cat /ncdc_out/part-00000

Python

# Remove /ncdc_out, the directory the streaming job below writes to.
hdfs dfs -rm -r -f /ncdc_out
# Local check on sample.txt: the mapper alone, then map | sort | reduce.
cat sample.txt | ./max_temp_map.py
cat sample.txt | ./max_temp_map.py | sort | ./max_temp_reduce.py
# Submit the Python mapper and reducer as a Hadoop Streaming job over /ncdc
# (multi-line form).
hadoop jar \
  /opt/hadoop-2.7.3/share/hadoop/tools/lib/hadoop-streaming-2.7.3.jar \
  -input /ncdc \
  -output /ncdc_out \
  -mapper max_temp_map.py \
  -reducer max_temp_reduce.py \
  -file max_temp_map.py \
  -file max_temp_reduce.py
# The same job as a single line, convenient for copy-paste. The output
# directory is cleared first: the run above has already created /ncdc_out, and
# Hadoop rejects a job whose output directory already exists.
hdfs dfs -rm -r -f /ncdc_out
hadoop jar /opt/hadoop-2.7.3/share/hadoop/tools/lib/hadoop-streaming-2.7.3.jar -input /ncdc -output /ncdc_out -mapper max_temp_map.py -reducer max_temp_reduce.py -file max_temp_map.py -file max_temp_reduce.py
# Show the contents of the part-00000 file in the output directory.
hdfs dfs -cat /ncdc_out/part-00000

Hadoop Pipes

# Remove /ncdc_out, the directory the pipes job below writes to.
hdfs dfs -rm -r -f /ncdc_out
# Upload the local max_temperature file to the HDFS root; -program below
# refers to it as /max_temperature.
hdfs dfs -put max_temperature /
# Run the pipes job over /ncdc (multi-line form). Both flags must be spelled
# "true": they are boolean settings, and the previous value "ture" left the
# Java record reader and writer disabled.
hadoop pipes \
  -D hadoop.pipes.java.recordreader=true \
  -D hadoop.pipes.java.recordwriter=true \
  -input /ncdc \
  -output /ncdc_out \
  -program /max_temperature
# The same job as a single line, convenient for copy-paste. The output
# directory is cleared first: the run above has already created /ncdc_out, and
# Hadoop rejects a job whose output directory already exists.
hdfs dfs -rm -r -f /ncdc_out
hadoop pipes -D hadoop.pipes.java.recordreader=true -D hadoop.pipes.java.recordwriter=true -input /ncdc -output /ncdc_out -program /max_temperature

你可能感兴趣的:(Ubuntu)