hadoop jython ( windows )
参考:hadoop windows 搭建。由于喜欢 py 的语法,一直想把 hadoop 的程序改成用 jython 来写,
这次终于在自己的电脑上完成了,下面介绍过程:
测试环境:
依然的 windows + cygwin
hadoop 0.18 # C:/cygwin/home/lky/tools/java/hadoop-0.18.3
jython 2.2.1 # C:/jython2.2.1
参考: PythonWordCount
启动 hadoop 并进入 hadoop_home 目录
结果输出:
cat c:/cygwin/home/lky/tools/java/hadoop-0.18.3/tmp2/part-00000
(http://www.apache.org/). 1
Apache 1
Foundation 1
Software 1
The 1
This 1
by 1
developed 1
includes 1
product 1
software 1
下面重头戏来了:(简洁的 jy hadoop 代码)
整理 www.blogjava.net/Good-Game
这次终于在自己的电脑上完成了,下面介绍过程:
测试环境:
依然的 windows + cygwin
hadoop 0.18 # C:/cygwin/home/lky/tools/java/hadoop-0.18.3
jython 2.2.1 # C:/jython2.2.1
参考: PythonWordCount
启动 hadoop 并进入 hadoop_home 目录
# 在云环境中创建 input 目录
$>bin/hadoop dfs -mkdir input
# 把 hadoop 包中的 NOTICE.txt 拷贝到 input 目录下
$>bin/hadoop dfs -copyFromLocal c:/cygwin/home/lky/tools/java/hadoop-0.18.3/NOTICE.txt hdfs:///user/lky/input
$>cd src/examples/python
# 创建一个脚本( jy->jar->hd run ),一步完成!
# 当然,在 linux 下写个脚本会比这个好看,呵呵!
$>vim run.bat
# 修改 jythonc 的打包环境,加入 hadoop 的 jar
$>vim C:\jython2.2.1\Tools\jythonc\jythonc.py
# 运行
C:/cygwin/home/lky/tools/java/hadoop-0.18.3/src/examples/python>
run.bat WordCount.py hdfs:///user/lky/input file:///c:/cygwin/home/lky/tools/java/hadoop-0.18.3/tmp2
$>bin/hadoop dfs -mkdir input
# 把 hadoop 包中的 NOTICE.txt 拷贝到 input 目录下
$>bin/hadoop dfs -copyFromLocal c:/cygwin/home/lky/tools/java/hadoop-0.18.3/NOTICE.txt hdfs:///user/lky/input
$>cd src/examples/python
# 创建一个脚本( jy->jar->hd run ),一步完成!
# 当然,在 linux 下写个脚本会比这个好看,呵呵!
$>vim run.bat
rem Step 1: run jythonc on the script passed as %1 to produce wc.jar.
"C:\Program Files\Java\jdk1.6.0_11\bin\java.exe" -classpath "C:\jython2.2.1\jython.jar;%CLASSPATH%" org.python.util.jython C:\jython2.2.1\Tools\jythonc\jythonc.py -p org.apache.hadoop.examples -d -j wc.jar -c %1
rem Step 2: submit wc.jar to hadoop, forwarding the remaining arguments (%2..%9).
sh C:\cygwin\home\lky\tools\java\hadoop-0.18.3\bin\hadoop jar wc.jar %2 %3 %4 %5 %6 %7 %8 %9
sh C:\cygwin\home\lky\tools\java\hadoop- 0.18.3 \bin\hadoop jar wc.jar % 2 % 3 % 4 % 5 % 6 % 7 % 8 % 9
# 修改 jythonc 的打包环境,加入 hadoop 的 jar
$>vim C:\jython2.2.1\Tools\jythonc\jythonc.py
# Copyright (c) Corporation for National Research Initiatives
# Driver script for jythonc2. See module main.py for details
import sys
import os
import glob

# Append every jar found in the Hadoop home, the Jython home and Hadoop's
# lib directory to sys.path before jythonc's main module is imported.
for fn in glob.glob('c:/cygwin/home/lky/tools/java/hadoop-0.18.3/*.jar'):
    sys.path.append(fn)
for fn in glob.glob('c:/jython2.2.1/*.jar'):
    sys.path.append(fn)
for fn in glob.glob('c:/cygwin/home/lky/tools/java/hadoop-0.18.3/lib/*.jar'):
    sys.path.append(fn)

import main
main.main()

# Terminate the process with status 0 as soon as main() returns.
os._exit(0)
# Driver script for jythonc2. See module main.py for details
import sys,os,glob

# Jar locations, listed in the order their matches are appended to sys.path.
jar_patterns = (
    'c:/cygwin/home/lky/tools/java/hadoop-0.18.3/*.jar',
    'c:/jython2.2.1/*.jar',
    'c:/cygwin/home/lky/tools/java/hadoop-0.18.3/lib/*.jar',
)
for pattern in jar_patterns:
    for fn in glob.glob(pattern):
        sys.path.append(fn)

import main
main.main()

# Terminate the process with status 0 as soon as main() returns.
import os
os._exit(0)
# 运行
C:/cygwin/home/lky/tools/java/hadoop-0.18.3/src/examples/python>
run.bat WordCount.py hdfs:///user/lky/input file:///c:/cygwin/home/lky/tools/java/hadoop-0.18.3/tmp2
结果输出:
cat c:/cygwin/home/lky/tools/java/hadoop-0.18.3/tmp2/part-00000
(http://www.apache.org/). 1
Apache 1
Foundation 1
Software 1
The 1
This 1
by 1
developed 1
includes 1
product 1
software 1
下面重头戏来了:(简洁的 jy hadoop 代码)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from org.apache.hadoop.fs import Path
from org.apache.hadoop.io import *
from org.apache.hadoop.mapred import *
import sys
import getopt
class WordCountMap(Mapper, MapReduceBase):
    """Mapper that collects a count of one for every whitespace-separated token."""

    # Single IntWritable(1) reused as the value of every collected pair.
    one = IntWritable(1)

    def map(self, key, value, output, reporter):
        """Collect a (Text(token), one) pair for each token of ``value``.

        ``key`` and ``reporter`` are accepted but not used.
        """
        tokens = value.toString().split()
        for token in tokens:
            output.collect(Text(token), self.one)
class Summer(Reducer, MapReduceBase):
    """Reducer that adds up all values received for a key."""

    def reduce(self, key, values, output, reporter):
        """Collect (key, IntWritable(total)) where total sums ``values``.

        ``values`` is walked with hasNext()/next(); ``reporter`` is not used.
        """
        total = 0
        while values.hasNext():
            total = total + values.next().get()
        output.collect(key, IntWritable(total))
def printUsage(code):
    """Print the command-line usage line and exit with status ``code``.

    Raises SystemExit via sys.exit(code).
    """
    # A parenthesised single argument prints identically under Jython /
    # Python 2 and is also valid syntax on Python 3.
    print(" wordcount [-m <maps>] [-r <reduces>] <input> <output> ")
    sys.exit(code)
def main(args):
    """Configure the word-count job from ``args`` and run it.

    ``args`` is an argv-style list; ``args[0]`` is skipped.  The optional
    flags ``-m <maps>`` and ``-r <reduces>`` set the number of map and
    reduce tasks.  Exactly two positional arguments must remain: the input
    path and the output path.  On a getopt error or a wrong number of
    positional arguments, printUsage(1) is called.
    """
    conf = JobConf(WordCountMap)
    conf.setJobName(" wordcount ")

    conf.setOutputKeyClass(Text)
    conf.setOutputValueClass(IntWritable)

    conf.setMapperClass(WordCountMap)
    # Summer serves as both the combiner and the reducer.
    conf.setCombinerClass(Summer)
    conf.setReducerClass(Summer)

    try:
        # The option spec must not contain blanks, otherwise a blank is
        # registered as an option character.
        flags, other_args = getopt.getopt(args[1:], "m:r:")
    except getopt.GetoptError:
        printUsage(1)
    if len(other_args) != 2:
        printUsage(1)

    for f, v in flags:
        # getopt reports flags as "-m" / "-r"; literals padded with blanks
        # never compare equal, which silently dropped both settings.
        if f == "-m":
            conf.setNumMapTasks(int(v))
        elif f == "-r":
            conf.setNumReduceTasks(int(v))

    conf.setInputPath(Path(other_args[0]))
    conf.setOutputPath(Path(other_args[1]))

    JobClient.runJob(conf)
# Run the job only when this file is executed as a script.  The literal has
# to be exactly "__main__"; with padding blanks the test is never true.
if __name__ == "__main__":
    main(sys.argv)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from org.apache.hadoop.fs import Path
from org.apache.hadoop.io import *
from org.apache.hadoop.mapred import *
import sys
import getopt
class WordCountMap(Mapper, MapReduceBase):
    """Mapper that collects a count of one for every whitespace-separated token."""

    # Single IntWritable(1) reused as the value of every collected pair.
    one = IntWritable(1)

    def map(self, key, value, output, reporter):
        """Collect a (Text(token), one) pair for each token of ``value``.

        ``key`` and ``reporter`` are accepted but not used.
        """
        tokens = value.toString().split()
        for token in tokens:
            output.collect(Text(token), self.one)
class Summer(Reducer, MapReduceBase):
    """Reducer that adds up all values received for a key."""

    def reduce(self, key, values, output, reporter):
        """Collect (key, IntWritable(total)) where total sums ``values``.

        ``values`` is walked with hasNext()/next(); ``reporter`` is not used.
        """
        total = 0
        while values.hasNext():
            total = total + values.next().get()
        output.collect(key, IntWritable(total))
def printUsage(code):
    """Print the command-line usage line and exit with status ``code``.

    Raises SystemExit via sys.exit(code).
    """
    # A parenthesised single argument prints identically under Jython /
    # Python 2 and is also valid syntax on Python 3.
    print(" wordcount [-m <maps>] [-r <reduces>] <input> <output> ")
    sys.exit(code)
def main(args):
    """Configure the word-count job from ``args`` and run it.

    ``args`` is an argv-style list; ``args[0]`` is skipped.  The optional
    flags ``-m <maps>`` and ``-r <reduces>`` set the number of map and
    reduce tasks.  Exactly two positional arguments must remain: the input
    path and the output path.  On a getopt error or a wrong number of
    positional arguments, printUsage(1) is called.
    """
    conf = JobConf(WordCountMap)
    conf.setJobName(" wordcount ")

    conf.setOutputKeyClass(Text)
    conf.setOutputValueClass(IntWritable)

    conf.setMapperClass(WordCountMap)
    # Summer serves as both the combiner and the reducer.
    conf.setCombinerClass(Summer)
    conf.setReducerClass(Summer)

    try:
        # The option spec must not contain blanks, otherwise a blank is
        # registered as an option character.
        flags, other_args = getopt.getopt(args[1:], "m:r:")
    except getopt.GetoptError:
        printUsage(1)
    if len(other_args) != 2:
        printUsage(1)

    for f, v in flags:
        # getopt reports flags as "-m" / "-r"; literals padded with blanks
        # never compare equal, which silently dropped both settings.
        if f == "-m":
            conf.setNumMapTasks(int(v))
        elif f == "-r":
            conf.setNumReduceTasks(int(v))

    conf.setInputPath(Path(other_args[0]))
    conf.setOutputPath(Path(other_args[1]))

    JobClient.runJob(conf)
# Run the job only when this file is executed as a script.  The literal has
# to be exactly "__main__"; with padding blanks the test is never true.
if __name__ == "__main__":
    main(sys.argv)
整理 www.blogjava.net/Good-Game