转载请注明出处:http://blog.csdn.net/lastsweetop/article/details/9173061
所有源码在github上,https://github.com/lastsweetop/styhadoop
package com.sweetop.styhadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Compresses everything read from standard input and writes the compressed
 * bytes to standard output, using the codec class named on the command line.
 *
 * <p>Usage: {@code StreamCompressor <codec class name>}, for example
 * {@code org.apache.hadoop.io.compress.GzipCodec}.
 *
 * @author lastsweetop
 */
public class StreamCompressor {

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the fully qualified class name of the
     *             {@link CompressionCodec} implementation to use
     * @throws Exception if the codec class cannot be loaded or instantiated,
     *                   or if reading, compressing or writing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            // Usage goes to stderr: stdout is reserved for the compressed data.
            System.err.println("Usage: StreamCompressor <codec class name>");
            System.exit(1);
        }

        String codecClassName = args[0];
        Class<?> codecClass = Class.forName(codecClassName);
        Configuration conf = new Configuration();
        CompressionCodec codec =
                (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);

        CompressionOutputStream out = codec.createOutputStream(System.out);
        // close=false: neither System.in nor the compression stream is closed by the copy.
        IOUtils.copyBytes(System.in, out, 4096, false);
        // finish() is called instead of close() so that System.out is not closed here.
        out.finish();
    }
}
echo "Hello lastsweetop" | ~/hadoop/bin/hadoop com.sweetop.styhadoop.StreamCompressor org.apache.hadoop.io.compress.GzipCodec | gunzip -
使用GzipCodec类来压缩“Hello lastsweetop”,然后再通过gunzip工具解压。
[exec] 13/06/26 20:01:53 INFO util.NativeCodeLoader: Loaded the native-hadoop library
[exec] 13/06/26 20:01:53 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
[exec] Hello lastsweetop
package com.sweetop.styhadoop; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.URI; /** * Created with IntelliJ IDEA. * User: lastsweetop * Date: 13-6-26 * Time: 下午10:03 * To change this template use File | Settings | File Templates. */ public class FileDecompressor { public static void main(String[] args) throws Exception { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path inputPath = new Path(uri); CompressionCodecFactory factory = new CompressionCodecFactory(conf); CompressionCodec codec = factory.getCodec(inputPath); if (codec == null) { System.out.println("No codec found for " + uri); System.exit(1); } String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension()); InputStream in = null; OutputStream out = null; try { in = codec.createInputStream(fs.open(inputPath)); out = fs.create(new Path(outputUri)); IOUtils.copyBytes(in,out,conf); } finally { IOUtils.closeStream(in); IOUtils.closeStream(out); } } }
[hadoop@namenode native]$ pwd
/home/hadoop/hadoop/lib/native
[hadoop@namenode native]$ ls -ls
total 8
4 drwxrwxrwx 2 root root 4096 Nov 14 2012 Linux-amd64-64
4 drwxrwxrwx 2 root root 4096 Nov 14 2012 Linux-i386-32
如果是其他平台的话,你就需要自己编译了,详细步骤请看这里 http://wiki.apache.org/hadoop/NativeHadoop
# Build JAVA_LIBRARY_PATH from whichever native-library locations exist.
# Nothing is changed unless at least one of the three locations is present.
if [ -d "${HADOOP_HOME}/build/native" -o -d "${HADOOP_HOME}/lib/native" -o -e "${HADOOP_PREFIX}/lib/libhadoop.a" ]; then

  # A build/native directory under HADOOP_HOME sets the initial value.
  if [ -d "$HADOOP_HOME/build/native" ]; then
    JAVA_LIBRARY_PATH=${HADOOP_HOME}/build/native/${JAVA_PLATFORM}/lib
  fi

  # lib/native is appended when JAVA_LIBRARY_PATH is already non-empty,
  # otherwise it becomes the whole value.
  if [ -d "${HADOOP_HOME}/lib/native" ]; then
    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}
    else
      JAVA_LIBRARY_PATH=${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}
    fi
  fi

  # If libhadoop.a exists under HADOOP_PREFIX/lib, that directory replaces
  # anything chosen above.
  if [ -e "${HADOOP_PREFIX}/lib/libhadoop.a" ]; then
    JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/lib
  fi
fi
hadoop会去查找对应的原生库,并且自动加载,你不需要关心这些设置。但某些时候你不想使用原生库,比如调试一些bug的时候,那么可以通过hadoop.native.lib设置为false来实现。
package com.sweetop.styhadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Compresses standard input to standard output like a plain stream
 * compressor, but borrows its {@link Compressor} from {@link CodecPool}
 * and hands it back when done.
 *
 * <p>Usage: {@code PooledStreamCompressor <codec class name>}
 *
 * @author lastsweetop
 */
public class PooledStreamCompressor {

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the fully qualified class name of the
     *             {@link CompressionCodec} implementation to use
     * @throws Exception if the codec class cannot be loaded or instantiated,
     *                   or if reading, compressing or writing fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            // Usage goes to stderr: stdout is reserved for the compressed data.
            System.err.println("Usage: PooledStreamCompressor <codec class name>");
            System.exit(1);
        }

        String codecClassName = args[0];
        Class<?> codecClass = Class.forName(codecClassName);
        Configuration conf = new Configuration();
        CompressionCodec codec =
                (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);

        Compressor compressor = null;
        try {
            compressor = CodecPool.getCompressor(codec);
            CompressionOutputStream out = codec.createOutputStream(System.out, compressor);
            // close=false: neither System.in nor the compression stream is closed by the copy.
            IOUtils.copyBytes(System.in, out, 4096, false);
            // finish() is called instead of close() so that System.out is not closed here.
            out.finish();
        } finally {
            // Only hand back a compressor that was actually obtained.
            if (compressor != null) {
                CodecPool.returnCompressor(compressor);
            }
        }
    }
}
代码比较容易理解,通过CodecPool的getCompressor方法获得Compressor对象,该方法需要传入一个codec,然后Compressor对象在createOutputStream中使用,使用完毕后再通过returnCompressor放回去。
[exec] 13/06/27 12:00:06 INFO util.NativeCodeLoader: Loaded the native-hadoop library
[exec] 13/06/27 12:00:06 INFO zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
[exec] 13/06/27 12:00:06 INFO compress.CodecPool: Got brand-new compressor
[exec] Hello lastsweetop