package cn.edu.xmu.dm.mpdemo.ioformat; import java.io.IOException; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.Text; /** * desc: SequenceFileWriter * <code>SequenceFileWriteDemo</code> * * @author chenwq ([email protected]) * @version 1.0 2012/05/19 */ public class SequenceFileWriteDemo { private static final String[] DATA = { "One, two, buckle my shoe", "Three, four, shut the door", "Five, six, pick up sticks", "Seven, eight, lay them straight", "Nine, ten, a big fat hen" }; public static void main(String[] args) throws IOException { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); IntWritable key = new IntWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try { /** * fs: outputstream * conf: configuration object * key: the key' type * value: the value's type */ writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass()); // writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), // value.getClass(), CompressionType.BLOCK); for (int i = 0; i < 100; i++) { key.set(100 - i); value.set(DATA[i % DATA.length]); System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value); writer.append(key, value); } } finally { IOUtils.closeStream(writer); } } }
package cn.edu.xmu.dm.mpdemo.ioformat; import java.io.IOException; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Writable; import org.apache.hadoop.util.ReflectionUtils; /** * desc: SequenceFileReader * <code>SequenceFileReadDemo</code> * * @author chenwq ([email protected]) * @version 1.0 2012/05/19 */ public class SequenceFileReadDemo { public static void main(String[] args) throws IOException { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance( reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance( reader.getValueClass(), conf); long position = reader.getPosition(); while (reader.next(key, value)) { String syncSeen = reader.syncSeen() ? "*" : ""; System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen, key, value); position = reader.getPosition(); // beginning of next record } } finally { IOUtils.closeStream(reader); } } }
使用Block压缩后的大小对比:
root@ubuntu:~# hadoop fs -ls mpdemo/
Found 2 items
-rw-r--r--   3 root supergroup       4788 2012-05-19 00:11 /user/root/mpdemo/seqinput
-rw-r--r--   3 root supergroup        484 2012-05-19 00:17 /user/root/mpdemo/seqinputblock