Hadoop SequenceFile Writer And Reader

 

package cn.edu.xmu.dm.mpdemo.ioformat;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;

/**
 * Demo that writes a small Hadoop {@link SequenceFile} whose keys are
 * {@link IntWritable} and whose values are {@link Text}.
 * <code>SequenceFileWriteDemo</code>
 *
 * <p>Usage: <code>SequenceFileWriteDemo &lt;output-uri&gt;</code></p>
 *
 * @author chenwq ([email protected])
 * @version 1.0 2012/05/19
 */
public class SequenceFileWriteDemo {
	/** Lines that are cycled through to produce the record values. */
	private static final String[] DATA = { "One, two, buckle my shoe",
			"Three, four, shut the door", "Five, six, pick up sticks",
			"Seven, eight, lay them straight", "Nine, ten, a big fat hen" };

	/** Number of records appended to the sequence file. */
	private static final int RECORD_COUNT = 100;

	/**
	 * Creates the sequence file named by <code>args[0]</code> and appends
	 * {@link #RECORD_COUNT} records to it. Keys count down from
	 * <code>RECORD_COUNT</code> to 1 and values cycle through {@link #DATA}.
	 * Before each append, the writer's current length, the key and the value
	 * are printed to standard output.
	 *
	 * @param args
	 *            <code>args[0]</code> is the URI of the sequence file to
	 *            create; any further arguments are ignored
	 * @throws IOException
	 *             if the file system or the writer reports an I/O failure
	 */
	public static void main(String[] args) throws IOException {
		// Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 1) {
			System.err.println("Usage: SequenceFileWriteDemo <output-uri>");
			System.exit(2);
			return;
		}

		String uri = args[0];
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(URI.create(uri), conf);
		Path path = new Path(uri);

		// One key and one value object are reused for every record.
		IntWritable key = new IntWritable();
		Text value = new Text();
		SequenceFile.Writer writer = null;
		try {
			/*
			 * createWriter arguments:
			 * fs: the file system the file is created on
			 * conf: configuration object
			 * path: location of the sequence file
			 * key.getClass(): the key's type
			 * value.getClass(): the value's type
			 *
			 * To write a block-compressed file instead, pass
			 * CompressionType.BLOCK as an additional last argument.
			 */
			writer = SequenceFile.createWriter(fs, conf, path, key.getClass(),
					value.getClass());
			for (int i = 0; i < RECORD_COUNT; i++) {
				key.set(RECORD_COUNT - i);
				value.set(DATA[i % DATA.length]);
				// getLength() is read before append, so the printed number is
				// the length reported just before this record is written.
				System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key,
						value);
				writer.append(key, value);
			}
		} finally {
			// Runs even when createWriter failed and writer is still null.
			IOUtils.closeStream(writer);
		}
	}
}

 

 

 

package cn.edu.xmu.dm.mpdemo.ioformat;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Demo that reads a Hadoop {@link SequenceFile} and prints every record.
 * <code>SequenceFileReadDemo</code>
 *
 * <p>Usage: <code>SequenceFileReadDemo &lt;input-uri&gt;</code></p>
 *
 * @author chenwq ([email protected])
 * @version 1.0 2012/05/19
 */
public class SequenceFileReadDemo {
	/**
	 * Opens the sequence file named by <code>args[0]</code> and prints one
	 * line per record: the reader position at which the record starts
	 * (followed by <code>*</code> when the reader reports that a sync marker
	 * was seen), then the key and the value.
	 *
	 * @param args
	 *            <code>args[0]</code> is the URI of the sequence file to
	 *            read; any further arguments are ignored
	 * @throws IOException
	 *             if the file system or the reader reports an I/O failure
	 */
	public static void main(String[] args) throws IOException {
		// Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 1) {
			System.err.println("Usage: SequenceFileReadDemo <input-uri>");
			System.exit(2);
			return;
		}

		String uri = args[0];
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(URI.create(uri), conf);
		Path path = new Path(uri);

		SequenceFile.Reader reader = null;
		try {
			reader = new SequenceFile.Reader(fs, path, conf);
			// The key and value classes are taken from the reader, so no
			// concrete Writable types are hard-coded here.
			Writable key = (Writable) ReflectionUtils.newInstance(
					reader.getKeyClass(), conf);
			Writable value = (Writable) ReflectionUtils.newInstance(
					reader.getValueClass(), conf);
			// Position is captured before each next() call so that it refers
			// to the start of the record about to be printed.
			long position = reader.getPosition();
			while (reader.next(key, value)) {
				String syncSeen = reader.syncSeen() ? "*" : "";
				System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen, key,
						value);
				position = reader.getPosition(); // beginning of next record
			}
		} finally {
			// Runs even when the Reader constructor failed and reader is null.
			IOUtils.closeStream(reader);
		}
	}
}

 

 

 

使用 Block 压缩前后的文件大小对比(seqinput 为默认方式写入,seqinputblock 为 CompressionType.BLOCK 方式写入):

 

root@ubuntu:~# hadoop fs -ls mpdemo/
Found 2 items
-rw-r--r--   3 root supergroup       4788 2012-05-19 00:11 /user/root/mpdemo/seqinput
-rw-r--r--   3 root supergroup        484 2012-05-19 00:17 /user/root/mpdemo/seqinputblock
 

 

你可能感兴趣的:(sequence)