



◦java.io.InputStream(implements java.io.Closeable) ◦java.io.FilterInputStream ◦java.io.DataInputStream(implements java.io.DataInput) ◦org.apache.hadoop.fs.FSDataInputStream(implements org.apache.hadoop.fs.Seekable, org.apache.hadoop.fs.PositionedReadable) ◦org.apache.hadoop.fs.HarFileSystem.HarFSDataInputStream



package org.apache.hadoop.fs;

import java.io.*;

/**
 * Contract for a stream whose current offset can be queried and moved.
 */
public interface Seekable {

  /**
   * Repositions the stream at offset {@code pos}.
   *
   * @param pos the offset to move to
   * @throws IOException if the stream cannot be repositioned
   */
  void seek(long pos) throws IOException;

  /**
   * Reports the current offset within the stream.
   *
   * @return the current offset
   * @throws IOException if the offset cannot be determined
   */
  long getPos() throws IOException;

  /**
   * Looks for a different copy of the data at {@code targetPos}.
   *
   * @param targetPos the offset at which an alternative copy is wanted
   * @return {@code true} if another copy was found, {@code false} otherwise
   * @throws IOException if the lookup fails
   */
  boolean seekToNewSource(long targetPos) throws IOException;
}

Seekable 接口中定义的方法都是基于文件流的位置进行操作的,这使得在文件系统内部或文件系统之间进行流式操作更加方便。


package org.apache.hadoop.fs;

import java.io.*;
import org.apache.hadoop.fs.*;

/**
 * Contract for reading from an explicit position in a stream. As documented
 * for each method, these reads leave the stream's current offset untouched
 * and are meant to be thread-safe.
 */
public interface PositionedReadable {

  /**
   * Reads up to {@code length} bytes, starting at {@code position} in the
   * stream, into {@code buffer} beginning at index {@code offset}.
   * The stream's current offset is not changed; the call is thread-safe.
   *
   * @param position offset in the stream to read from
   * @param buffer   destination array
   * @param offset   index in {@code buffer} of the first byte stored
   * @param length   maximum number of bytes to read
   * @return the value reported by the implementation for this read
   *         (presumably the number of bytes read; confirm against implementations)
   * @throws IOException if the read fails
   */
  int read(long position, byte[] buffer, int offset, int length)
      throws IOException;

  /**
   * Reads exactly {@code length} bytes, starting at {@code position} in the
   * stream, into {@code buffer} beginning at index {@code offset}.
   * The stream's current offset is not changed; the call is thread-safe.
   *
   * @param position offset in the stream to read from
   * @param buffer   destination array
   * @param offset   index in {@code buffer} of the first byte stored
   * @param length   number of bytes to read
   * @throws IOException if the read fails
   */
  void readFully(long position, byte[] buffer, int offset, int length)
      throws IOException;

  /**
   * Reads {@code buffer.length} bytes, starting at {@code position} in the
   * stream, filling {@code buffer} completely.
   * The stream's current offset is not changed; the call is thread-safe.
   *
   * @param position offset in the stream to read from
   * @param buffer   destination array, filled from index 0
   * @throws IOException if the read fails
   */
  void readFully(long position, byte[] buffer) throws IOException;
}



package org.apache.hadoop.fs; import java.io.*; public class FSDataInputStream extends DataInputStream implements Seekable, PositionedReadable { public FSDataInputStream(InputStream in) throws IOException { super(in); // 调用基类的构造方法,初始化一个基本流类属性InputStream in if( !(in instanceof Seekable) || !(in instanceof PositionedReadable) ) { // 强制保证InputStream in必须实现Seekable与PositionedReadable这两个接口。 throw new IllegalArgumentException( "In is not an instance of Seekable or PositionedReadable"); } } public synchronized void seek(long desired) throws IOException { ((Seekable)in).seek(desired); // 设置从in的desired位置开始搜索输入流流in } public long getPos() throws IOException { return ((Seekable)in).getPos(); } public int read(long position, byte[] buffer, int offset, int length) throws IOException { return ((PositionedReadable)in).read(position, buffer, offset, length); } public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { ((PositionedReadable)in).readFully(position, buffer, offset, length); } public void readFully(long position, byte[] buffer) throws IOException { ((PositionedReadable)in).readFully(position, buffer, 0, buffer.length); } public boolean seekToNewSource(long targetPos) throws IOException { return ((Seekable)in).seekToNewSource(targetPos); } }


另外,在org.apache.hadoop.fs包中还定义了基于RAF(Random Access File)风格的输入流类,可以随机读取该流对象。继承关系如下所示:

◦java.io.InputStream(implements java.io.Closeable) ◦org.apache.hadoop.fs.FSInputStream(implements org.apache.hadoop.fs.Seekable, org.apache.hadoop.fs.PositionedReadable) ◦org.apache.hadoop.fs.FSInputChecker ◦org.apache.hadoop.fs.ChecksumFileSystem.ChecksumFSInputChecker


package org.apache.hadoop.fs;

import java.io.*;

/**
 * Base class for input streams that support seeking and positional reads.
 * Subclasses supply the seek primitives; the positional reads are built on
 * top of them here.
 */
public abstract class FSInputStream extends InputStream
    implements Seekable, PositionedReadable {

  /**
   * Moves to offset {@code pos}; the next read starts from there.
   */
  public abstract void seek(long pos) throws IOException;

  /**
   * Returns the current offset in the file.
   */
  public abstract long getPos() throws IOException;

  /**
   * Looks for a different copy of the file data.
   *
   * @return {@code true} if one was found, {@code false} otherwise
   */
  public abstract boolean seekToNewSource(long targetPos) throws IOException;

  /**
   * Reads up to {@code length} bytes from {@code position} into
   * {@code buffer} at {@code offset}, then seeks back to the offset the
   * stream had before the call. The whole sequence runs while holding this
   * object's monitor.
   *
   * @return whatever {@code read(buffer, offset, length)} returned
   */
  public int read(long position, byte[] buffer, int offset, int length)
      throws IOException {
    synchronized (this) {
      final long savedPos = getPos();
      try {
        seek(position);
        return read(buffer, offset, length);
      } finally {
        // Runs on both success and failure so the caller's offset is restored.
        seek(savedPos);
      }
    }
  }

  /**
   * Repeats positional reads until {@code length} bytes have been stored in
   * {@code buffer} starting at {@code offset}.
   *
   * @throws EOFException if a read returns a negative count before
   *                      {@code length} bytes were obtained
   */
  public void readFully(long position, byte[] buffer, int offset, int length)
      throws IOException {
    for (int done = 0; done < length; ) {
      int got = read(position + done, buffer, offset + done, length - done);
      if (got < 0) {
        throw new EOFException("End of file reached before reading fully.");
      }
      done += got;
    }
  }

  /**
   * Fills the whole of {@code buffer} with bytes read from {@code position}.
   */
  public void readFully(long position, byte[] buffer) throws IOException {
    readFully(position, buffer, 0, buffer.length);
  }
}



◦java.io.OutputStream(implements java.io.Closeable, java.io.Flushable) ◦java.io.FilterOutputStream ◦java.io.DataOutputStream ◦org.apache.hadoop.fs.FSDataOutputStream(implements org.apache.hadoop.fs.Syncable)


/** * 该PositionCache类是一个缓冲流类,对输出流的位置进行缓存。 */ private static class PositionCache extends FilterOutputStream { private FileSystem.Statistics statistics; long position; // 缓存中输出流对象out的偏移位置 public PositionCache(OutputStream out, FileSystem.Statistics stats, long pos) throws IOException { super(out); // 初始化从基类继承下来的OutputStream out对象 statistics = stats; position = pos; } public void write(int b) throws IOException { out.write(b); // 向输出流对象out中写入一个字节b position++; // 缓存中输出流的偏移位置加1 if (statistics != null) { statistics.incrementBytesWritten(1); // 更新文件系统的统计数据对象 } } public void write(byte b[], int off, int len) throws IOException { out.write(b, off, len); // position += len; // 更新缓存 if (statistics != null) { statistics.incrementBytesWritten(len); // 更新文件统计数据 } } public long getPos() throws IOException { return position; // 返回输出流中当前待写入位置 } public void close() throws IOException { out.close(); // 关闭输出流 } }



/**
 * Builds the stream on top of a {@code PositionCache} that wraps
 * {@code out}, and keeps a direct reference to {@code out} itself.
 *
 * @param out           the stream that ultimately receives the data
 * @param stats         statistics object handed to the {@code PositionCache}
 * @param startPosition initial position handed to the {@code PositionCache}
 * @throws IOException declared by the signature
 */
public FSDataOutputStream(OutputStream out,
                          FileSystem.Statistics stats,
                          long startPosition) throws IOException {
  super(new PositionCache(out, stats, startPosition));
  // Remember the undecorated stream so it can be handed back or synced later.
  wrappedStream = out;
}



/**
 * Creates a stream over {@code out} with no statistics object and a start
 * position of 0.
 *
 * @param out the stream that ultimately receives the data
 * @throws IOException declared by the signature
 */
@Deprecated
public FSDataOutputStream(OutputStream out) throws IOException {
  this(out, null, 0);
}

/**
 * Creates a stream over {@code out} that reports to {@code stats} and
 * starts at position 0.
 *
 * @param out   the stream that ultimately receives the data
 * @param stats statistics object to pass along
 * @throws IOException declared by the signature
 */
public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats)
    throws IOException {
  this(out, stats, 0);
}


/**
 * Returns the position reported by the {@code PositionCache} held in
 * {@code out}.
 */
public long getPos() throws IOException {
  PositionCache tracker = (PositionCache) out;
  return tracker.getPos();
}

/** Closes the stream held in {@code out}. */
public void close() throws IOException {
  out.close();
}

/** Returns the stream stored in {@code wrappedStream}. */
public OutputStream getWrappedStream() {
  return wrappedStream;
}

/**
 * Forwards to the wrapped stream's {@code sync()} when that stream
 * implements {@code Syncable}; otherwise does nothing.
 */
public void sync() throws IOException {
  if (!(wrappedStream instanceof Syncable)) {
    return;
  }
  ((Syncable) wrappedStream).sync();
}

