先来定义dbf文件的格式。首先定义文件头类DBFFileHeader:
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
/**
 * In-memory representation of the header of a DBF (dBASE) table file.
 *
 * <p>An instance starts out with default values and is populated by {@code readHeader}; the
 * accessors then expose the record count, the header and record lengths and the field
 * descriptors that were read.
 */
public class DBFFileHeader {
/** Added to a negative (signed) field-length byte to turn it into its unsigned value. */
private static final int MAXFILELENGTH = 256;
/** Number of reserved bytes skipped after the decimal count of each field descriptor. */
private static final int RESERVEDBYTE2 = 14;
/** Number of bytes read for each field name. */
private static final int MAXFIELDNAMELENGTH = 11;
/** Number of reserved bytes skipped after the record length in the file header. */
private static final int HEADRESERVEDBYTE = 20;
/** Mask that keeps the low eight bits, i.e. reads a byte as an unsigned value. */
private static final int FFMASK = 0xff;
/** Number of bits in one byte; shift distance when combining two bytes into one value. */
private static final int BYTELENGTH = 8;
/** Number of leading bytes read first: file type, update date, record count, header length. */
private static final int LEADOFFILE = 10;
/** Year pivot: a year byte above this value is offset by 1900, any other value by 2000. */
private static final int YEARCOMPARE = 90;
/** Offset added to year bytes that are not above {@link #YEARCOMPARE}. */
private static final int YEAR2000 = 2000;
/** Offset added to year bytes that are above {@link #YEARCOMPARE}. */
private static final int YEAR1900 = 1900;
/** Initial size, in bytes, of the buffer used while reading the header. */
private static final int CHUNKSIZE = 1024;
/** Size in bytes of the fixed file descriptor and of each field descriptor. */
private static final int FILE_DESCRIPTOR_SIZE = 32;
/** Value of the first byte of a supported file (03h); any other value is rejected. */
private static final byte MAGIC = 0x03;
/** Date the file was last updated; defaults to the creation time of this object. */
private Date date = new Date();
/** Number of records stated in the header. */
private int recordCnt = 0;
/**
 * Number of field descriptors derived from the header length. This can be larger than
 * {@code fields.length} because zero-length fields are not kept.
 */
private int fieldCnt = 0;
/**
 * Length of one record in bytes. Defaults to 1, which is enough for the single "space"
 * character that signifies an empty record.
 */
private int recordLength = 1;
/**
 * Length of the header in bytes; -1 until a header has been read.
 * NOTE(review): the original comment refers to a write path and a MINIMUM_HEADER constant,
 * neither of which is part of the visible source.
 */
private int headerLength = -1;
/** Largest field length, in bytes, seen while reading the field descriptors. */
private int largestFieldSize = 0;
/**
 * Descriptors of the fields that have a non-zero length. Starts out as a zero-length array
 * so the accessors work before a header has been read.
 */
private DbaseField[] fields = new DbaseField[0];
/**
 * Fills {@code buffer} from its current position up to its limit with bytes from
 * {@code channel}.
 *
 * <p>A single {@code channel.read} call is allowed to deliver fewer bytes than requested, so
 * the read is repeated until the buffer has no space left. Returning after one short read
 * would make the caller parse header values from bytes that were never read.
 *
 * <p>Note: on a non-blocking channel that keeps returning 0 this loop spins until data
 * arrives.
 *
 * @param buffer
 *            the buffer to fill; nothing is read when it has no remaining space.
 * @param channel
 *            the channel to read from.
 * @throws EOFException
 *             if the channel reaches end-of-stream before the buffer is full.
 * @throws IOException
 *             if the underlying read fails.
 */
private void read(final ByteBuffer buffer, final ReadableByteChannel channel) throws IOException {
    while (buffer.hasRemaining()) {
        if (channel.read(buffer) == -1) {
            throw new EOFException("Premature end of file");
        }
    }
}
/**
 * Looks up the length of one field.
 *
 * @param inIndex
 *            zero-based index of the field.
 * @return the number of bytes the field occupies in a record.
 */
public int getFieldLength(final int inIndex) {
    final DbaseField descriptor = fields[inIndex];
    return descriptor.getFieldLength();
}
/**
 * Looks up the decimal count of one field, i.e. where the decimal point sits.
 *
 * @param inIndex
 *            zero-based index of the field.
 * @return the decimal count stored in the field descriptor.
 */
public int getFieldDecimalCount(final int inIndex) {
    final DbaseField descriptor = fields[inIndex];
    return descriptor.getDecimalCount();
}
/**
 * Looks up the name of one field.
 *
 * @param inIndex
 *            zero-based index of the field.
 * @return the field name as stored in the field descriptor.
 */
public String getFieldName(final int inIndex) {
    final DbaseField descriptor = fields[inIndex];
    return descriptor.getFieldName();
}
/**
 * Looks up the type character of one field.
 *
 * @param inIndex
 *            zero-based index of the field.
 * @return the dbase type character stored in the field descriptor.
 */
public char getFieldType(final int inIndex) {
    final DbaseField descriptor = fields[inIndex];
    return descriptor.getFieldType();
}
/**
 * Get the date this file was last updated.
 *
 * <p>{@link Date} is mutable, so a copy is handed out; changing the returned object does not
 * alter the value held by this header.
 *
 * @return a copy of the last-modified date.
 */
public Date getLastUpdateDate() {
    return new Date(date.getTime());
}
/**
 * Reports how many fields each record has.
 *
 * @return the number of field descriptors held by this header.
 */
public int getNumFields() {
    final int count = this.fields.length;
    return count;
}
/**
 * Reports how many records the file contains according to its header.
 *
 * @return the record count; 0 until a header has been read.
 */
public int getNumRecords() {
    return this.recordCnt;
}
/**
 * Reports the size of one record.
 *
 * @return the number of bytes per record; 1 until a header has been read.
 */
public int getRecordLength() {
    return this.recordLength;
}
/**
 * Reports the size of the header.
 *
 * @return the header length in bytes; -1 until a header has been read.
 */
public int getHeaderLength() {
    return this.headerLength;
}
/**
 * Read the header data from the DBF file and populate this object with it.
 *
 * <p>The first 10 bytes (file type, update date, record count, header length) are read
 * first; the rest of the header is then read in one go and parsed into the record length and
 * the field descriptors. Fields with a length of zero are dropped.
 *
 * @param channel
 *            A readable byte channel. If you have an InputStream you need to use, you can
 *            call java.nio.Channels.getChannel(InputStream in).
 * @param useDirectBuffer
 *            whether the temporary header buffer is allocated as a direct buffer (and
 *            released through {@code DirectBufferUtil.clean}) instead of on the heap.
 * @throws IOException
 *             If errors occur while reading, the file type byte is not supported, or the
 *             stated header length is too small to hold a header.
 */
public void readHeader(final ReadableByteChannel channel, final boolean useDirectBuffer) throws IOException {
    // Start with a 1K buffer; it is replaced below when the header turns out to be larger.
    ByteBuffer in = allocateHeaderBuffer(DBFFileHeader.CHUNKSIZE, useDirectBuffer);
    try {
        // Only the first 10 bytes are needed to learn how long the header is.
        in.limit(LEADOFFILE);
        read(in, channel);
        in.position(0);

        // type of file.
        final byte magic = in.get();
        if (magic != MAGIC) {
            throw new IOException("Unsupported DBF file Type " + Integer.toHexString(magic));
        }

        // Update date: the year byte is unsigned, so mask it before applying the pivot.
        int tempUpdateYear = in.get() & FFMASK;
        final int tempUpdateMonth = in.get();
        final int tempUpdateDay = in.get();
        if (tempUpdateYear > YEARCOMPARE) {
            tempUpdateYear = tempUpdateYear + YEAR1900;
        } else {
            tempUpdateYear = tempUpdateYear + YEAR2000;
        }
        final Calendar c = Calendar.getInstance();
        // Clear first so the result does not carry the time of day at which it was parsed.
        c.clear();
        c.set(tempUpdateYear, tempUpdateMonth - 1, tempUpdateDay);
        date = c.getTime();

        // Number of records (little-endian int).
        recordCnt = in.getInt();

        // Header length: unsigned little-endian short, low byte first.
        headerLength = (in.get() & FFMASK) | ((in.get() & FFMASK) << BYTELENGTH);
        if (headerLength < FILE_DESCRIPTOR_SIZE + 1) {
            // Too short for the fixed descriptor plus the terminator byte.
            throw new IOException("Invalid DBF header length " + headerLength);
        }

        final int remainingHeader = headerLength - LEADOFFILE;
        if (headerLength > in.capacity()) {
            // Allocate the replacement before releasing the old buffer so that the finally
            // block below always sees exactly one live buffer.
            final ByteBuffer small = in;
            in = allocateHeaderBuffer(remainingHeader, useDirectBuffer);
            if (useDirectBuffer) {
                DirectBufferUtil.clean(small);
            }
        }
        in.limit(remainingHeader);
        in.position(0);
        read(in, channel);
        in.position(0);

        // Record length: unsigned little-endian short, low byte first.
        recordLength = (in.get() & FFMASK) | ((in.get() & FFMASK) << BYTELENGTH);
        // Skip the reserved bytes in the header.
        in.position(in.position() + HEADRESERVEDBYTE);

        // Everything between the fixed descriptor and the terminator byte is field
        // descriptors of FILE_DESCRIPTOR_SIZE bytes each.
        fieldCnt = (headerLength - FILE_DESCRIPTOR_SIZE - 1) / FILE_DESCRIPTOR_SIZE;

        final List<DbaseField> lfields = new ArrayList<DbaseField>();
        int largest = 0;
        for (int i = 0; i < fieldCnt; i++) {
            final DbaseField field = new DbaseField();

            // Field name: fixed width, cut at the first NUL. Decoded with the platform
            // default charset, as before.
            final byte[] buffer = new byte[MAXFIELDNAMELENGTH];
            in.get(buffer);
            String name = new String(buffer);
            final int nullPoint = name.indexOf(0);
            if (nullPoint != -1) {
                name = name.substring(0, nullPoint);
            }
            field.setFieldName(name.trim());

            // Field type character.
            field.setFieldType((char) in.get());
            // Field data address, offset from the start of the record.
            field.setFieldDataAddress(in.getInt());

            // Field length is an unsigned byte.
            int length = in.get();
            if (length < 0) {
                length = length + MAXFILELENGTH;
            }
            field.setFieldLength(length);
            if (length > largest) {
                largest = length;
            }

            // Decimal count.
            field.setDecimalCount(in.get());
            // Skip the reserved bytes of the field descriptor.
            in.position(in.position() + RESERVEDBYTE2);

            // some broken shapefiles have 0-length attributes. The reference implementation
            // (ArcExplorer 2.0, built with MapObjects) just ignores them.
            if (field.getFieldLength() > 0) {
                lfields.add(field);
            }
        }
        // The terminator byte after the descriptors was already consumed from the channel
        // together with the rest of the header, so nothing more needs to be skipped.
        largestFieldSize = largest;
        fields = lfields.toArray(new DbaseField[lfields.size()]);
    } finally {
        // Release the direct buffer on every path, including failures.
        if (useDirectBuffer) {
            DirectBufferUtil.clean(in);
        }
    }
}

/**
 * Allocates a little-endian buffer for header parsing.
 *
 * @param size
 *            capacity of the buffer in bytes.
 * @param direct
 *            {@code true} for a direct buffer, {@code false} for a heap buffer.
 * @return the new buffer, already switched to little-endian byte order.
 */
private static ByteBuffer allocateHeaderBuffer(final int size, final boolean direct) {
    final ByteBuffer allocated = direct ? ByteBuffer.allocateDirect(size) : ByteBuffer.allocate(size);
    allocated.order(ByteOrder.LITTLE_ENDIAN);
    return allocated;
}
/**
 * Reports the length of the longest field of this table.
 *
 * @return the largest field size in bytes; 0 until a header has been read.
 */
public int getLargestFieldSize() {
    return this.largestFieldSize;
}
/**
 * Plain holder for the values of one field descriptor: name, type character, data address,
 * length and decimal count. Every property has a package-private getter and setter.
 */
class DbaseField {

    /** Name of the field. */
    private String fieldName;

    /** Field Type (C N L D or M). */
    private char fieldType;

    /** Address of the field data, as an offset from the start of the record. */
    private int fieldDataAddress;

    /** Number of bytes the field occupies. */
    private int fieldLength;

    /** Decimal count, indicating where the decimal point is. */
    private int decimalCount;

    /**
     * @return the name of the field.
     */
    String getFieldName() {
        return fieldName;
    }

    /**
     * @param fieldName
     *            the new name of the field.
     */
    void setFieldName(final String fieldName) {
        this.fieldName = fieldName;
    }

    /**
     * @return the type character of the field.
     */
    char getFieldType() {
        return fieldType;
    }

    /**
     * @param fieldType
     *            the new type character of the field.
     */
    void setFieldType(final char fieldType) {
        this.fieldType = fieldType;
    }

    /**
     * @return the data address of the field.
     */
    int getFieldDataAddress() {
        return fieldDataAddress;
    }

    /**
     * @param fieldDataAddress
     *            the new data address of the field.
     */
    void setFieldDataAddress(final int fieldDataAddress) {
        this.fieldDataAddress = fieldDataAddress;
    }

    /**
     * @return the length of the field in bytes.
     */
    int getFieldLength() {
        return fieldLength;
    }

    /**
     * @param fieldLength
     *            the new length of the field in bytes.
     */
    void setFieldLength(final int fieldLength) {
        this.fieldLength = fieldLength;
    }

    /**
     * @return the decimal count of the field.
     */
    int getDecimalCount() {
        return decimalCount;
    }

    /**
     * @param decimalCount
     *            the new decimal count of the field.
     */
    void setDecimalCount(final int decimalCount) {
        this.decimalCount = decimalCount;
    }
}
其中包含一个内部类DbaseField。
下面是DBFFileReader,用FileChannel来读取。这里用channel读取时,一般是把整个文件读进来放到内存中,然后再去处理。如果你有别的好方法,不用把文件直接读到内存中,请在下面跟帖,谢谢赐教。
/**
 * Sequential reader for the records of a DBF file.
 *
 * <p>The constructor parses the header; each {@code readRow()} call then decodes the next
 * record into a character buffer from which the {@code Row} extracts field values.
 */
public class DBFFileReader {
/** Default size (8K) of the data buffer; a larger record length overrides it. */
private static final int EKBYTESIZE = 8 * 1024;
/** Header parsed from the start of the channel. */
private DBFFileHeader header;
/** Raw record bytes: either a mapping of the whole file or a chunk refilled on demand. */
private ByteBuffer buffer;
/** Channel the file is read from. */
private ReadableByteChannel channel;
/** Decoded characters of the current record; one record long. */
private CharBuffer charBuffer;
/** Decoder that turns record bytes into characters (ISO-8859-1). */
private CharsetDecoder decoder;
/** Type character of every field, cached from the header. */
private char[] fieldTypes;
/** Length in bytes of every field, cached from the header. */
private int[] fieldLengths;
/** One-based number of the next record to read; incremented after every read. */
private int cnt = 1;
/** The single Row instance handed out by readRow(); null until init() has run. */
private Row row;
/**
 * Whether the file is memory-mapped. Taken from the constructor's useDirectBuffer flag and
 * forced to false when the channel is not a FileChannel.
 */
private boolean useMemoryMappedBuffer;
// Disabled: private final boolean randomAccessEnabled;
/**
 * Offset bookkeeping for the data buffer: set in init() and advanced by the buffer position
 * on every refill. It is not read anywhere in the visible source.
 */
private int currentOffset = 0;
/**
* Construct for DBFFileReader.java.
*
* @param channel
* dbfFile channel.
* @param useDirectBuffer
* where use useDirectBuffer , if file is not to big to
* handler use false maybe more faster .
* @throws IOException
*/
public DBFFileReader(final ReadableByteChannel channel, final boolean useDirectBuffer) throws IOException {
this.channel = channel;
this.useMemoryMappedBuffer = useDirectBuffer;
// this.randomAccessEnabled = (channel instanceof FileChannel);
header = new DBFFileHeader();
header.readHeader(channel, useDirectBuffer);
init();
}
/**
 * Sets up the data buffer, the per-field lookup arrays, the character buffer, the decoder
 * and the shared row.
 *
 * <p>A {@code FileChannel} is memory-mapped when mapping was requested; any other case reads
 * the channel in chunks through a heap buffer.
 *
 * @throws IOException
 *             if mapping the file or reading the first chunk fails.
 */
private void init() throws IOException {
    final int recordSize = header.getRecordLength();

    if (this.useMemoryMappedBuffer && (channel instanceof FileChannel)) {
        // Map the whole file and continue right where the header reader stopped.
        final FileChannel fileChannel = (FileChannel) channel;
        buffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size());
        buffer.position((int) fileChannel.position());
        this.currentOffset = 0;
    } else {
        // Not mappable (or mapping not requested): remember that and read in chunks.
        this.useMemoryMappedBuffer = false;
        // 8K is normally plenty, but the buffer must hold at least one record.
        buffer = ByteBuffer.allocate(Math.max(EKBYTESIZE, recordSize));
        fill(buffer, channel);
        buffer.flip();
        this.currentOffset = header.getHeaderLength();
    }
    // The entire file is in little endian
    buffer.order(ByteOrder.LITTLE_ENDIAN);

    // Cache field types and lengths so reading a value needs no header lookups.
    final int numFields = header.getNumFields();
    fieldTypes = new char[numFields];
    fieldLengths = new int[numFields];
    for (int i = 0; i < numFields; i++) {
        fieldTypes[i] = header.getFieldType(i);
        fieldLengths[i] = header.getFieldLength(i);
    }

    charBuffer = CharBuffer.allocate(recordSize);
    decoder = Charset.forName("ISO-8859-1").newDecoder();
    row = new Row();
}
/**
 * Decodes the next record and returns the row through which its fields can be read.
 *
 * <p>The same {@code Row} instance is returned on every call; its contents are replaced by
 * the next call.
 *
 * @return the row for the record that was just read.
 * @throws IOException
 *             if reading the record fails.
 */
public Row readRow() throws IOException {
    this.read();
    return this.row;
}
/**
 * Decodes the next record from the data buffer into the character buffer and advances the
 * record counter.
 *
 * <p>The buffer's limit is restored to the value it had before the call rather than to its
 * capacity: after the last chunk of the file the limit marks the end of the real data, and
 * widening it would let later reads decode stale bytes as records.
 *
 * @throws java.io.EOFException
 *             if fewer bytes than one record are left.
 * @throws IOException
 *             if refilling the buffer fails.
 */
private void read() throws IOException {
    // When the data is loaded in chunks, top the buffer up first.
    bufferCheck();

    final int recordLength = header.getRecordLength();
    if (buffer.remaining() < recordLength) {
        throw new java.io.EOFException("Premature end of file while reading record " + cnt);
    }

    final int savedLimit = buffer.limit();
    // clear() also resets the limit, in case an earlier field read left it shortened.
    charBuffer.clear();
    // Expose exactly one record to the decoder.
    buffer.limit(buffer.position() + recordLength);
    decoder.reset();
    decoder.decode(buffer, charBuffer, true);
    buffer.limit(savedLimit);
    charBuffer.flip();
    cnt++;
}
/**
 * Makes sure the data buffer holds a whole record, reloading from the channel if it does
 * not. Read-only buffers are left alone.
 *
 * @throws IOException
 *             if reading from the channel fails.
 */
private void bufferCheck() throws IOException {
    if (buffer.isReadOnly()) {
        return;
    }
    if (buffer.remaining() >= header.getRecordLength()) {
        return;
    }
    // Less than one record left: move the leftover bytes to the front and read more.
    this.currentOffset += buffer.position();
    buffer.compact();
    fill(buffer, channel);
    buffer.position(0);
}
/**
 * Reads from {@code channel} into {@code buffer} until the buffer is full or the channel
 * reports end-of-stream. On end-of-stream the buffer's limit is pulled back to its position
 * so that it marks the end of the data.
 *
 * @param buffer
 *            the buffer to fill.
 * @param channel
 *            the channel to read from.
 * @return the result of the last read (-1 on end-of-stream), or 0 when the buffer had no
 *         space left to begin with.
 * @throws IOException
 *             if reading from the channel fails.
 */
protected int fill(final ByteBuffer buffer, final ReadableByteChannel channel) throws IOException {
    if (!buffer.hasRemaining()) {
        return 0;
    }
    // A read may legitimately return 0 on a non-blocking channel; only -1 ends the loop early.
    int lastRead;
    do {
        lastRead = channel.read(buffer);
    } while ((lastRead != -1) && buffer.hasRemaining());
    if (lastRead == -1) {
        buffer.limit(buffer.position());
    }
    return lastRead;
}
/**
 * Closes the channel and releases the buffers held by this reader.
 *
 * <p>Calling it again after the reader has been closed does nothing. The buffers are
 * released even when closing the channel fails.
 *
 * @throws IOException
 *             if closing the channel fails.
 */
public void close() throws IOException {
    try {
        // channel is null once the reader has been closed.
        if ((channel != null) && channel.isOpen()) {
            channel.close();
        }
    } finally {
        if (buffer instanceof MappedByteBuffer) {
            DirectBufferUtil.clean(buffer);
        }
        buffer = null;
        channel = null;
        charBuffer = null;
        decoder = null;
        header = null;
        row = null;
    }
}
/**
 * Gives access to the header parsed when this reader was created.
 *
 * @return the header, or {@code null} once the reader has been closed.
 */
public DBFFileHeader getHeader() {
    return header;
}
/**
 * Query the reader as to whether there is another record.
 *
 * <p>The one-based record counter is compared directly with the record count; adding 1 to
 * the count instead would overflow when the header states {@code Integer.MAX_VALUE} records.
 *
 * @return True if more records exist, false otherwise.
 */
public boolean hasNext() {
    return cnt <= header.getNumRecords();
}
/**
 * One record of the dbf file, read through the enclosing reader's current-record character
 * buffer. Field values are decoded on demand by {@code read(int)}.
 *
 * @author 2008-3-6 01:51:51 PM
 */
public final class Row {
/**
 * Reads the value of one column of the current record.
 *
 * @param column
 *            zero-based index of the column.
 * @return the decoded value; its Java type depends on the field type.
 * @throws IOException
 *             if the value cannot be decoded.
 */
public Object read(final int column) throws IOException {
    return readObject(getOffset(column), column);
}
/**
 * Computes where a column starts inside the record.
 *
 * @param column
 *            zero-based index of the column.
 * @return 1 plus the combined length of all columns before {@code column}.
 */
private int getOffset(final int column) {
    // Start at 1 to skip the record's leading byte (presumably the deletion flag of the
    // DBF format — confirm).
    int position = 1;
    int index = 0;
    while (index < column) {
        position += fieldLengths[index];
        index++;
    }
    return position;
}
/**
 * Renders the row as {@code DBF Row - name: "value" ...}, one entry per field. A field
 * that fails to decode is shown with the message of the exception instead of its value.
 *
 * @see java.lang.Object#toString()
 * @return the textual form of this row.
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("DBF Row - ");
    final int numFields = header.getNumFields();
    for (int col = 0; col < numFields; col++) {
        text.append(header.getFieldName(col));
        text.append(": \"");
        try {
            text.append(read(col));
        } catch (final IOException ioe) {
            text.append(ioe.getMessage());
        }
        text.append("\" ");
    }
    return text.toString();
}
/**
 * Decodes one field of the current record into a Java object.
 *
 * <ul>
 * <li>L: {@code Boolean}, or {@code null} when the value is uninitialised</li>
 * <li>C: {@code String}</li>
 * <li>D: {@code java.util.Date}, or {@code null} when the digits cannot be parsed</li>
 * <li>N: {@code Integer} or {@code Long} when the decimal count is 0 and the text parses as
 * a whole number, otherwise handled like F</li>
 * <li>F: {@code Double}; 0.0 when the text cannot be parsed</li>
 * </ul>
 *
 * @param fieldOffset
 *            index of the field's first character within the record.
 * @param fieldNum
 *            zero-based index of the field.
 * @return the decoded value, or {@code null} for a zero-length field.
 * @throws IOException
 *             if the field type is not supported or a logical field holds an unknown value.
 */
private Object readObject(final int fieldOffset, final int fieldNum) throws IOException {
    final char type = fieldTypes[fieldNum];
    final int fieldLen = fieldLengths[fieldNum];
    if (fieldLen <= 0) {
        return null;
    }
    Object object = null;
    switch (type) {
    // (L)logical (T,t,F,f,Y,y,N,n)
    case 'l':
    case 'L':
        object = readLogical(fieldOffset);
        break;
    // (C)character (String)
    case 'c':
    case 'C':
        object = readString(fieldOffset, fieldLen);
        break;
    // (D)date (Date)
    case 'd':
    case 'D':
        object = readDate(fieldOffset);
        break;
    // (N)numeric: whole number when there are no decimals, otherwise floating point
    case 'n':
    case 'N':
        if (header.getFieldDecimalCount(fieldNum) == 0) {
            final Number whole = parseWholeNumber(extractNumberString(charBuffer, fieldOffset, fieldLen));
            if (whole != null) {
                object = whole;
                break;
            }
        }
        // Has decimals, or is not a whole number after all: deliberately fall through.
    // (F)floating (Double)
    case 'f':
    case 'F':
        object = parseDouble(extractNumberString(charBuffer, fieldOffset, fieldLen));
        break;
    default:
        throw new IOException("Invalid field type : " + type);
    }
    return object;
}

/**
 * Decodes a logical field. '?' and blank mark a value that was never initialised and are
 * reported as {@code null} instead of as an error.
 */
private Boolean readLogical(final int fieldOffset) throws IOException {
    final char flag = charBuffer.charAt(fieldOffset);
    switch (flag) {
    case 't':
    case 'T':
    case 'Y':
    case 'y':
        return Boolean.TRUE;
    case 'f':
    case 'F':
    case 'N':
    case 'n':
        return Boolean.FALSE;
    case '?':
    case ' ':
        return null;
    default:
        throw new IOException("Unknown logical value : '" + flag + "'");
    }
}

/**
 * Decodes a character field. The record was decoded as ISO-8859-1, which maps every byte
 * to one character, so the original bytes are recovered and re-decoded as gbk.
 */
private String readString(final int fieldOffset, final int fieldLen) throws IOException {
    // subSequence leaves the buffer's position and limit untouched, so a failure below
    // cannot leave the record buffer truncated.
    final String latin1 = charBuffer.subSequence(fieldOffset, fieldOffset + fieldLen).toString();
    return new String(latin1.getBytes("ISO-8859-1"), "gbk");
}

/**
 * Decodes a date field laid out as YYYYMMDD; returns {@code null} when any part is not a
 * number.
 */
private Object readDate(final int fieldOffset) {
    try {
        final int year = Integer.parseInt(charBuffer.subSequence(fieldOffset, fieldOffset + 4).toString());
        final int month = Integer.parseInt(charBuffer.subSequence(fieldOffset + 4, fieldOffset + 6).toString());
        final int day = Integer.parseInt(charBuffer.subSequence(fieldOffset + 6, fieldOffset + 8).toString());
        final Calendar cal = Calendar.getInstance();
        cal.clear();
        cal.set(Calendar.YEAR, year);
        // Calendar months are zero-based.
        cal.set(Calendar.MONTH, month - 1);
        cal.set(Calendar.DAY_OF_MONTH, day);
        return cal.getTime();
    } catch (final NumberFormatException ignored) {
        // Not a grave error: an unparsable date is reported as "no value".
        return null;
    }
}

/**
 * Parses text as an {@code Integer}, or as a {@code Long} when it does not fit; returns
 * {@code null} when it is neither.
 */
private Number parseWholeNumber(final String text) {
    try {
        return Integer.valueOf(text);
    } catch (final NumberFormatException notAnInt) {
        try {
            return Long.valueOf(text);
        } catch (final NumberFormatException ignored) {
            // The caller falls back to floating-point parsing.
            return null;
        }
    }
}

/**
 * Parses text as a {@code Double}; text that cannot be parsed yields 0.0.
 */
private Double parseDouble(final String text) {
    try {
        return Double.valueOf(text);
    } catch (final NumberFormatException ignored) {
        // Undigestable content: go with a zero Double.
        return Double.valueOf(0.0);
    }
}
/**
 * Cuts the text of one field out of the record and trims surrounding whitespace.
 *
 * @param charBuffer2
 *            buffer holding the decoded record; indices are relative to its position.
 * @param fieldOffset
 *            index of the field's first character.
 * @param fieldLen
 *            number of characters in the field.
 * @return the trimmed text of the field.
 */
private String extractNumberString(final CharBuffer charBuffer2, final int fieldOffset, final int fieldLen) {
    final int fieldEnd = fieldOffset + fieldLen;
    final CharSequence raw = charBuffer2.subSequence(fieldOffset, fieldEnd);
    return raw.toString().trim();
}
}
}
读取时使用下面的代码,相应的异常处理和需要import的类请自行补充:
// Usage example: print every field of every record of a dbf file.
final FileChannel channel = (new FileInputStream("D:\\temp\\show2003.dbf")).getChannel();
try {
    final DBFFileReader dbfreader = new DBFFileReader(channel, true);
    try {
        final int fields = dbfreader.getHeader().getNumFields();
        while (dbfreader.hasNext()) {
            final DBFFileReader.Row row = dbfreader.readRow();
            for (int i = 0; i < fields; i++) {
                System.out.print("[" + row.read(i) + "] ");
            }
        }
    } finally {
        // Release the reader even when reading a record fails.
        dbfreader.close();
    }
} finally {
    // Also covers the case where the reader could not be created.
    channel.close();
}