java 读取dbf文件

阅读更多
先来定义dbf文件的格式。首先定义文件头DBFFileHeader:
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;

/**
 * DBFFileHeader Define.
 *  
 */
public class DBFFileHeader {
	/** MAXFILELENGTH. */
	private static final int MAXFILELENGTH = 256;

	/** RESERVEDBYTE2. */
	private static final int RESERVEDBYTE2 = 14;

	/** MAXFIELDNAMELENGTH. */
	private static final int MAXFIELDNAMELENGTH = 11;

	/** HEADRESERVEDBYTE. */
	private static final int HEADRESERVEDBYTE = 20;

	/** FFMASK. */
	private static final int FFMASK = 0xff;

	/** bits of one byte . */
	private static final int BYTELENGTH = 8;

	/** length of bytes read from file for detected basic elements. */
	private static final int LEADOFFILE = 10;

	/** YEARCOMPARE dbf file time field limited to <99. */
	private static final int YEARCOMPARE = 90;

	/** YEAR2000. */
	private static final int YEAR2000 = 2000;

	/** YEAR1900. */
	private static final int YEAR1900 = 1900;

	/** CHUNKSIZE use while readdatas. */
	private static final int CHUNKSIZE = 1024;

	/** Constant for the size of a record. */
	private static final int FILE_DESCRIPTOR_SIZE = 32;

	/** type of the file, must be 03h. */
	private static final byte MAGIC = 0x03;

	/** Date the file was last updated. */
	private Date date = new Date();

	/** recordCnt. */
	private int recordCnt = 0;

	/** fieldCnt. */
	private int fieldCnt = 0;

	/**
	 * set this to a default length of 1, which is enough for one "space".
	 * character which signifies an empty record
	 */
	private int recordLength = 1;

	/**
	 * set this to a flagged value so if no fields are added before the write.
	 * we know to adjust the headerLength to MINIMUM_HEADER
	 */
	private int headerLength = -1;

	/** largestFieldSize. */
	private int largestFieldSize = 0;

	/**
	 * collection of header records. lets start out with a zero-length array,
	 * just in case
	 */
	private DbaseField[] fields = new DbaseField[0];

	/**
	 * Method for read.
	 * 
	 * @param buffer
	 * @param channel
	 * @throws IOException
	 */
	private void read(final ByteBuffer buffer, final ReadableByteChannel channel) throws IOException {
		if (buffer.remaining() > 0) {
			if (channel.read(buffer) == -1) {
				throw new EOFException("Premature end of file");
			}
		}
	}

	/**
	 * Returns the field length in bytes.
	 * 
	 * @param inIndex
	 *            The field index.
	 * @return The length in bytes.
	 */
	public int getFieldLength(final int inIndex) {
		return fields[inIndex].fieldLength;
	}

	/**
	 * Retrieve the location of the decimal point within the field.
	 * 
	 * @param inIndex
	 *            The field index.
	 * @return The decimal count.
	 */
	public int getFieldDecimalCount(final int inIndex) {
		return fields[inIndex].getDecimalCount();
	}

	/**
	 * Retrieve the Name of the field at the given index.
	 * 
	 * @param inIndex
	 *            The field index.
	 * @return The name of the field.
	 */
	public String getFieldName(final int inIndex) {
		return fields[inIndex].fieldName;
	}

	/**
	 * Get the character class of the field. Retrieve the type of field at the
	 * given index
	 * 
	 * @param inIndex
	 *            The field index.
	 * @return The dbase character representing this field.
	 */
	public char getFieldType(final int inIndex) {
		return fields[inIndex].fieldType;
	}

	/**
	 * Get the date this file was last updated.
	 * 
	 * @return The Date last modified.
	 */
	public Date getLastUpdateDate() {
		return date;
	}

	/**
	 * Return the number of fields in the records.
	 * 
	 * @return The number of fields in this table.
	 */
	public int getNumFields() {
		return fields.length;
	}

	/**
	 * Return the number of records in the file.
	 * 
	 * @return The number of records in this table.
	 */
	public int getNumRecords() {
		return recordCnt;
	}

	/**
	 * Get the length of the records in bytes.
	 * 
	 * @return The number of bytes per record.
	 */
	public int getRecordLength() {
		return recordLength;
	}

	/**
	 * Get the length of the header.
	 * 
	 * @return The length of the header in bytes.
	 */
	public int getHeaderLength() {
		return headerLength;
	}

	/**
	 * Read the header data from the DBF file.
	 * 
	 * @param channel
	 *            A readable byte channel. If you have an InputStream you need
	 *            to use, you can call java.nio.Channels.getChannel(InputStream
	 *            in).
	 * @throws IOException
	 *             If errors occur while reading.
	 */
	public void readHeader(final ReadableByteChannel channel, final boolean useDirectBuffer) throws IOException {
		// we'll read in chunks of 1K
		ByteBuffer in;
		if (useDirectBuffer) {
			in = ByteBuffer.allocateDirect(DBFFileHeader.CHUNKSIZE);
		} else {
			in = ByteBuffer.allocate(DBFFileHeader.CHUNKSIZE);
		}

		in.order(ByteOrder.LITTLE_ENDIAN);
		// only want to read first 10 bytes...
		in.limit(LEADOFFILE);
		// read and reset in byteBuffer
		read(in, channel);
		in.position(0);

		// type of file.
		final byte magic = in.get();
		if (magic != MAGIC) {
			throw new IOException("Unsupported DBF file Type " + Integer.toHexString(magic));
		}

		// parse the update date information.
		int tempUpdateYear = in.get();
		final int tempUpdateMonth = in.get();
		final int tempUpdateDay = in.get();
		// correct year present
		if (tempUpdateYear > YEARCOMPARE) {
			tempUpdateYear = tempUpdateYear + YEAR1900;
		} else {
			tempUpdateYear = tempUpdateYear + YEAR2000;
		}
		final Calendar c = Calendar.getInstance();
		c.set(Calendar.YEAR, tempUpdateYear);
		c.set(Calendar.MONTH, tempUpdateMonth - 1);
		c.set(Calendar.DATE, tempUpdateDay);
		date = c.getTime();

		// read the number of records.
		recordCnt = in.getInt();

		// read the length of the header structure.
		// ahhh.. unsigned little-endian shorts
		// mask out the byte and or it with shifted 2nd byte
		if (in.order().equals(ByteOrder.BIG_ENDIAN)) {
			headerLength = ((in.get() & FFMASK) << BYTELENGTH) | (in.get() & FFMASK);
		} else {
			headerLength = (in.get() & FFMASK) | ((in.get() & FFMASK) << BYTELENGTH);
		}

		// if the header is bigger than our 1K, reallocate
		if (headerLength > in.capacity()) {
			if (useDirectBuffer) {
				DirectBufferUtil.clean(in);
			}
			in = ByteBuffer.allocateDirect(headerLength - LEADOFFILE);
		}
		in.limit(headerLength - LEADOFFILE);
		in.position(0);
		read(in, channel);
		in.position(0);

		// read the length of a record
		// ahhh.. unsigned little-endian shorts
		recordLength = (in.get() & FFMASK) | ((in.get() & FFMASK) << BYTELENGTH);

		// skip / skip thesreserved bytes in the header.
		in.position(in.position() + HEADRESERVEDBYTE);

		// calculate the number of Fields in the header
		fieldCnt = (headerLength - FILE_DESCRIPTOR_SIZE - 1) / FILE_DESCRIPTOR_SIZE;

		// read all of the header records
		final List lfields = new ArrayList();
		for (int i = 0; i < fieldCnt; i++) {
			final DbaseField field = new DbaseField();

			// read the field name
			final byte[] buffer = new byte[MAXFIELDNAMELENGTH];
			in.get(buffer);
			String name = new String(buffer);
			final int nullPoint = name.indexOf(0);
			if (nullPoint != -1) {
				name = name.substring(0, nullPoint);
			}
			field.setFieldName(name.trim());

			// read the field type
			field.setFieldType((char) in.get());

			// read the field data address, offset from the start of the record.
			field.setFieldDataAddress(in.getInt());

			// read the field length in bytes
			int length = in.get();
			if (length < 0) {
				length = length + MAXFILELENGTH;
			}
			field.setFieldLength(length);

			if (length > largestFieldSize) {
				largestFieldSize = length;
			}

			// read the field decimal count in bytes
			field.setDecimalCount(in.get());

			// rreservedvededved bytes.
			// in.skipBytes(14);
			in.position(in.position() + RESERVEDBYTE2);

			// some broken shapefiles have 0-length attributes. The reference
			// implementation
			// (ArcExplorer 2.0, built with MapObjects) just ignores them.
			if (field.getFieldLength() > 0) {
				lfields.add(field);
			}
		}

		// Last byte is a marker for the end of the field definitions.
		// in.skipBytes(1);
		in.position(in.position() + 1);

		if (useDirectBuffer) {
			DirectBufferUtil.clean(in);
		}

		fields = new DbaseField[lfields.size()];
		fields = lfields.toArray(fields);
	}

	/**
	 * Get the largest field size of this table.
	 * 
	 * @return The largt field size iiin bytes.
	 */
	public int getLargestFieldSize() {
		return largestFieldSize;
	}

	/**
	 * Class for holding the information assicated with a record.
	 */
	class DbaseField {

		/** fieldName. */
		private String fieldName;

		/** Field Type (C N L D or M). */
		private char fieldType;

		/** Field Data Address offset from the start of the record.. */
		private int fieldDataAddress;

		/** Length of the data in bytes. */
		private int fieldLength;

		/** Field decimal count in Binary, indicating where the decimal is. */
		private int decimalCount;

		/**
		 * Set fieldName.
		 * 
		 * @param fieldName
		 *            The fieldName to set.
		 */
		void setFieldName(final String fieldName) {
			this.fieldName = fieldName;
		}

		/**
		 * Get fieldName.
		 * 
		 * @return Returns the fieldName.
		 */
		String getFieldName() {
			return fieldName;
		}

		/**
		 * Set fieldType.
		 * 
		 * @param fieldType
		 *            The fieldType to set.
		 */
		void setFieldType(final char fieldType) {
			this.fieldType = fieldType;
		}

		/**
		 * Get fieldType.
		 * 
		 * @return Returns the fieldType.
		 */
		char getFieldType() {
			return fieldType;
		}

		/**
		 * Set fieldDataAddress.
		 * 
		 * @param fieldDataAddress
		 *            The fieldDataAddress to set.
		 */
		void setFieldDataAddress(final int fieldDataAddress) {
			this.fieldDataAddress = fieldDataAddress;
		}

		/**
		 * Get fieldDataAddress.
		 * 
		 * @return Returns the fieldDataAddress.
		 */
		int getFieldDataAddress() {
			return fieldDataAddress;
		}

		/**
		 * Set fieldLength.
		 * 
		 * @param fieldLength
		 *            The fieldLength to set.
		 */
		void setFieldLength(final int fieldLength) {
			this.fieldLength = fieldLength;
		}

		/**
		 * Get fieldLength.
		 * 
		 * @return Returns the fieldLength.
		 */
		int getFieldLength() {
			return fieldLength;
		}

		/**
		 * Set decimalCount.
		 * 
		 * @param decimalCount
		 *            The decimalCount to set.
		 */
		void setDecimalCount(final int decimalCount) {
			this.decimalCount = decimalCount;
		}

		/**
		 * Get decimalCount.
		 * 
		 * @return Returns the decimalCount.
		 */
		int getDecimalCount() {
			return decimalCount;
		}

	} 
 
上面的DBFFileHeader中还包含一个内部类DbaseField,用来保存每个字段的描述信息。
下面是DBFFileReader,用FileChannel来读取。这里用channel读取时,一般是把整个文件读进来放到内存中,然后再去处理。如果你有别的好方法,可以不用把文件直接读到内存中,请在下面跟帖,谢谢赐教。
/**
 * Sequential reader for the records of a dBASE III ({@code .dbf}) file.
 * <p>
 * The header is parsed on construction; afterwards call {@link #hasNext()}
 * and {@link #readRow()} in a loop and {@link #close()} when done. Records
 * are decoded byte-for-char with ISO-8859-1; character (C) fields are then
 * re-decoded with a configurable charset (GBK unless stated otherwise).
 * Records flagged as deleted are not skipped. Instances are not thread safe.
 */
public class DBFFileReader implements java.io.Closeable {
	/** Default size of the read buffer (8K). */
	private static final int EKBYTESIZE = 8 * 1024;

	/** Charset mapping every byte to the char with the same value. */
	private static final String RAW_CHARSET = "ISO-8859-1";

	/** Charset used for character fields when none is given. */
	private static final String DEFAULT_STRING_CHARSET = "gbk";

	/** DBF File Header . */
	private DBFFileHeader header;

	/** Data Input Buffer. */
	private ByteBuffer buffer;

	/** File relative channel. */
	private ReadableByteChannel channel;

	/** Holds the current record, one char per byte. */
	private CharBuffer charBuffer;

	/** Decoder for {@link #RAW_CHARSET}. */
	private CharsetDecoder decoder;

	/** Type character of every field. */
	private char[] fieldTypes;

	/** Length in bytes of every field. */
	private int[] fieldLengths;

	/** Offset of every field inside a record (byte 0 is the deleted flag). */
	private int[] fieldOffsets;

	/** 1 based number of the next record to read. */
	private int cnt = 1;

	/** View on the current record; the same instance is reused for all rows. */
	private Row row;

	/** whether the file is memory mapped instead of read in chunks. */
	private boolean useMemoryMappedBuffer;

	/** Name of the charset used for character (C) fields. */
	private final String stringCharsetName;

	/** File offset of the start of the buffer; bookkeeping only. */
	private int currentOffset = 0;

	/**
	 * Creates a reader whose character fields are decoded as GBK.
	 * 
	 * @param channel
	 *            dbfFile channel, positioned at the start of the file.
	 * @param useDirectBuffer
	 *            whether to use a direct buffer for the header and, if the
	 *            channel is a {@link FileChannel}, to memory map the file.
	 * @throws IOException
	 *             if the header cannot be read.
	 */
	public DBFFileReader(final ReadableByteChannel channel, final boolean useDirectBuffer) throws IOException {
		this(channel, useDirectBuffer, DEFAULT_STRING_CHARSET);
	}

	/**
	 * Creates a reader with an explicit charset for character fields.
	 * 
	 * @param channel
	 *            dbfFile channel, positioned at the start of the file.
	 * @param useDirectBuffer
	 *            whether to use a direct buffer for the header and, if the
	 *            channel is a {@link FileChannel}, to memory map the file.
	 * @param stringCharsetName
	 *            name of the charset used to decode character (C) fields.
	 * @throws IOException
	 *             if the header cannot be read.
	 * @throws IllegalArgumentException
	 *             if {@code stringCharsetName} is {@code null}.
	 */
	public DBFFileReader(final ReadableByteChannel channel, final boolean useDirectBuffer,
			final String stringCharsetName) throws IOException {
		if (stringCharsetName == null) {
			throw new IllegalArgumentException("stringCharsetName must not be null");
		}
		this.channel = channel;
		this.useMemoryMappedBuffer = useDirectBuffer;
		this.stringCharsetName = stringCharsetName;
		header = new DBFFileHeader();
		header.readHeader(channel, useDirectBuffer);
		init();
	}

	/**
	 * Prepare buffer and charbuffer for further read.
	 * 
	 * @throws IOException
	 *             if mapping or reading the channel fails.
	 */
	private void init() throws IOException {
		if ((channel instanceof FileChannel) && this.useMemoryMappedBuffer) {
			// map the whole file and continue behind the header
			final FileChannel fc = (FileChannel) channel;
			buffer = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
			buffer.position((int) fc.position());
			this.currentOffset = 0;
		} else {
			// Force useMemoryMappedBuffer to false
			this.useMemoryMappedBuffer = false;
			// 8K is normally plenty; a record must fit in any case
			final int size = Math.max(EKBYTESIZE, header.getRecordLength());
			buffer = ByteBuffer.allocate(size);
			// fill it and reset
			fill(buffer, channel);
			buffer.flip();
			this.currentOffset = header.getHeaderLength();
		}

		// The entire file is in little endian
		buffer.order(ByteOrder.LITTLE_ENDIAN);

		// cache type, length and record offset of every field
		final int numFields = header.getNumFields();
		fieldTypes = new char[numFields];
		fieldLengths = new int[numFields];
		fieldOffsets = new int[numFields];
		// byte 0 of a record is the deleted flag, data starts at 1
		int offset = 1;
		for (int i = 0; i < numFields; i++) {
			fieldTypes[i] = header.getFieldType(i);
			fieldLengths[i] = header.getFieldLength(i);
			fieldOffsets[i] = offset;
			offset += fieldLengths[i];
		}

		charBuffer = CharBuffer.allocate(header.getRecordLength());
		decoder = Charset.forName(RAW_CHARSET).newDecoder();
		row = new Row();
	}

	/**
	 * Reads the next record and returns the row view on it.
	 * <p>
	 * The returned object is reused: it always reflects the record read by
	 * the most recent call.
	 * 
	 * @return the row giving access to the fields of the record just read.
	 * @throws IOException
	 *             if reading fails, the reader is closed or the file ends
	 *             inside the record.
	 */
	public Row readRow() throws IOException {
		read();
		return row;
	}

	/**
	 * Loads the next record into {@link #charBuffer}.
	 * 
	 * @throws IOException
	 *             if reading fails, the reader is closed or fewer bytes than
	 *             one record are left.
	 */
	private void read() throws IOException {
		if (buffer == null) {
			throw new IOException("Reader is closed");
		}
		// if data is load in batch , we should adjust buffer
		bufferCheck();

		final int recordLength = header.getRecordLength();
		if (buffer.remaining() < recordLength) {
			throw new java.io.EOFException("Premature end of file");
		}

		// expose exactly one record to the decoder, then put back the old
		// limit so the end-of-data mark set by fill() is not lost
		final int dataLimit = buffer.limit();
		buffer.limit(buffer.position() + recordLength);
		charBuffer.clear();
		decoder.reset();
		decoder.decode(buffer, charBuffer, true);
		buffer.limit(dataLimit);
		charBuffer.flip();
		cnt++;
	}

	/**
	 * Adjust buffer and reload data if necessary.
	 * 
	 * @throws IOException
	 *             if reading the channel fails.
	 */
	private void bufferCheck() throws IOException {
		// a mapped (read only) buffer already holds the whole file
		if (buffer.isReadOnly()) {
			return;
		}
		// less than one record left: keep the rest and read more behind it
		if (buffer.remaining() < header.getRecordLength()) {
			this.currentOffset += buffer.position();
			buffer.compact();
			fill(buffer, channel);
			buffer.flip();
		}
	}

	/**
	 * fill buffer with data in channel.
	 * <p>
	 * Reads until the buffer is full or the channel is exhausted. At end of
	 * channel the buffer limit is set to the end of the data read.
	 * 
	 * @param buffer
	 *            buffer to fill from its current position.
	 * @param channel
	 *            channel to read from.
	 * @return the result of the last read, -1 if the channel is exhausted.
	 * @throws IOException
	 *             if reading fails.
	 */
	protected int fill(final ByteBuffer buffer, final ReadableByteChannel channel) throws IOException {
		int r = buffer.remaining();
		// channel reads return -1 when EOF or other error
		// because they a non-blocking reads, 0 is a valid return value!!
		while ((buffer.remaining() > 0) && (r != -1)) {
			r = channel.read(buffer);
		}
		if (r == -1) {
			buffer.limit(buffer.position());
		}
		return r;
	}

	/**
	 * Closes the channel and releases the buffers. Calling it again has no
	 * effect.
	 * 
	 * @throws IOException
	 *             if closing the channel fails.
	 */
	public void close() throws IOException {
		try {
			if ((channel != null) && channel.isOpen()) {
				channel.close();
			}
		} finally {
			// release everything even if closing the channel failed
			if (buffer instanceof MappedByteBuffer) {
				DirectBufferUtil.clean(buffer);
			}
			buffer = null;
			channel = null;
			charBuffer = null;
			decoder = null;
			header = null;
			row = null;
		}
	}

	/**
	 * Method for getHeader.
	 * 
	 * @return the parsed header, {@code null} once the reader is closed.
	 */
	public DBFFileHeader getHeader() {
		return this.header;
	}

	/**
	 * Query the reader as to whether there is another record.
	 * 
	 * @return True if more records exist, false otherwise (also after
	 *         {@link #close()}).
	 */
	public boolean hasNext() {
		return (header != null) && (cnt < header.getNumRecords() + 1);
	}

	/**
	 * View on the record most recently read by {@link DBFFileReader#readRow()}.
	 */
	public final class Row {
		/**
		 * Reads the value of one field of the current record.
		 * 
		 * @param column
		 *            0 based field index.
		 * @return Boolean for L, String for C, Date for D, Integer/Long/Double
		 *         for N, Double for F; {@code null} for an uninitialised
		 *         logical value or an unparsable date.
		 * @throws IOException
		 *             if the field type or a logical value is unknown or the
		 *             string charset is not supported.
		 */
		public Object read(final int column) throws IOException {
			final int offset = getOffset(column);
			return readObject(offset, column);
		}

		/**
		 * Returns the offset of a field inside the record.
		 * 
		 * @param column
		 *            0 based field index.
		 * @return offset in bytes from the start of the record.
		 */
		private int getOffset(final int column) {
			return fieldOffsets[column];
		}

		/**
		 * Renders all fields of the current record as {@code name: "value"}.
		 * 
		 * @return textual form of the record.
		 */
		@Override
		public String toString() {
			final StringBuilder ret = new StringBuilder("DBF Row - ");
			for (int i = 0; i < header.getNumFields(); i++) {
				ret.append(header.getFieldName(i)).append(": \"");
				try {
					ret.append(this.read(i));
				} catch (final IOException ioe) {
					ret.append(ioe.getMessage());
				}
				ret.append("\" ");
			}
			return ret.toString();
		}

		/**
		 * Converts the raw characters of one field to a Java object.
		 * 
		 * @param fieldOffset
		 *            offset of the field inside the record.
		 * @param fieldNum
		 *            0 based field index.
		 * @return the converted value, see {@link #read(int)}.
		 * @throws IOException
		 *             if the value cannot be converted.
		 */
		private Object readObject(final int fieldOffset, final int fieldNum) throws IOException {
			final char type = fieldTypes[fieldNum];
			final int fieldLen = fieldLengths[fieldNum];
			if (fieldLen <= 0) {
				return null;
			}
			switch (type) {
			// (L)logical (T,t,F,f,Y,y,N,n)
			case 'l':
			case 'L':
				return readLogical(fieldOffset);
			// (C)character (String)
			case 'c':
			case 'C':
				return readString(fieldOffset, fieldLen);
			// (D)date (Date)
			case 'd':
			case 'D':
				return readDate(fieldOffset);
			// (N)numeric (Integer, Long or Double)
			case 'n':
			case 'N':
				return readNumber(fieldOffset, fieldLen, header.getFieldDecimalCount(fieldNum) == 0);
			// (F)floating (Double)
			case 'f':
			case 'F':
				return readDouble(fieldOffset, fieldLen);
			default:
				throw new IOException("Invalid field type : " + type);
			}
		}

		/**
		 * Reads a logical field.
		 * 
		 * @return TRUE for T/t/Y/y, FALSE for F/f/N/n, {@code null} for an
		 *         uninitialised value ('?' or blank).
		 */
		private Boolean readLogical(final int fieldOffset) throws IOException {
			final char flag = charBuffer.charAt(fieldOffset);
			switch (flag) {
			case 't':
			case 'T':
			case 'Y':
			case 'y':
				return Boolean.TRUE;
			case 'f':
			case 'F':
			case 'N':
			case 'n':
				return Boolean.FALSE;
			case '?':
			case ' ':
				// dBASE stores '?' (or leaves a blank) for "not initialised"
				return null;
			default:
				throw new IOException("Unknown logical value : '" + flag + "'");
			}
		}

		/**
		 * Reads a character field: the chars are turned back into the raw
		 * bytes and decoded with the configured string charset. Padding is
		 * kept.
		 */
		private String readString(final int fieldOffset, final int fieldLen) throws IOException {
			// subSequence leaves position/limit of charBuffer untouched
			final String raw = charBuffer.subSequence(fieldOffset, fieldOffset + fieldLen).toString();
			return new String(raw.getBytes(RAW_CHARSET), stringCharsetName);
		}

		/**
		 * Reads a date field stored as {@code YYYYMMDD}.
		 * 
		 * @return the date at midnight, {@code null} if it is not numeric.
		 */
		private Object readDate(final int fieldOffset) {
			try {
				final int year = Integer.parseInt(charBuffer.subSequence(fieldOffset, fieldOffset + 4).toString());
				final int month = Integer
						.parseInt(charBuffer.subSequence(fieldOffset + 4, fieldOffset + 6).toString()) - 1;
				final int day = Integer.parseInt(charBuffer.subSequence(fieldOffset + 6, fieldOffset + 8).toString());
				final Calendar cal = Calendar.getInstance();
				cal.clear();
				cal.set(Calendar.YEAR, year);
				cal.set(Calendar.MONTH, month);
				cal.set(Calendar.DAY_OF_MONTH, day);
				return cal.getTime();
			} catch (final NumberFormatException ignored) {
				// an empty or malformed date is not a grave error: no value
				return null;
			}
		}

		/**
		 * Reads a numeric field. Without decimals an Integer is tried first,
		 * then a Long; everything else is read as a Double.
		 */
		private Object readNumber(final int fieldOffset, final int fieldLen, final boolean integral) {
			if (integral) {
				final String digits = extractNumberString(charBuffer, fieldOffset, fieldLen);
				try {
					return Integer.valueOf(digits);
				} catch (final NumberFormatException tooBigOrInvalid) {
					// Lets try parsing a long instead...
					try {
						return Long.valueOf(digits);
					} catch (final NumberFormatException notIntegral) {
						// fall back to the floating point conversion below
					}
				}
			}
			return readDouble(fieldOffset, fieldLen);
		}

		/**
		 * Reads a floating point field; an unparsable value yields 0.0.
		 */
		private Double readDouble(final int fieldOffset, final int fieldLen) {
			try {
				return Double.valueOf(extractNumberString(charBuffer, fieldOffset, fieldLen));
			} catch (final NumberFormatException e) {
				// whatever we got was truly undigestable: go with zero
				return Double.valueOf(0.0);
			}
		}

		/**
		 * Returns the trimmed text of a field.
		 * 
		 * @param charBuffer2
		 *            buffer holding the current record.
		 * @param fieldOffset
		 *            offset of the field inside the record.
		 * @param fieldLen
		 *            length of the field.
		 * @return the field text without leading and trailing blanks.
		 */
		private String extractNumberString(final CharBuffer charBuffer2, final int fieldOffset, final int fieldLen) {
			return charBuffer2.subSequence(fieldOffset, fieldOffset + fieldLen).toString().trim();
		}
	}

}

读取时使用下面的代码,相应的异常处理和需要import的类请自行补充:
// Usage example: print every field of every record of a DBF file.
FileChannel channel = (new FileInputStream("D:\\temp\\show2003.dbf")).getChannel();
try {
	DBFFileReader dbfreader = new DBFFileReader(channel, true);
	try {
		int fields = dbfreader.getHeader().getNumFields();
		while (dbfreader.hasNext()) {
			DBFFileReader.Row row = dbfreader.readRow();
			for (int i = 0; i < fields; i++) {
				System.out.print("[" + row.read(i) + "] ");
			}
		}
	} finally {
		// release the reader even if a record cannot be read
		dbfreader.close();
	}
} finally {
	// covers a failing constructor; closing a closed channel is a no-op
	channel.close();
}

你可能感兴趣的:(Java,C,C++,C#,F#)