解析InnoDB(.ibd)存储结构

一直都对InnoDB的文件结构非常感兴趣, 最近又翻了翻Jeremy Cole关于InnoDB的文章(https://blog.jcole.us/2013/01/03/the-basics-of-innodb-space-file-layout/), 手又痒痒, 用Java写了一个解析InnoDB数据文件(.ibd)的小程序, 可以对.ibd的基本存储单位页(Page)的页头和页尾进行解析。想作为一个开始, 后续如果有精力且资料充足的情况下, 会逐步把InnoDB主要的Page类型都解析出来。

InnoDB的数据存储使用"space"模型, 通常称为表空间, 对应".ibd"文件。MySQL默认一个表对应一个表空间(innodb_file_per_table=ON), 也就是数据库里的每一个表, 都会对应一个{datadir}/{database}/{table}.ibd文件。

表空间在管理上会分割成一个个页(Page), 默认大小为16KB(16384字节), 受innodb_page_size参数控制。Page是InnoDB表空间的最小单位, 每个Page又包含38字节的页头(FIL Header)和8字节的页尾(FIL Trailer)。

InnoDB’s data storage model uses “spaces”, often called “tablespaces” in the context of MySQL, and sometimes called “file spaces” in InnoDB itself.
Each space is divided into pages, normally 16 KiB each.
Every page has a 38-byte FIL header and 8-byte FIL trailer (FIL is a shortened form of “file”).

页头页尾的格式固定, 非常容易解析。下面直接上解析代码:

package org.littlestar.innodb.parser;

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;

/**
 * Dumps the FIL header (first 38 bytes) and FIL trailer (last 8 bytes) of every
 * 16 KiB page in an InnoDB tablespace (.ibd) file to standard output.
 *
 * <p>The whole file is mapped through a single {@link MappedByteBuffer}, so only
 * files smaller than 2 GiB are supported.
 */
public class IbdataPaser {
	// Only the 16 KiB page size is handled, and the data file must be smaller than 2 GiB.
	public static final int PAGE_LENGTH_16K        = 16 * 1024;
	public static final int FIL_TRAILER_OFFSET_16K = 16376;

	// File parsed when no path is given on the command line.
	private static final String DEFAULT_IBD_FILE = "D:\\Data\\mysql\\db1\\data\\sakila\\biz_client_business_info.ibd";

	// Digits used by bytesToHexString; upper case, independent of the default locale.
	private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

	/**
	 * Prints the FIL header and trailer of every complete page of an .ibd file.
	 *
	 * @param args optional; {@code args[0]} is the path of the .ibd file to parse.
	 *             When absent, the built-in default path is used.
	 * @throws IOException if the file cannot be opened, read or mapped
	 * @throws IllegalArgumentException if the file is too large to be mapped in one buffer
	 */
	public static void main(String[] args) throws IOException {
		// The data file to parse: first command-line argument, or the default path.
		String innoDbFileName = (args != null && args.length > 0) ? args[0] : DEFAULT_IBD_FILE;
		try (RandomAccessFile innoDbFile = new RandomAccessFile(innoDbFileName, "r")) {
			long fileSize = innoDbFile.length();
			if (fileSize > Integer.MAX_VALUE) {
				throw new IllegalArgumentException("File is too large to map in one buffer (" + fileSize
						+ " bytes); only files smaller than 2 GiB are supported: " + innoDbFileName);
			}

			// Only whole pages are parsed; a truncated tail would otherwise fail half-way through a page.
			int pageCount = (int) (fileSize / PAGE_LENGTH_16K);
			long trailingBytes = fileSize % PAGE_LENGTH_16K;
			if (trailingBytes != 0) {
				System.err.println("Warning: ignoring " + trailingBytes + " trailing byte(s) that do not form a complete "
						+ PAGE_LENGTH_16K + "-byte page.");
			}
			if (pageCount == 0) {
				return;
			}

			FileChannel innoDbFileChannel = innoDbFile.getChannel();
			MappedByteBuffer innoDbMappedBuffer = innoDbFileChannel.map(MapMode.READ_ONLY, 0, (long) pageCount * PAGE_LENGTH_16K);

			// page * PAGE_LENGTH_16K cannot overflow: the mapped size is at most Integer.MAX_VALUE.
			for (int page = 0; page < pageCount; page++) {
				System.out.println("\nPage :" + page);
				printPageFilHeaderAndTrailer(innoDbMappedBuffer, page * PAGE_LENGTH_16K);
			}
		}
	}

	/**
	 * Prints the FIL header fields and the FIL trailer fields of the page that starts
	 * at {@code pageStartPos}, one line per field, as raw upper-case hex.
	 *
	 * <p>On return the buffer position is at the end of the page.
	 *
	 * @param mappedBuffer buffer mapped over the tablespace file
	 * @param pageStartPos byte offset of the first byte of the page within the buffer
	 * @return the 2 raw bytes of the page-type field
	 * @throws IllegalArgumentException if the buffer does not hold a complete page at
	 *                                  {@code pageStartPos}; nothing is printed in that case
	 */
	public static byte[] printPageFilHeaderAndTrailer(final MappedByteBuffer mappedBuffer, final int pageStartPos) {
		// Validate up front so a short buffer does not produce a half-printed page.
		final long pageEnd = (long) pageStartPos + PAGE_LENGTH_16K;
		if (pageStartPos < 0 || pageEnd > mappedBuffer.limit()) {
			throw new IllegalArgumentException("No complete " + PAGE_LENGTH_16K + "-byte page at offset "
					+ pageStartPos + " (buffer limit " + mappedBuffer.limit() + ")");
		}

		// FIL header: fields are read back to back from the start of the page.
		mappedBuffer.position(pageStartPos);
		printRawField(mappedBuffer, 4, "Checksum(4)");
		printRawField(mappedBuffer, 4, "Offset(Page Number)(4)");
		printRawField(mappedBuffer, 4, "Previous Page(4)");
		printRawField(mappedBuffer, 4, "Next Page(4)");
		printRawField(mappedBuffer, 8, "LSN for last page modification(8)");

		// The page type is printed with its symbolic name in addition to the raw bytes.
		// NOTE: the field is 2 bytes wide although the label says "(4)"; the label is kept
		// unchanged so the output format stays identical to existing output.
		int pos = mappedBuffer.position();
		byte[] rawFilePageType = new byte[2];
		mappedBuffer.get(rawFilePageType);
		String pageTypeName = getPageType(getInt16(rawFilePageType));
		System.out.println(getRangeString(pos, mappedBuffer.position()) + formatFieldName("Page Type(4)") + " : " + pageTypeName + " (" + bytesToHexString(rawFilePageType) + ")");

		printRawField(mappedBuffer, 8, "Flush LSN(8)");
		printRawField(mappedBuffer, 4, "Space ID(4)");

		// FIL trailer: the last 8 bytes of the page.
		mappedBuffer.position(pageStartPos + FIL_TRAILER_OFFSET_16K);
		printRawField(mappedBuffer, 4, "Trailer/Checksum(4)");
		printRawField(mappedBuffer, 4, "Trailer/Low 32Bits Of LSN(4)");
		return rawFilePageType;
	}

	/** Reads {@code length} bytes at the buffer's current position and prints them as one field line. */
	private static byte[] printRawField(final MappedByteBuffer buffer, final int length, final String fieldName) {
		int begin = buffer.position();
		byte[] raw = new byte[length];
		buffer.get(raw);
		System.out.println(getRangeString(begin, buffer.position()) + formatFieldName(fieldName) + " : " + bytesToHexString(raw));
		return raw;
	}

	/**
	 * Right-aligns a field label in a 34-character column.
	 *
	 * @param fieldName label to pad
	 * @return the label left-padded with spaces to at least 34 characters
	 */
	public static String formatFieldName(String fieldName) {
		return String.format("%34s", fieldName);
	}

	/**
	 * Formats a byte range as {@code "begin - end"}, right-aligned in a 20-character column.
	 *
	 * @param begin first offset of the range
	 * @param end   offset just past the range, as printed by the caller
	 * @return the padded range text
	 */
	public static String getRangeString(long begin, long end) {
		return String.format("%20s", begin + " - " + end);
	}

	/**
	 * Combines the given bytes into an unsigned integer, most significant byte first.
	 *
	 * @param bytes raw bytes; the callers in this class pass exactly 2 bytes
	 * @return the combined value (0 for an empty array)
	 */
	public static int getInt16(final byte[] bytes) {
		int value = 0;
		for (byte b : bytes) {
			value = (value << 8) + (b & 0xFF);
		}
		return value;
	}

	/**
	 * Renders bytes as upper-case hex, two digits per byte, with no separators.
	 *
	 * @param src bytes to render
	 * @return the hex text, or {@code null} when {@code src} is {@code null} or empty
	 */
	public static String bytesToHexString(byte[] src) {
		if (src == null || src.length <= 0) {
			return null;
		}
		StringBuilder hex = new StringBuilder(src.length * 2);
		for (byte b : src) {
			int v = b & 0xFF;
			hex.append(HEX_DIGITS[v >>> 4]).append(HEX_DIGITS[v & 0x0F]);
		}
		return hex.toString();
	}

	/**
	 * Maps a FIL page-type code to its symbolic name.
	 * fil0fil.h: https://dev.mysql.com/doc/dev/mysql-server/latest/fil0fil_8h_source.html
	 *
	 * @param type numeric page type read from the FIL header
	 * @return the symbolic name, or the decimal value as text when the code is not listed
	 */
	public static String getPageType(int type) {
		switch (type) {
		case 17855: return "FIL_PAGE_INDEX";
		case 17854: return "FIL_PAGE_RTREE";
		case 17853: return "FIL_PAGE_SDI";
		case 0    : return "FIL_PAGE_TYPE_ALLOCATED";
		case 1    : return "FIL_PAGE_TYPE_UNUSED";
		case 2    : return "FIL_PAGE_UNDO_LOG";
		case 3    : return "FIL_PAGE_INODE";
		case 4    : return "FIL_PAGE_IBUF_FREE_LIST";
		case 5    : return "FIL_PAGE_IBUF_BITMAP";
		case 6    : return "FIL_PAGE_TYPE_SYS";
		case 7    : return "FIL_PAGE_TYPE_TRX_SYS";
		case 8    : return "FIL_PAGE_TYPE_FSP_HDR";
		case 9    : return "FIL_PAGE_TYPE_XDES";
		case 10   : return "FIL_PAGE_TYPE_BLOB";
		case 11   : return "FIL_PAGE_TYPE_ZBLOB";
		case 12   : return "FIL_PAGE_TYPE_ZBLOB2";
		case 13   : return "FIL_PAGE_TYPE_UNKNOWN";
		case 14   : return "FIL_PAGE_COMPRESSED";
		case 15   : return "FIL_PAGE_ENCRYPTED";
		case 16   : return "FIL_PAGE_COMPRESSED_AND_ENCRYPTED";
		case 17   : return "FIL_PAGE_ENCRYPTED_RTREE";
		case 18   : return "FIL_PAGE_SDI_BLOB";
		case 19   : return "FIL_PAGE_SDI_ZBLOB";
		case 20   : return "FIL_PAGE_TYPE_LEGACY_DBLWR";
		case 21   : return "FIL_PAGE_TYPE_RSEG_ARRAY";
		case 22   : return "FIL_PAGE_TYPE_LOB_INDEX";
		case 23   : return "FIL_PAGE_TYPE_LOB_DATA";
		case 24   : return "FIL_PAGE_TYPE_LOB_FIRST";
		case 25   : return "FIL_PAGE_TYPE_ZLOB_FIRST";
		case 26   : return "FIL_PAGE_TYPE_ZLOB_DATA";
		case 27   : return "FIL_PAGE_TYPE_ZLOB_INDEX";
		case 28   : return "FIL_PAGE_TYPE_ZLOB_FRAG";
		case 29   : return "FIL_PAGE_TYPE_ZLOB_FRAG_ENTRY|FIL_PAGE_TYPE_LAST";
		default   : return Integer.toString(type);
		}
	}
}

程序解析结果如下:


Page :0
               0 - 4                       Checksum(4) : 9A894471
               4 - 8            Offset(Page Number)(4) : 00000000
              8 - 12                  Previous Page(4) : 00013892
             12 - 16                      Next Page(4) : 00000001
             16 - 24 LSN for last page modification(8) : 000000049E16F2A4
             24 - 26                      Page Type(4) : FIL_PAGE_TYPE_FSP_HDR (0008)
             26 - 34                      Flush LSN(8) : 0000000000000000
             34 - 38                       Space ID(4) : 00000558
       16376 - 16380               Trailer/Checksum(4) : 9A894471
       16380 - 16384      Trailer/Low 32Bits Of LSN(4) : 9E16F2A4

Page :1
       16384 - 16388                       Checksum(4) : 5266544B
       16388 - 16392            Offset(Page Number)(4) : 00000001
       16392 - 16396                  Previous Page(4) : 00000000
       16396 - 16400                      Next Page(4) : 00000000
       16400 - 16408 LSN for last page modification(8) : 000000049E16E153
       16408 - 16410                      Page Type(4) : FIL_PAGE_IBUF_BITMAP (0005)
       16410 - 16418                      Flush LSN(8) : 0000000000000000
       16418 - 16422                       Space ID(4) : 00000558
       32760 - 32764               Trailer/Checksum(4) : 5266544B
       32764 - 32768      Trailer/Low 32Bits Of LSN(4) : 9E16E153

Page :2
       32768 - 32772                       Checksum(4) : 972F37EC
       32772 - 32776            Offset(Page Number)(4) : 00000002
       32776 - 32780                  Previous Page(4) : 00000000
       32780 - 32784                      Next Page(4) : 00000000
       32784 - 32792 LSN for last page modification(8) : 000000049E16F2A4
       32792 - 32794                      Page Type(4) : FIL_PAGE_INODE (0003)
       32794 - 32802                      Flush LSN(8) : 0000000000000000
       32802 - 32806                       Space ID(4) : 00000558
       49144 - 49148               Trailer/Checksum(4) : 972F37EC
       49148 - 49152      Trailer/Low 32Bits Of LSN(4) : 9E16F2A4

Page :3
       49152 - 49156                       Checksum(4) : 4515EC0D
       49156 - 49160            Offset(Page Number)(4) : 00000003
       49160 - 49164                  Previous Page(4) : FFFFFFFF
       49164 - 49168                      Next Page(4) : FFFFFFFF
       49168 - 49176 LSN for last page modification(8) : 000000049E1749DB
       49176 - 49178                      Page Type(4) : FIL_PAGE_SDI (45BD)
       49178 - 49186                      Flush LSN(8) : 0000000000000000
       49186 - 49190                       Space ID(4) : 00000558
       65528 - 65532               Trailer/Checksum(4) : 4515EC0D
       65532 - 65536      Trailer/Low 32Bits Of LSN(4) : 9E1749DB

Page :4
       65536 - 65540                       Checksum(4) : 830ED289
       65540 - 65544            Offset(Page Number)(4) : 00000004
       65544 - 65548                  Previous Page(4) : FFFFFFFF
       65548 - 65552                      Next Page(4) : FFFFFFFF
       65552 - 65560 LSN for last page modification(8) : 000000049E16F2A4
       65560 - 65562                      Page Type(4) : FIL_PAGE_INDEX (45BF)
       65562 - 65570                      Flush LSN(8) : 0000000000000000
       65570 - 65574                       Space ID(4) : 00000558
       81912 - 81916               Trailer/Checksum(4) : 830ED289
       81916 - 81920      Trailer/Low 32Bits Of LSN(4) : 9E16F2A4

Page :5
       81920 - 81924                       Checksum(4) : 00000000
       81924 - 81928            Offset(Page Number)(4) : 00000000
       81928 - 81932                  Previous Page(4) : 00000000
       81932 - 81936                      Next Page(4) : 00000000
       81936 - 81944 LSN for last page modification(8) : 0000000000000000
       81944 - 81946                      Page Type(4) : FIL_PAGE_TYPE_ALLOCATED (0000)
       81946 - 81954                      Flush LSN(8) : 0000000000000000
       81954 - 81958                       Space ID(4) : 00000000
       98296 - 98300               Trailer/Checksum(4) : 00000000
       98300 - 98304      Trailer/Low 32Bits Of LSN(4) : 00000000

Page :6
       98304 - 98308                       Checksum(4) : 00000000
       98308 - 98312            Offset(Page Number)(4) : 00000000
       98312 - 98316                  Previous Page(4) : 00000000
       98316 - 98320                      Next Page(4) : 00000000
       98320 - 98328 LSN for last page modification(8) : 0000000000000000
       98328 - 98330                      Page Type(4) : FIL_PAGE_TYPE_ALLOCATED (0000)
       98330 - 98338                      Flush LSN(8) : 0000000000000000
       98338 - 98342                       Space ID(4) : 00000000
     114680 - 114684               Trailer/Checksum(4) : 00000000
     114684 - 114688      Trailer/Low 32Bits Of LSN(4) : 00000000

你可能感兴趣的:(MySQL,mysql,innodb)