一直都对InnoDB的文件结构非常感兴趣, 最近又翻了翻Jeremy Cole关于InnoDB的文章(https://blog.jcole.us/2013/01/03/the-basics-of-innodb-space-file-layout/), 手又痒痒, 用Java写了一个解析InnoDB数据文件(.ibd)的小程序, 可以对.ibd的基本存储单位页(Page)的页头和页尾进行解析。想作为一个开始, 后续如果有精力且资料充足的情况下, 会逐步将InnoDB主要的Page都解析出来。
InnoDB的数据存储使用"space"模型, 通常称为表空间, 对应".ibd"文件。MySQL默认一个表对应一个表空间(innodb_file_per_table=ON), 也就是一个数据库里的表, 会对应一个{datadir}/{database}/{table}.ibd文件。
表空间在管理上会分割成一个个页(Page), 默认大小为16KB(16384字节), 受innodb_page_size参数控制。Page是InnoDB表空间的最小单位, 每个Page又包含38字节的页头(FIL Header)和8字节的页尾(FIL Trailer)。
InnoDB’s data storage model uses “spaces”, often called “tablespaces” in the context of MySQL, and sometimes called “file spaces” in InnoDB itself.
Each space is divided into pages, normally 16 KiB each.
Every page has a 38-byte FIL header and 8-byte FIL trailer (FIL is a shortened form of “file”).
页头页尾的格式固定, 非常容易解析。下面直接上解析代码:
package org.littlestar.innodb.parser;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
/**
 * Minimal InnoDB tablespace (.ibd) inspector: walks the data file page by page and prints the raw
 * bytes of every page's 38-byte FIL header and 8-byte FIL trailer.
 *
 * <p>Limitations: only the 16 KiB page size is handled, and the file is memory-mapped in a single
 * piece, so it must not be larger than {@link Integer#MAX_VALUE} bytes.
 */
public class IbdataPaser {
    /** Size of one page in bytes (16 KiB). */
    public static final int PAGE_LENGTH_16K = 16 * 1024;

    /** Offset of the 8-byte FIL trailer from the start of a 16 KiB page. */
    public static final int FIL_TRAILER_OFFSET_16K = 16376;

    /** File parsed when no path is given on the command line. */
    private static final String DEFAULT_IBD_FILE =
            "D:\\Data\\mysql\\db1\\data\\sakila\\biz_client_business_info.ibd";

    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Prints the FIL header and trailer of every complete page in an .ibd file.
     *
     * @param args optional; {@code args[0]} is the path of the .ibd file to parse. Without
     *     arguments the built-in default path is used.
     * @throws IOException if the file cannot be opened, sized or mapped
     * @throws IllegalArgumentException if the file is too large to be mapped in one piece
     */
    public static void main(String[] args) throws IOException {
        String innoDbFileName = (args != null && args.length > 0) ? args[0] : DEFAULT_IBD_FILE;
        try (RandomAccessFile innoDbFile = new RandomAccessFile(innoDbFileName, "r")) {
            long fileSize = innoDbFile.length();
            if (fileSize > Integer.MAX_VALUE) {
                // FileChannel.map would reject this size anyway; fail with a clearer message.
                throw new IllegalArgumentException(
                        "Data file is " + fileSize + " bytes; only files up to "
                                + Integer.MAX_VALUE + " bytes are supported: " + innoDbFileName);
            }

            // Only whole pages are parsed. Iterating by page count (instead of comparing the
            // buffer position with the file size) keeps a truncated trailing page from causing a
            // buffer underflow half-way through the output.
            int pageCount = (int) (fileSize / PAGE_LENGTH_16K);
            long trailingBytes = fileSize % PAGE_LENGTH_16K;
            if (trailingBytes != 0) {
                System.err.println("Warning: ignoring " + trailingBytes
                        + " trailing byte(s) that do not form a complete "
                        + PAGE_LENGTH_16K + "-byte page.");
            }
            if (pageCount == 0) {
                return;
            }

            FileChannel innoDbFileChannel = innoDbFile.getChannel();
            MappedByteBuffer innoDbMappedBuffer = innoDbFileChannel.map(
                    MapMode.READ_ONLY, 0, (long) pageCount * PAGE_LENGTH_16K);
            for (int page = 0; page < pageCount; page++) {
                System.out.println("\nPage :" + page);
                // Cannot overflow: page * PAGE_LENGTH_16K < fileSize <= Integer.MAX_VALUE.
                printPageFilHeaderAndTrailer(innoDbMappedBuffer, page * PAGE_LENGTH_16K);
            }
        }
    }

    /**
     * Prints the FIL header fields and the FIL trailer fields of one page to standard output, one
     * field per line, as {@code <byte range> <field name> : <hex value>}.
     *
     * <p>The buffer's position is moved; on return it points just past the trailer of the page.
     *
     * @param mappedBuffer buffer holding the data file contents
     * @param pageStartPos position in {@code mappedBuffer} where the page starts
     * @return the two raw bytes of the page-type field
     * @throws IllegalArgumentException if a position to seek to lies outside the buffer
     * @throws java.nio.BufferUnderflowException if the buffer ends before the page does
     */
    public static byte[] printPageFilHeaderAndTrailer(final MappedByteBuffer mappedBuffer, final int pageStartPos) {
        // FIL header, in on-disk order.
        mappedBuffer.position(pageStartPos);
        printHexField(mappedBuffer, "Checksum(4)", 4);
        printHexField(mappedBuffer, "Offset(Page Number)(4)", 4);
        printHexField(mappedBuffer, "Previous Page(4)", 4);
        printHexField(mappedBuffer, "Next Page(4)", 4);
        printHexField(mappedBuffer, "LSN for last page modification(8)", 8);

        // The page type is printed with its symbolic name in front of the raw bytes.
        // NOTE: the label says "(4)" although the field is two bytes wide; the text is left
        // unchanged so the output format stays identical to earlier runs.
        int pageTypeBegin = mappedBuffer.position();
        byte[] rawFilePageType = readBytes(mappedBuffer, 2);
        String pageTypeName = getPageType(getInt16(rawFilePageType));
        printField(pageTypeBegin, mappedBuffer.position(), "Page Type(4)",
                pageTypeName + " (" + bytesToHexString(rawFilePageType) + ")");

        printHexField(mappedBuffer, "Flush LSN(8)", 8);
        printHexField(mappedBuffer, "Space ID(4)", 4);

        // FIL trailer: the last 8 bytes of the page.
        mappedBuffer.position(pageStartPos + FIL_TRAILER_OFFSET_16K);
        printHexField(mappedBuffer, "Trailer/Checksum(4)", 4);
        printHexField(mappedBuffer, "Trailer/Low 32Bits Of LSN(4)", 4);
        return rawFilePageType;
    }

    /** Reads {@code length} bytes from the buffer's current position and prints them as hex. */
    private static void printHexField(MappedByteBuffer buffer, String fieldName, int length) {
        int begin = buffer.position();
        byte[] raw = readBytes(buffer, length);
        printField(begin, buffer.position(), fieldName, bytesToHexString(raw));
    }

    /** Reads {@code length} bytes from the buffer's current position, advancing the position. */
    private static byte[] readBytes(MappedByteBuffer buffer, int length) {
        byte[] raw = new byte[length];
        buffer.get(raw);
        return raw;
    }

    /** Prints one output line: right-aligned byte range, right-aligned field name, then value. */
    private static void printField(int begin, int end, String fieldName, String value) {
        System.out.println(getRangeString(begin, end) + formatFieldName(fieldName) + " : " + value);
    }

    /**
     * Right-aligns a field name in a 34-character column.
     *
     * @param fieldName the label to pad
     * @return the label left-padded with spaces to at least 34 characters
     */
    public static String formatFieldName(String fieldName) {
        return String.format("%34s", fieldName);
    }

    /**
     * Formats a byte range as {@code "<begin> - <end>"}, right-aligned in a 20-character column.
     *
     * @param begin first offset of the range
     * @param end offset just past the range
     * @return the padded range text
     */
    public static String getRangeString(long begin, long end) {
        return String.format("%20s", begin + " - " + end);
    }

    /**
     * Interprets the given bytes as an unsigned big-endian integer (most significant byte first).
     *
     * @param bytes the raw bytes; intended for 2-byte fields, more than 4 bytes will overflow
     * @return the decoded value
     */
    public static int getInt16(final byte[] bytes) {
        int value = 0;
        for (byte b : bytes) {
            value = (value << 8) + (b & 0xFF);
        }
        return value;
    }

    /**
     * Converts bytes to an upper-case hexadecimal string, two digits per byte.
     *
     * @param src the bytes to convert
     * @return the hex string, or {@code null} when {@code src} is {@code null} or empty
     */
    public static String bytesToHexString(byte[] src) {
        if (src == null || src.length == 0) {
            return null;
        }
        StringBuilder hex = new StringBuilder(src.length * 2);
        for (byte b : src) {
            hex.append(HEX_DIGITS[(b >> 4) & 0x0F]).append(HEX_DIGITS[b & 0x0F]);
        }
        return hex.toString();
    }

    /**
     * Maps a numeric FIL page type to its constant name.
     *
     * <p>Names and values follow fil0fil.h:
     * https://dev.mysql.com/doc/dev/mysql-server/latest/fil0fil_8h_source.html
     *
     * @param type the page type read from the FIL header
     * @return the constant name, or the decimal number as text when the type is not known
     */
    public static String getPageType(int type) {
        switch (type) {
            case 17855: return "FIL_PAGE_INDEX";
            case 17854: return "FIL_PAGE_RTREE";
            case 17853: return "FIL_PAGE_SDI";
            case 0: return "FIL_PAGE_TYPE_ALLOCATED";
            case 1: return "FIL_PAGE_TYPE_UNUSED";
            case 2: return "FIL_PAGE_UNDO_LOG";
            case 3: return "FIL_PAGE_INODE";
            case 4: return "FIL_PAGE_IBUF_FREE_LIST";
            case 5: return "FIL_PAGE_IBUF_BITMAP";
            case 6: return "FIL_PAGE_TYPE_SYS";
            case 7: return "FIL_PAGE_TYPE_TRX_SYS";
            case 8: return "FIL_PAGE_TYPE_FSP_HDR";
            case 9: return "FIL_PAGE_TYPE_XDES";
            case 10: return "FIL_PAGE_TYPE_BLOB";
            case 11: return "FIL_PAGE_TYPE_ZBLOB";
            case 12: return "FIL_PAGE_TYPE_ZBLOB2";
            case 13: return "FIL_PAGE_TYPE_UNKNOWN";
            case 14: return "FIL_PAGE_COMPRESSED";
            case 15: return "FIL_PAGE_ENCRYPTED";
            case 16: return "FIL_PAGE_COMPRESSED_AND_ENCRYPTED";
            case 17: return "FIL_PAGE_ENCRYPTED_RTREE";
            case 18: return "FIL_PAGE_SDI_BLOB";
            case 19: return "FIL_PAGE_SDI_ZBLOB";
            case 20: return "FIL_PAGE_TYPE_LEGACY_DBLWR";
            case 21: return "FIL_PAGE_TYPE_RSEG_ARRAY";
            case 22: return "FIL_PAGE_TYPE_LOB_INDEX";
            case 23: return "FIL_PAGE_TYPE_LOB_DATA";
            case 24: return "FIL_PAGE_TYPE_LOB_FIRST";
            case 25: return "FIL_PAGE_TYPE_ZLOB_FIRST";
            case 26: return "FIL_PAGE_TYPE_ZLOB_DATA";
            case 27: return "FIL_PAGE_TYPE_ZLOB_INDEX";
            case 28: return "FIL_PAGE_TYPE_ZLOB_FRAG";
            case 29: return "FIL_PAGE_TYPE_ZLOB_FRAG_ENTRY|FIL_PAGE_TYPE_LAST";
            default: return Integer.toString(type);
        }
    }
}
程序解析结果如下:
Page :0
0 - 4 Checksum(4) : 9A894471
4 - 8 Offset(Page Number)(4) : 00000000
8 - 12 Previous Page(4) : 00013892
12 - 16 Next Page(4) : 00000001
16 - 24 LSN for last page modification(8) : 000000049E16F2A4
24 - 26 Page Type(4) : FIL_PAGE_TYPE_FSP_HDR (0008)
26 - 34 Flush LSN(8) : 0000000000000000
34 - 38 Space ID(4) : 00000558
16376 - 16380 Trailer/Checksum(4) : 9A894471
16380 - 16384 Trailer/Low 32Bits Of LSN(4) : 9E16F2A4
Page :1
16384 - 16388 Checksum(4) : 5266544B
16388 - 16392 Offset(Page Number)(4) : 00000001
16392 - 16396 Previous Page(4) : 00000000
16396 - 16400 Next Page(4) : 00000000
16400 - 16408 LSN for last page modification(8) : 000000049E16E153
16408 - 16410 Page Type(4) : FIL_PAGE_IBUF_BITMAP (0005)
16410 - 16418 Flush LSN(8) : 0000000000000000
16418 - 16422 Space ID(4) : 00000558
32760 - 32764 Trailer/Checksum(4) : 5266544B
32764 - 32768 Trailer/Low 32Bits Of LSN(4) : 9E16E153
Page :2
32768 - 32772 Checksum(4) : 972F37EC
32772 - 32776 Offset(Page Number)(4) : 00000002
32776 - 32780 Previous Page(4) : 00000000
32780 - 32784 Next Page(4) : 00000000
32784 - 32792 LSN for last page modification(8) : 000000049E16F2A4
32792 - 32794 Page Type(4) : FIL_PAGE_INODE (0003)
32794 - 32802 Flush LSN(8) : 0000000000000000
32802 - 32806 Space ID(4) : 00000558
49144 - 49148 Trailer/Checksum(4) : 972F37EC
49148 - 49152 Trailer/Low 32Bits Of LSN(4) : 9E16F2A4
Page :3
49152 - 49156 Checksum(4) : 4515EC0D
49156 - 49160 Offset(Page Number)(4) : 00000003
49160 - 49164 Previous Page(4) : FFFFFFFF
49164 - 49168 Next Page(4) : FFFFFFFF
49168 - 49176 LSN for last page modification(8) : 000000049E1749DB
49176 - 49178 Page Type(4) : FIL_PAGE_SDI (45BD)
49178 - 49186 Flush LSN(8) : 0000000000000000
49186 - 49190 Space ID(4) : 00000558
65528 - 65532 Trailer/Checksum(4) : 4515EC0D
65532 - 65536 Trailer/Low 32Bits Of LSN(4) : 9E1749DB
Page :4
65536 - 65540 Checksum(4) : 830ED289
65540 - 65544 Offset(Page Number)(4) : 00000004
65544 - 65548 Previous Page(4) : FFFFFFFF
65548 - 65552 Next Page(4) : FFFFFFFF
65552 - 65560 LSN for last page modification(8) : 000000049E16F2A4
65560 - 65562 Page Type(4) : FIL_PAGE_INDEX (45BF)
65562 - 65570 Flush LSN(8) : 0000000000000000
65570 - 65574 Space ID(4) : 00000558
81912 - 81916 Trailer/Checksum(4) : 830ED289
81916 - 81920 Trailer/Low 32Bits Of LSN(4) : 9E16F2A4
Page :5
81920 - 81924 Checksum(4) : 00000000
81924 - 81928 Offset(Page Number)(4) : 00000000
81928 - 81932 Previous Page(4) : 00000000
81932 - 81936 Next Page(4) : 00000000
81936 - 81944 LSN for last page modification(8) : 0000000000000000
81944 - 81946 Page Type(4) : FIL_PAGE_TYPE_ALLOCATED (0000)
81946 - 81954 Flush LSN(8) : 0000000000000000
81954 - 81958 Space ID(4) : 00000000
98296 - 98300 Trailer/Checksum(4) : 00000000
98300 - 98304 Trailer/Low 32Bits Of LSN(4) : 00000000
Page :6
98304 - 98308 Checksum(4) : 00000000
98308 - 98312 Offset(Page Number)(4) : 00000000
98312 - 98316 Previous Page(4) : 00000000
98316 - 98320 Next Page(4) : 00000000
98320 - 98328 LSN for last page modification(8) : 0000000000000000
98328 - 98330 Page Type(4) : FIL_PAGE_TYPE_ALLOCATED (0000)
98330 - 98338 Flush LSN(8) : 0000000000000000
98338 - 98342 Space ID(4) : 00000000
114680 - 114684 Trailer/Checksum(4) : 00000000
114684 - 114688 Trailer/Low 32Bits Of LSN(4) : 00000000