//HRegionServer.splitRegion(): wraps the implementation of a manually requested split
public void splitRegion(HRegionInfo regionInfo, byte[] splitPoint)
throws NotServingRegionException, IOException {
HRegion region = getRegion(regionInfo.getRegionName()); //resolve the online HRegion for the given HRegionInfo
region.flushcache(); //flush the memstore
region.forceSplit(splitPoint);//set the splitRequest flag to true so the split is forced
//the split itself runs on a thread pool whose size is set by hbase.regionserver.thread.split (default 1); the actual split logic is in SplitRequest.run(), shown below
compactSplitThread.requestSplit(region, region.checkSplit());
}
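//A manual split is normally triggered from the client side and ends up in splitRegion() above.
//A minimal sketch using the HBaseAdmin API (assumes the usual org.apache.hadoop.hbase client
//imports; the table name and row are made-up values for illustration):
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
HBaseAdmin admin = new HBaseAdmin(conf);
admin.split("mytable", "row-5000"); //split the region containing "row-5000" at that row
admin.close();
}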
public byte[] checkSplit() {
// Can't split ROOT/META
if (this.regionInfo.isMetaTable()) {
return null;
}
//shouldSplit() returns true if splitRequest=true (forced split) or some store has grown past the split threshold (note: one store corresponds to one column family and holds several storefiles plus one MemStore); if the split is not forced (splitRequest=false) and the region contains a storefile that is a reference file, the region cannot be split (reference files only exist while a split is in progress)
if (!splitPolicy.shouldSplit()) {
return null;
}
//if a rowkey was specified for the split it is returned directly; otherwise all stores are scanned, the largest store that contains no reference files is chosen, and that store's split point becomes the region's split point; a store's split point is the middle rowkey (midkey) of its largest storefile, so a split is not an exact halving and one daughter region may end up much larger than the other (a sketch of this selection follows the method)
byte[] ret = splitPolicy.getSplitPoint();
if (ret != null) {
try {
checkRow(ret, "calculated split");
} catch (IOException e) {
LOG.error("Ignoring invalid split", e);
return null;
}
}
return ret;
}
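//A compact sketch of the selection that splitPolicy.getSplitPoint() performs (illustrative
//only, not the RegionSplitPolicy source; pickSplitPoint is a made-up name, and Store's
//getSize()/getSplitPoint() behave as described above):
byte[] pickSplitPoint(Collection<Store> stores) {
long largestSize = 0;
byte[] splitPoint = null;
for (Store s : stores) {
byte[] candidate = s.getSplitPoint(); //midkey of the store's largest storefile; null if the store still holds reference files
if (candidate != null && s.getSize() > largestSize) {
largestSize = s.getSize();
splitPoint = candidate;
}
}
return splitPoint;
}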
//SplitRequest.run(): performs the split through a SplitTransaction
public void run() {
SplitTransaction st = new SplitTransaction(parent, midKey);
if (!st.prepare()) return;
st.execute(this.server, this.server);
}
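//In the real SplitRequest.run() the execute() call is wrapped so that a failed split can be
//rolled back using the journal kept by SplitTransaction; a simplified sketch of that pattern
//(logging and some error handling abridged):
try {
st.execute(this.server, this.server);
} catch (Exception e) {
try {
if (st.rollback(this.server, this.server)) {
LOG.info("Successful rollback of failed split of " + parent.getRegionNameAsString());
}
} catch (RuntimeException ee) {
//if even the rollback fails, abort the regionserver so the master can repair the region
this.server.abort("Abort; we got an error after point-of-no-return", ee);
}
}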
// SplitTransaction.prepare()
public boolean prepare() {
//do not split if the region is already closed, is closing, or still holds reference storefiles (reference files are only produced during a split)
if (!this.parent.isSplittable()) return false;
HRegionInfo hri = this.parent.getRegionInfo();
byte [] startKey = hri.getStartKey();
byte [] endKey = hri.getEndKey();
long rid = getDaughterRegionIdTimestamp(hri);
//build the HRegionInfo objects for the two daughter regions: A covers [startKey, splitrow) and B covers [splitrow, endKey); rid is the daughters' region id (see the sketch after this method)
this.hri_a = new HRegionInfo(hri.getTableName(), startKey, this.splitrow, false, rid);
this.hri_b = new HRegionInfo(hri.getTableName(), this.splitrow, endKey, false, rid);
return true;
}
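//Sketch of how the daughter region id (rid) is derived, close to but not verbatim
//SplitTransaction.getDaughterRegionIdTimestamp(): the region id is a timestamp, and it must
//not be lower than the parent's, otherwise the daughters would sort before the parent in .META.
private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
long rid = EnvironmentEdgeManager.currentTimeMillis();
if (rid < hri.getRegionId()) {
rid = hri.getRegionId() + 1; //parent carries a newer timestamp: step just past it
}
return rid;
}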
// SplitTransaction.execute()
public PairOfSameType<HRegion> execute(final Server server,
final RegionServerServices services)
throws IOException {
//close the parent region and create the two daughter regions A and B
PairOfSameType<HRegion> regions = createDaughters(server, services);
//open the two daughter regions A and B produced by the split
openDaughters(server, services, regions.getFirst(), regions.getSecond());
//transition the split state stored in zk from SPLITTING to SPLIT; once the master sees the new state it deletes the znode. The method loops, sleeping 100ms between checks, until it detects that the master has processed the split (see the sketch after this method)
transitionZKNode(server, services, regions.getFirst(), regions.getSecond());
return regions;
}
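//Abridged sketch of the wait loop inside transitionZKNode (simplified from the HBase source;
//a and b are the daughter regions): the znode is re-asserted periodically in case the master
//missed a watch event, and the loop ends once transitionNodeSplit() reports it is gone (-1).
int spins = 0;
do {
if (spins % 10 == 0) {
LOG.debug("Still waiting on the master to process the split for " + this.parent.getRegionInfo().getEncodedName());
}
Thread.sleep(100);
//returns -1 once the znode no longer exists, i.e. the master has handled the split
this.znodeVersion = transitionNodeSplit(server.getZooKeeper(), parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo(), server.getServerName(), this.znodeVersion);
spins++;
} while (this.znodeVersion != -1 && !server.isStopped() && !services.isStopping());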
PairOfSameType<HRegion> createDaughters(final Server server,
final RegionServerServices services) throws IOException {
//determine the timeout for splitting store files (default 30000 ms, configurable via hbase.regionserver.fileSplitTimeout); in test mode (hbase.testing.nocluster) the built-in default is kept
boolean testing = server == null? true:
server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
this.fileSplitTimeout = testing ? this.fileSplitTimeout :
server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout", this.fileSplitTimeout);
//create an ephemeral znode in zk that records the split state; the relevant RS-side EventType values are:
//  RS_ZK_REGION_CLOSED (2)      - RS has finished closing a region
//  RS_ZK_REGION_OPENING (3)     - RS is in process of opening a region
//  RS_ZK_REGION_OPENED (4)      - RS has finished opening a region
//  RS_ZK_REGION_SPLITTING (5)   - RS has started a region split
//  RS_ZK_REGION_SPLIT (6)       - RS split has completed
//  RS_ZK_REGION_FAILED_OPEN (7) - RS failed to open a region
if (server != null && server.getZooKeeper() != null) {
try {
createNodeSplitting(server.getZooKeeper(), this.parent.getRegionInfo(), server.getServerName());
} catch (KeeperException e) {
throw new IOException("Failed creating SPLITTING znode on " +this.parent.getRegionNameAsString(), e);
}
}
this.journal.add(JournalEntry.SET_SPLITTING_IN_ZK);
if (server != null && server.getZooKeeper() != null) {
try {
this.znodeVersion = transitionNodeSplitting(server.getZooKeeper(),
this.parent.getRegionInfo(), server.getServerName(), -1);
} catch (KeeperException e) {
throw new IOException("Failed setting SPLITTING znode on " + this.parent.getRegionNameAsString(), e);
}
}
//create the split working directory in HDFS: {region dir}/splits
createSplitDir(this.parent.getFilesystem(), this.splitdir);
this.journal.add(JournalEntry.CREATE_SPLIT_DIR);
List<StoreFile> hstoreFilesToSplit = null;
Exception exceptionToThrow = null;
try{
//close the parent region so it stops serving requests. Before closing, all outstanding compactions and flushes on the region are waited for (both are asynchronous), and if the memstore size is at least hbase.hregion.preclose.flush.size (default 5MB) a pre-flush is performed first. Only then is the region's closing flag set to true; a write lock is then taken and all requests are rejected until the close completes, closed is set to true, and finally the write lock is released. close() returns the region's storefiles, which are split below
hstoreFilesToSplit = this.parent.close(false);
} catch (Exception e) {
exceptionToThrow = e;
}
//remove the parent region from the RS's onlineRegions. Between the parent going offline and daughters A and B coming online, client requests against this region fail and the client keeps retrying with a back-off that grows with the retry count (the first few retries wait hbase.client.pause, default 1000ms). The gap is usually short since nothing heavy happens in it, but a split can still make the affected rows unavailable for more than a second
if (!testing) {
services.removeFromOnlineRegions(this.parent.getRegionInfo().getEncodedName());
}
this.journal.add(JournalEntry.OFFLINED_PARENT);
//split the storefiles through a thread pool whose size equals the number of storefiles; the per-file logic is in StoreFileSplitter.splitStoreFile(), shown below
splitStoreFiles(this.splitdir, hstoreFilesToSplit);
this.journal.add(JournalEntry.STARTED_REGION_A_CREATION);
HRegion a = createDaughterRegion(this.hri_a, this.parent.rsServices);
this.journal.add(JournalEntry.STARTED_REGION_B_CREATION);
HRegion b = createDaughterRegion(this.hri_b, this.parent.rsServices);
this.journal.add(JournalEntry.PONR); //point of no return: a failure after this entry can no longer be rolled back
if (!testing) {
//take the parent region offline in .META.: its row is rewritten with offline=true and split=true, and two columns, splitA and splitB, pointing at the daughters are added (see the sketch after this method)
MetaEditor.offlineParentInMeta(server.getCatalogTracker(),
this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo());
}
return new PairOfSameType<HRegion>(a, b);
}
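//Sketch of the .META. edit performed by MetaEditor.offlineParentInMeta (simplified; the real
//code goes through CatalogTracker/MetaEditor helpers to apply the Put to the .META. table):
HRegionInfo copyOfParent = new HRegionInfo(parent.getRegionInfo());
copyOfParent.setOffline(true);
copyOfParent.setSplit(true);
Put put = new Put(copyOfParent.getRegionName());
put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, Writables.getBytes(copyOfParent));
put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER, Writables.getBytes(a.getRegionInfo()));
put.add(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER, Writables.getBytes(b.getRegionInfo()));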
//the splitStoreFile logic of the StoreFileSplitter class: for each parent storefile it writes a bottom reference for daughter A and a top reference for daughter B
private void splitStoreFile(final StoreFile sf, final Path splitdir)
throws IOException {
FileSystem fs = this.parent.getFilesystem();
byte [] family = sf.getFamily();
String encoded = this.hri_a.getEncodedName();
Path storedir = Store.getStoreHomedir(splitdir, encoded, family);
StoreFile.split(fs, storedir, sf, this.splitrow, Range.bottom);
encoded = this.hri_b.getEncodedName();
storedir = Store.getStoreHomedir(splitdir, encoded, family);
StoreFile.split(fs, storedir, sf, this.splitrow, Range.top);
}
// StoreFile.split()
static Path split(final FileSystem fs,
final Path splitDir,
final StoreFile f,
final byte [] splitRow,
final Reference.Range range)
throws IOException {
// check whether the split row falls inside this store file's key range; if it does not, no reference file is needed for this half
if (range == Reference.Range.bottom) {
KeyValue splitKey = KeyValue.createLastOnRow(splitRow);
byte[] firstKey = f.createReader().getFirstKey();
if (f.getReader().getComparator().compare(splitKey.getBuffer(),
splitKey.getKeyOffset(), splitKey.getKeyLength(),
firstKey, 0, firstKey.length) < 0) {
return null;
}
}
else {
KeyValue splitKey = KeyValue.createFirstOnRow(splitRow);
byte[] lastKey = f.createReader().getLastKey();
if (f.getReader().getComparator().compare(splitKey.getBuffer(),
splitKey.getKeyOffset(), splitKey.getKeyLength(),
lastKey, 0, lastKey.length) > 0) {
return null;
}
}
//write the reference file. Its name has the form <storefile name>.<parent region encoded name>, e.g. 0959f79e6f7141aba1d81450ac891884.a00556374994fa5b3369e884b45492d3, where a00556374994fa5b3369e884b45492d3 is the encoded name of the parent (split) region and 0959f79e6f7141aba1d81450ac891884 is the name of the referenced storefile. The reference file itself only stores the split row and whether it refers to the top or bottom half. Each daughter region ends up with as many reference files as the parent had storefiles, which can be seen in the HBase web UI (a note on how daughters read these files follows the method)
Reference r = new Reference(splitRow, range);
String parentRegionName = f.getPath().getParent().getParent().getName();
Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
return r.write(fs, p);
}
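//A reference file is read back when the daughter region opens; a minimal sketch of that side
//(referenceFilePath is a made-up variable standing for the Path returned by r.write() above):
Reference ref = Reference.read(fs, referenceFilePath);
//ref carries just the split row and whether this daughter sees the bottom half (keys below the
//split row) or the top half; the daughter's Store opens the parent's storefile through a
//HalfStoreFileReader built from it, so no data is copied until the next compaction rewrites it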
//open the two daughter regions produced by the split
void openDaughters(final Server server,
final RegionServerServices services, HRegion a, HRegion b)
throws IOException {
DaughterOpener aOpener = new DaughterOpener(server, a);
DaughterOpener bOpener = new DaughterOpener(server, b);
aOpener.start();
bOpener.start();
aOpener.join();//wait for the opens to complete
bOpener.join();
//postOpenDeployTasks requests a compaction if the region still holds reference storefiles or simply has too many storefiles; after the compaction the reference files are rewritten as real files
//it also registers the daughter region in the .META. table (see the sketch after this method)
services.postOpenDeployTasks(b, server.getCatalogTracker(), true);
//add the region to the RegionServer's onlineRegions list; from this point on it serves requests
services.addToOnlineRegions(b);
services.postOpenDeployTasks(a, server.getCatalogTracker(), true);
services.addToOnlineRegions(a);
}
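//Abridged sketch of the relevant part of HRegionServer.postOpenDeployTasks (simplified from the
//source; r is the daughter region and ct the CatalogTracker passed in): queue compactions for
//stores that still carry references, then record the daughter region in .META.
for (Store s : r.getStores().values()) {
if (s.hasReferences() || s.needsCompaction()) {
getCompactionRequester().requestCompaction(r, s, "Opening Region");
}
}
//for the daughter of a split the whole .META. row is written, not just the server location
MetaEditor.addDaughter(ct, r.getRegionInfo(), this.serverNameFromMasterPOV);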