1.HTable.put
for (Put put : puts) {
validatePut(put);//验证Put有效,主要是判断kv的长度
writeBuffer.add(put);//写入缓存
currentWriteBufferSize += put.heapSize();//计算缓存容量
}
if (autoFlush || currentWriteBufferSize > writeBufferSize) {
flushCommits();//如果自动Flush或者缓存到达阈值,则执行flush
}
2.HTable.flushCommits
try {
connection.processBatchOfPuts(writeBuffer, tableName, pool);//调用HConnection来提交Put,传入了一个线程池,看来是有异步调用的过程
} finally {
// the write buffer was adjusted by processBatchOfPuts
currentWriteBufferSize = 0;
for (Put aPut : writeBuffer) {
currentWriteBufferSize += aPut.heapSize();//currentWriteBufferSize又重新计算了一遍,看来一批提交不一定会全部提交完
}
}
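3.HConnectionManager.HConnectionImplementation.processBatch
上一步调用的processBatchOfPuts内部会把这批Put交给processBatch处理,提交成功的Put会从writeBuffer中移除(所以flushCommits里要重新计算缓存大小),下面直接看processBatch。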
第一步:按RegionServer把Put分类,以便于将一批Put按RegionServer批量提交
// step 1: break up into regionserver-sized chunks and build the data structs
Map<HServerAddress, MultiAction> actionsByServer = new HashMap<HServerAddress, MultiAction>();
for (int i = 0; i < workingList.size(); i++) {
Row row = workingList.get(i);
if (row != null) {
HRegionLocation loc = locateRegion(tableName, row.getRow(), true);//定位Put在哪个Region上
HServerAddress address = loc.getServerAddress();//定位Region在哪个RegionServer上
byte[] regionName = loc.getRegionInfo().getRegionName();
MultiAction actions = actionsByServer.get(address);//看该RegionServer上的批量对象创建没有,没有就创建一个
if (actions == null) {
actions = new MultiAction();
actionsByServer.put(address, actions);
}
Action action = new Action(regionName, row, i);//根据Put创建一个响应对象,放到批量操作对象里,什么是响应对象呢,就是Put和返回结果的组合
lastServers[i] = address;
actions.add(regionName, action);
}
}
第二步:往RegionServer上提交请求,各个RegionServer是并发提交的
// step 2: make the requests
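Map<HServerAddress, Future<MultiResponse>> futures =
new HashMap<HServerAddress, Future<MultiResponse>>(actionsByServer.size());
for (Entry<HServerAddress, MultiAction> e : actionsByServer.entrySet()) {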
futures.put(e.getKey(), pool.submit(createCallable(e.getKey(), e.getValue(), tableName)));
}
第三步,等待各RegionServer返回结果,并准备重试
// step 3: collect the failures and successes and prepare for retry
for (Entry<HServerAddress, Future<MultiResponse>> responsePerServer : futures.entrySet()) {
HServerAddress address = responsePerServer.getKey();
try {
Future<MultiResponse> future = responsePerServer.getValue();
MultiResponse resp = future.get();
if (resp == null) {
// Entire server failed
LOG.debug("Failed all for server: " + address + ", removing from cache");
continue;
}
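for (Entry<byte[], List<Pair<Integer, Object>>> e : resp.getResults().entrySet()) {
byte[] regionName = e.getKey();
List<Pair<Integer, Object>> regionResults = e.getValue();
for (Pair<Integer, Object> regionResult : regionResults) {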
if (regionResult == null) {
// if the first/only record is 'null' the entire region failed.
LOG.debug("Failures for region: " +
Bytes.toStringBinary(regionName) +
", removing from cache");
} else {
// Result might be an Exception, including DNRIOE
results[regionResult.getFirst()] = regionResult.getSecond();
}
}
}
} catch (ExecutionException e) {
LOG.debug("Failed all from " + address, e);
}
}
第四步,识别返回的错误,准备重试
// step 4: identify failures and prep for a retry (if applicable).
// Find failures (i.e. null Result), and add them to the workingList (in
// order), so they can be retried.
retry = false;
workingList.clear();
for (int i = 0; i < results.length; i++) {
// if null (fail) or instanceof Throwable && not instanceof DNRIOE
// then retry that row. else dont.
if (results[i] == null ||
(results[i] instanceof Throwable &&
!(results[i] instanceof DoNotRetryIOException))) {
retry = true;
Row row = list.get(i);
workingList.add(row);
deleteCachedLocation(tableName, row.getRow());
} else {
// add null to workingList, so the order remains consistent with the original list argument.
workingList.add(null);
}
}
由以上四步可以看出,重点在于第二步,继续跟进,看Callable是怎么样call的,有两步,一是创建到RegionServer的连接,二是调用RegionServer上的multi方法,显然这是远程调用的过程。
4.HConnectionManager.HConnectionImplementation.createCallable
return new Callable<MultiResponse>() {
public MultiResponse call() throws IOException {
return getRegionServerWithoutRetries(
new ServerCallable<MultiResponse>(connection, tableName, null) {
public MultiResponse call() throws IOException {
return server.multi(multi);//第二步:远程调用服务端RegionServer的multi方法,返回结果
}
@Override
public void instantiateServer(boolean reload) throws IOException {
server = connection.getHRegionConnection(address);//第一步:根据RegionServer的地址连上RegionServer
}
}
);
}
};
RegionServer服务端
上面客户端调用过程分析完毕,继续跟RegionServer服务端的处理,入口方法就是HRegionServer.multi
1.HRegionServer.multi
这个方法里有些是关于重试、上锁、结果收集的代码,忽略掉,重要的是两步
第一步:根据RegionName取得对应的Region
HRegion region = getRegion(regionName);
第二步:调用region的put方法实际put数据
OperationStatusCode[] codes =
region.put(putsWithLocks.toArray(new Pair[]{}));
2.HRegion.put
这个方法先检查、上锁、doMiniBatchPut、解锁、判断是否需要flush,重要的是doMiniBatchPut这个方法
long addedSize = doMiniBatchPut(batchOp);
3.HRegion.doMiniBatchPut
这个方法分为上锁、更新时间戳、写WAL、写memstore、解锁;重要的是写WAL和写memstore
// ------------------------------------
// STEP 3. Write to WAL
// ----------------------------------
WALEdit walEdit = new WALEdit();//第一步:创建WAL日志对象放内存里
for (int i = firstIndex; i < lastIndexExclusive; i++) {
// Skip puts that were determined to be invalid during preprocessing
if (batchOp.retCodes[i] != OperationStatusCode.NOT_RUN) continue;
Put p = batchOp.operations[i].getFirst();
if (!p.getWriteToWAL()) continue;
addFamilyMapToWALEdit(p.getFamilyMap(), walEdit);//第二步:写put的内容到WAL日志对象里
}
// Append the edit to WAL
this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
walEdit, now);//第三步:写WAL日志对象到硬盘上
// ------------------------------------
// STEP 4. Write back to memstore
// ----------------------------------
long addedSize = 0;
for (int i = firstIndex; i < lastIndexExclusive; i++) {
if (batchOp.retCodes[i] != OperationStatusCode.NOT_RUN) continue;
Put p = batchOp.operations[i].getFirst();
addedSize += applyFamilyMapToMemstore(p.getFamilyMap());//把put放到memstore里
batchOp.retCodes[i] = OperationStatusCode.SUCCESS;
}