OpenTSDB 源码详解之写入数据到 tsdb-uid 表
messageReceived
public void messageReceived(final ChannelHandlerContext ctx,
final MessageEvent msgevent) {...}
该方法是RpcHandler
类中的。
调用 private void handleHttpQuery(final TSDB tsdb, final Channel chan, final HttpRequest req) {...}
接着调用UniqueIdRpc的execute()方法
接着判断前端的http请求属于哪种类型:assign? uidmeta? tsmeta? rename?
相应代码是
if (endpoint.toLowerCase().equals("assign")) {
this.handleAssign(tsdb, query);
return;
} else if (endpoint.toLowerCase().equals("uidmeta")) {
this.handleUIDMeta(tsdb, query);
return;
} else if (endpoint.toLowerCase().equals("tsmeta")) {
this.handleTSMeta(tsdb, query);
return;
} else if (endpoint.toLowerCase().equals("rename")) {
this.handleRename(tsdb, query);
return;
} else {
throw new BadRequestException(HttpResponseStatus.NOT_IMPLEMENTED,
"Other UID endpoints have not been implemented yet");
}
这里看一个 handleAssign()
方法,如下:
/**
 * Assigns UIDs to the given metric, tagk or tagv names.
 * <p>
 * Only GET and POST are accepted. For POST the type-to-names map comes from
 * the query serializer's {@code parseUidAssignV1()}; for GET it is built from
 * the {@code metric}, {@code tagk} and {@code tagv} query string parameters,
 * each holding a comma separated list of names.
 * <p>
 * Multiple types and names can be provided in one call. Every name is
 * processed independently: when assigning a name fails with an
 * {@link IllegalArgumentException}, the exception message is stored under the
 * {@code <type>_errors} key of the response and the remaining names are still
 * processed. The reply is sent with {@code BAD_REQUEST} when at least one
 * name failed.
 *
 * @param tsdb The TSDB from the RPC router
 * @param query The query for this request
 * @throws BadRequestException if the HTTP method is neither GET nor POST, or
 * if no values to assign were supplied
 */
private void handleAssign(final TSDB tsdb, final HttpQuery query) {
  // Reject anything other than GET and POST.
  if (!(query.method() == HttpMethod.GET || query.method() == HttpMethod.POST)) {
    throw new BadRequestException(HttpResponseStatus.METHOD_NOT_ALLOWED,
        "Method not allowed", "The HTTP method [" + query.method().getName() +
        "] is not permitted for this endpoint");
  }

  // Maps a UID type ("metric", "tagk", "tagv") to the names needing a UID.
  final HashMap<String, List<String>> source;
  if (query.method() != HttpMethod.POST) {
    source = new HashMap<String, List<String>>(3);
    // Cut down on repetitive code: walk the known types, split each query
    // string value on commas and store the pieces in the source map.
    String[] types = {"metric", "tagk", "tagv"};
    CustomedMethod.printSuffix(String.valueOf(types.length));
    for (final String type : types) {
      final String csv = query.getQueryStringParam(type);
      if (csv == null || csv.isEmpty()) {
        continue;
      }
      // The pieces may be metric, tagk or tagv names depending on the type.
      final String[] names = csv.split(",");
      if (names.length > 0) {
        source.put(type, Arrays.asList(names));
      }
    }
  } else {
    source = query.serializer().parseUidAssignV1();
  }

  if (source.isEmpty()) {
    throw new BadRequestException("Missing values to assign UIDs");
  }

  // Response layout: <type> -> {name -> uid} and, only when something
  // failed for that type, <type>_errors -> {name -> error message}.
  final Map<String, TreeMap<String, String>> response =
      new HashMap<String, TreeMap<String, String>>();
  int error_count = 0;
  for (Map.Entry<String, List<String>> entry : source.entrySet()) {
    final String type = entry.getKey();
    final TreeMap<String, String> assigned = new TreeMap<String, String>();
    final TreeMap<String, String> failed = new TreeMap<String, String>();
    for (String name : entry.getValue()) {
      try {
        // assignUid attempts to assign a UID to the name for the given type.
        assigned.put(name, UniqueId.uidToString(tsdb.assignUid(type, name)));
      } catch (IllegalArgumentException e) {
        failed.put(name, e.getMessage());
        error_count++;
      }
    }
    response.put(type, assigned);
    if (!failed.isEmpty()) {
      response.put(type + "_errors", failed);
    }
  }

  if (error_count > 0) {
    query.sendReply(HttpResponseStatus.BAD_REQUEST,
        query.serializer().formatUidAssignV1(response));
  } else {
    query.sendReply(query.serializer().formatUidAssignV1(response));
  }
}
上面这个方法中调用的关键方法是:
/**
 * Attempts to assign a UID to a name for the given type.
 * <p>
 * Used by the UniqueIdRpc call to generate IDs for new metrics, tagks or
 * tagvs. The type and name are first passed to
 * {@code Tags.validateString}. If the name already has a UID, this method
 * throws an error that carries the existing UID; otherwise the UID is
 * created and returned.
 * <p>
 * The type is matched case-insensitively using {@code Locale.ROOT}, so the
 * result does not depend on the JVM's default locale.
 *
 * @param type The type of uid to assign, metric, tagk or tagv
 * @param name The name of the uid object
 * @return A byte array with the UID if the assignment was successful
 * @throws IllegalArgumentException if the type is unknown or the name
 * already exists (presumably also when validation rejects the name; confirm
 * against {@code Tags.validateString})
 * @since 2.0
 */
public byte[] assignUid(final String type, final String name) {
  // Validate the strings before touching any UID table.
  Tags.validateString(type, name);

  // Locale.ROOT keeps the match stable under default locales, such as
  // Turkish, whose lower-casing turns 'I' into a dotless 'ı'.
  final String kind = type.toLowerCase(java.util.Locale.ROOT);
  final UniqueId uids;
  if (kind.equals("metric")) {
    uids = this.metrics;
  } else if (kind.equals("tagk")) {
    uids = this.tag_names;
  } else if (kind.equals("tagv")) {
    uids = this.tag_values;
  } else {
    LOG.warn("Unknown type name: " + type);
    throw new IllegalArgumentException("Unknown type name");
  }

  try {
    // getId() returning normally means the name already has a UID, which is
    // an error here; a missing name is reported via NoSuchUniqueName.
    final byte[] uid = uids.getId(name);
    throw new IllegalArgumentException("Name already exists with UID: " +
        UniqueId.uidToString(uid));
  } catch (NoSuchUniqueName nsue) {
    // No UID exists for this name yet, so create one.
    return uids.getOrCreateId(name);
  }
}
其中最为关键的方法是 getOrCreateId()
,如下
/**
 * Finds the ID associated with a given name or creates it.
 * <p>
 * <strong>This method is blocking.</strong> Its use within OpenTSDB itself
 * is discouraged, please use {@link #getOrCreateIdAsync} instead.
 * <p>
 * The length of the byte array is fixed in advance by the implementation.
 * <p>
 * NOTE(review): a failure of the initial lookup other than
 * {@code NoSuchUniqueName} is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param name The name to lookup in the table or to assign an ID to.
 * @return The ID found for the name, or the newly assigned one.
 * @throws HBaseException if there is a problem communicating with HBase.
 * @throws IllegalStateException if all possible IDs are already assigned.
 * @throws IllegalStateException if the ID found in HBase is encoded on the
 * wrong number of bytes.
 */
public byte[] getOrCreateId(final String name) throws HBaseException {
  try {
    // Blocking lookup first; a missing name is reported as NoSuchUniqueName
    // and handled below by assigning a new ID.
    return getIdAsync(name).joinUninterruptibly();
  } catch (NoSuchUniqueName e) {
    // The filter is consulted only when one is configured and it opts in to
    // filtering UID assignments.
    if (tsdb != null && tsdb.getUidFilter() != null && tsdb.getUidFilter().fillterUIDAssignments()) {
      try {
        if (!tsdb.getUidFilter()
            .allowUIDAssignment(type, name, null, null)
            .join()) {
          rejected_assignments++;
          throw new FailedToAssignUniqueIdException(new String(kind), name, 0,
              "Blocked by UID filter.");
        }
      } catch (FailedToAssignUniqueIdException e1) {
        throw e1;
      } catch (InterruptedException e1) {
        LOG.error("Interrupted", e1);
        Thread.currentThread().interrupt();
      } catch (Exception e1) {
        throw new RuntimeException("Should never be here", e1);
      }
    }

    Deferred<byte[]> assignment = null;
    boolean pending = false;
    synchronized (pending_assignments) {
      assignment = pending_assignments.get(name);
      if (assignment == null) {
        // to prevent UID leaks that can be caused when multiple time
        // series for the same metric or tags arrive, we need to write a
        // deferred to the pending map as quickly as possible. Then we can
        // start the assignment process after we've stashed the deferred
        // and released the lock
        assignment = new Deferred<byte[]>();
        pending_assignments.put(name, assignment);
      } else {
        pending = true;
      }
    }

    if (pending) {
      // Another caller already registered an assignment for this name; wait
      // for its result instead of starting a second one.
      LOG.info("Already waiting for UID assignment: " + name);
      try {
        return assignment.joinUninterruptibly();
      } catch (Exception e1) {
        throw new RuntimeException("Should never be here", e1);
      }
    }

    // start the assignment dance after stashing the deferred
    byte[] uid = null;
    try {
      uid = new UniqueIdAllocator(name, assignment).tryAllocate().joinUninterruptibly();
    } catch (RuntimeException e1) {
      throw e1;
    } catch (Exception e1) {
      // Chain the allocation failure itself (e1), not the NoSuchUniqueName
      // from the initial lookup (e), so the real cause is not lost.
      throw new RuntimeException("Should never be here", e1);
    } finally {
      // Always remove the pending entry, whether allocation succeeded or not.
      synchronized (pending_assignments) {
        if (pending_assignments.remove(name) != null) {
          LOG.info("Completed pending assignment for: " + name);
        }
      }
    }
    return uid;
  } catch (Exception e) {
    throw new RuntimeException("Should never be here", e);
  }
}
而这个方法其中调用 uid = new UniqueIdAllocator(name, assignment).tryAllocate().joinUninterruptibly();
这个UniqueIdAllocator
类如下:
private final class UniqueIdAllocator implements Callback<Object, Object>
类 UniqueIdAllocator 是 UniqueId 的一个内部类,同时该类实现了 Callback<Object, Object> 接口,作为回调函数使用。
/**
* Implements the process to allocate a new UID.
*
* This callback is re-used multiple times in a four step process:
* 1. Allocate a new UID via atomic increment.
* 2. Create the reverse mapping (ID to name).
* 3. Create the forward mapping (name to ID).
* 4. Return the new UID to the caller.
*
* The step to run next is tracked in {@code state}, using the
* {@code ALLOCATE_UID}, {@code CREATE_REVERSE_MAPPING},
* {@code CREATE_FORWARD_MAPPING} and {@code DONE} constants below.
*/
private final class UniqueIdAllocator implements Callback<Object, Object> {
private final String name; // What we're trying to allocate an ID for.
private final Deferred<byte[]> assignment; // deferred to call back
private short attempt = randomize_id ? // Give up when zero.
MAX_ATTEMPTS_ASSIGN_RANDOM_ID : MAX_ATTEMPTS_ASSIGN_ID;
private HBaseException hbe = null; // Last exception caught.
// TODO(manolama) - right now if we retry the assignment it will create a
// callback chain MAX_ATTEMPTS_* long and call the ErrBack that many times.
// This can be cleaned up a fair amount but it may require changing the
// public behavior a bit. For now, the flag will prevent multiple attempts
// to execute the callback.
private boolean called = false; // whether we called the deferred or not
private long id = -1; // The ID we'll grab with an atomic increment.
private byte row[]; // The same ID, as a byte array.
// Step identifiers for the allocation process, in the order they run.
private static final byte ALLOCATE_UID = 0;
private static final byte CREATE_REVERSE_MAPPING = 1;
private static final byte CREATE_FORWARD_MAPPING = 2;
private static final byte DONE = 3;
// Which step the next call() will run; the process starts by allocating a UID.
private byte state = ALLOCATE_UID; // Current state of the process.
/**
 * Creates an allocator for a single name.
 *
 * @param name The name to allocate an ID for
 * @param assignment The deferred that {@code tryAllocate()} hands back to
 * the caller
 */
UniqueIdAllocator(final String name, final Deferred<byte[]> assignment) {
  this.assignment = assignment;
  this.name = name;
}
/**
 * Starts an allocation attempt: uses up one attempt, resets the process to
 * its first step and runs that step.
 *
 * @return The deferred that was given to the constructor
 */
Deferred<byte[]> tryAllocate() {
  state = ALLOCATE_UID;
  attempt -= 1;
  call(null);
  return assignment;
}
/**
* Runs the step selected by {@code state} and chains this object as the
* callback for the step after it.
*
* @param arg The value passed in by the previous step, or an exception if
* that step failed; {@code tryAllocate()} passes {@code null}.
* @return {@code done(arg)} in the {@code DONE} state, {@code arg} itself
* when it is an exception other than {@code HBaseException}, otherwise the
* deferred of the step that was started, with this callback and an errback
* attached.
* @throws IllegalStateException if no attempts are left, no HBase error was
* recorded and IDs are not randomized
* @throws FailedToAssignUniqueIdException if no attempts are left and no
* HBase error was recorded
* @throws HBaseException the last recorded HBase error, if no attempts are
* left
*/
@SuppressWarnings("unchecked")
public Object call(final Object arg) {
// Out of attempts: give up with the most specific error available.
if (attempt == 0) {
if (hbe == null && !randomize_id) {
throw new IllegalStateException("Should never happen!");
}
LOG.error("Failed to assign an ID for kind='" + kind()
+ "' name='" + name + "'", hbe);
if (hbe == null) {
throw new FailedToAssignUniqueIdException(kind(), name,
MAX_ATTEMPTS_ASSIGN_RANDOM_ID);
}
throw hbe;
}
// The previous step failed: retry from the start on an HBase error, and
// hand any other exception straight back.
if (arg instanceof Exception) {
final String msg = ("Failed attempt #" + (randomize_id
? (MAX_ATTEMPTS_ASSIGN_RANDOM_ID - attempt)
: (MAX_ATTEMPTS_ASSIGN_ID - attempt))
+ " to assign an UID for " + kind() + ':' + name
+ " at step #" + state);
if (arg instanceof HBaseException) {
LOG.error(msg, (Exception) arg);
hbe = (HBaseException) arg;
attempt--;
state = ALLOCATE_UID;; // Retry from the beginning.
} else {
LOG.error("WTF? Unexpected exception! " + msg, (Exception) arg);
return arg; // Unexpected exception, let it bubble up.
}
}
// Errback that passes the failure to the pending assignment at most once,
// guarded by the `called` flag.
class ErrBack implements Callback<Object, Exception> {
public Object call(final Exception e) throws Exception {
if (!called) {
LOG.warn("Failed pending assignment for: " + name, e);
assignment.callback(e);
called = true;
}
return assignment;
}
}
final Deferred d;
// Start the step for the current state; DONE returns directly.
switch (state) {
case ALLOCATE_UID:
d = allocateUid();
break;
case CREATE_REVERSE_MAPPING:
d = createReverseMapping(arg);
break;
case CREATE_FORWARD_MAPPING:
d = createForwardMapping(arg);
break;
case DONE:
return done(arg);
default:
throw new AssertionError("Should never be here!");
}
// Re-enter this callback with the step's result or error.
return d.addBoth(this).addErrback(new ErrBack());
}
其中会调用 allocateUid()
方法,如下:
/**
 * Generates either a random or a serial ID and moves the process on to the
 * reverse-mapping step.
 * <p>
 * A random ID comes from {@code RandomUniqueId.getRandomUID()}; a serial ID
 * comes from an atomic increment request sent through the HBase client. If
 * random, we need to make sure that there isn't a UID collision; no such
 * check is made inside this method.
 *
 * @return A deferred carrying the numeric ID
 */
private Deferred<Long> allocateUid() {
  final String flavor = randomize_id ? "a random " : "an ";
  LOG.info("Creating " + flavor + "ID for kind='" + kind()
      + "' name='" + name + '\'');

  // The next invocation of call() creates the reverse mapping.
  state = CREATE_REVERSE_MAPPING;

  if (!randomize_id) {
    // Serial ID: atomically and durably increment the counter cell in HBase.
    return client.atomicIncrement(new AtomicIncrementRequest(table,
        MAXID_ROW, ID_FAMILY, kind));
  }
  return Deferred.fromResult(RandomUniqueId.getRandomUID());
}
/**
 * Atomically and durably increments a value in HBase.
 * <p>
 * This is equivalent to
 * {@link #atomicIncrement(AtomicIncrementRequest, boolean) atomicIncrement}
 * {@code (request, true)}
 * <p>
 * Bumps the atomic-increment counter, sends the request to its region and
 * attaches {@code icv_done} as the success callback; errors pass through.
 *
 * @param request The increment request.
 * @return The deferred {@code long} value that results from the increment.
 */
public Deferred<Long> atomicIncrement(final AtomicIncrementRequest request) {
  num_atomic_increments.increment();
  final Deferred<Object> rpc_result = sendRpcToRegion(request);
  return rpc_result.addCallbacks(icv_done, Callback.PASSTHROUGH);
}
而sendRpcToRegion()
方法如下:
/**
* Sends an RPC targeted at a particular region to the right RegionServer.
* <p>
* This method is package-private so that the low-level {@link RegionClient}
* can retry RPCs when handling a {@link NotServingRegionException}.
* <p>
* When the region is not cached, or its client is missing or not alive, the
* region is located first and the RPC is then retried through this method.
* @param request The RPC to send. This RPC must specify a single specific table and row key.
* @return The deferred result of the RPC (whatever object or exception was
* de-serialized back from the network).
*/
Deferred<Object> sendRpcToRegion(final HBaseRpc request) {
// Give up when the RPC cannot be retried once more; tooManyAttempts()
// supplies the deferred that is returned in that case.
if (cannotRetryRequest(request)) {
return tooManyAttempts(request, null);
}
request.attempt++;
// Table and row key the RPC is addressed to.
final byte[] table = request.table;
final byte[] key = request.key;
// Look up the region for this table/row; null is handled below by
// locating the region first.
final RegionInfo region = getRegion(table, key);
// Callback run once the region lookup completes: fails the RPC on a
// non-recoverable error, otherwise sends it again.
final class RetryRpc implements Callback<Deferred<Object>, Object> {
public Deferred<Object> call(final Object arg) {
if (arg instanceof NonRecoverableException) {
// No point in retrying here, so fail the RPC.
HBaseException e = (NonRecoverableException) arg;
if (e instanceof HasFailedRpcException
&& ((HasFailedRpcException) e).getFailedRpc() != request) {
// If we get here it's because a dependent RPC (such as a META
// lookup) has failed. Therefore the exception we're getting
// indicates that the META lookup failed, but we need to return
// to our caller here that it's their RPC that failed. Here we
// re-create the exception but with the correct RPC in argument.
e = e.make(e, request); // e is likely a PleaseThrottleException.
}
request.callback(e);
return Deferred.fromError(e);
}
return sendRpcToRegion(request); // Retry the RPC.
}
public String toString() {
return "retry RPC";
}
}
if (region != null) {
// Region is known to be unavailable: hand the RPC to the NSRE handling
// and return its deferred, which is fetched before that call.
if (knownToBeNSREd(region)) {
final NotServingRegionException nsre =
new NotServingRegionException("Region known to be unavailable",
request);
final Deferred<Object> d = request.getDeferred();
handleNSRE(request, region.name(), nsre);
return d;
}
// Get the RegionClient for this region.
final RegionClient client = clientFor(region);
if (client != null && client.isAlive()) {
request.setRegion(region);
// Grab the deferred before sending, as sendRpc()'s contract requires.
final Deferred<Object> d = request.getDeferred();
client.sendRpc(request);
return d;
}
}
// No usable region or client: locate the region, then retry via RetryRpc.
return locateRegion(request, table, key).addBothDeferring(new RetryRpc());
}
其中关键的方法是 sendRpc()
,该方法简介如下:
/**
* Sends an RPC out to the wire, or queues it if we're disconnected.
* <p>
* <b>IMPORTANT</b>: Make sure you've got a reference to the Deferred of this
* RPC ({@link HBaseRpc#getDeferred}) before you call this method. Otherwise
* there's a race condition if the RPC completes before you get a chance to
* call {@link HBaseRpc#getDeferred} (since presumably you'll need to either
* return that Deferred or attach a callback to it).
*
* @param rpc The RPC to send; a {@code MultiAction} holding one action is
* converted to the corresponding single action before encoding.
*/
void sendRpc(HBaseRpc rpc) {
if (chan != null) {
if (rpc instanceof BatchableRpc
&& (server_version >= SERVER_VERSION_092_OR_ABOVE // Before 0.92,
|| rpc instanceof PutRequest)) { // we could only batch "put".
final BatchableRpc edit = (BatchableRpc) rpc;
// Buffer the edit instead of sending it now when it allows buffering
// and the client has a positive flush interval.
if (edit.canBuffer() && hbase_client.getFlushInterval() > 0) {
bufferEdit(edit);
return;
}
addSingleEditCallbacks(edit);
} else if (rpc instanceof MultiAction) {
// Transform single-edit multi-put into single-put.
final MultiAction batch = (MultiAction) rpc;
if (batch.size() == 1) {
rpc = multiActionToSingleAction(batch);
} else {
hbase_client.num_multi_rpcs.increment();
}
}
final ChannelBuffer serialized = encode(rpc);
if (serialized == null) { // Error during encoding.
return; // Stop here. RPC has been failed already.
}
final Channel chan = this.chan; // Volatile read.
if (chan != null) { // Double check if we disconnected during encode().
// if our channel isn't able to write, we want to properly queue and
// retry the RPC later or fail it immediately so we don't fill up the
// channel's buffer.
if (check_write_status && !chan.isWritable()) {
rpc.callback(new PleaseThrottleException("Region client [" + this +
" ] channel is not writeable.", null, rpc, rpc.getDeferred()));
removeRpc(rpc, false);
writes_blocked.incrementAndGet();
return;
}
rpc.enqueueTimeout(this);
Channels.write(chan, serialized);
rpcs_sent.incrementAndGet();
return;
} // else: continue to the "we're disconnected" code path below.
}
// Disconnected path: queue the RPC, retry the send, or fail/re-route it.
boolean tryagain = false;
boolean dead; // Shadows this.dead;
synchronized (this) {
dead = this.dead;
// Check if we got connected while entering this synchronized block.
if (chan != null) {
tryagain = true;
} else if (!dead) {
if (pending_rpcs == null) {
pending_rpcs = new ArrayList<HBaseRpc>();
}
// A positive pending_limit caps the queue; past it the RPC is failed
// with a throttle exception instead of being queued.
if (pending_limit > 0 && pending_rpcs.size() >= pending_limit) {
rpc.callback(new PleaseThrottleException(
"Exceeded the pending RPC limit", null, rpc, rpc.getDeferred()));
pending_breached.incrementAndGet();
return;
}
pending_rpcs.add(rpc);
}
}
if (dead) {
if (rpc.getRegion() == null // Can't retry, dunno where it should go.
|| rpc.failfast()) {
rpc.callback(new ConnectionResetException(null));
} else {
hbase_client.sendRpcToRegion(rpc); // Re-schedule the RPC.
}
return;
} else if (tryagain) {
// This recursion will not lead to a loop because we only get here if we
// connected while entering the synchronized block above. So when trying
// a second time, we will either succeed to send the RPC if we're still
// connected, or fail through to the code below if we got disconnected
// in the mean time.
sendRpc(rpc);
return;
}
LOG.debug("RPC queued: {}", rpc);
}
至此,写 uid 数据到 tsdb-uid 表的源码分析全部结束。