zipkin-dependencies离线计算拓扑图依赖
本文以 MySQL 存储的实现为例进行分析(后续准备接入 Doris 直接计算依赖关系,因此重点放在依赖计算逻辑上,不关注存储细节)
查找main方法 ZipkinDependenciesJob
// MySQL storage: configure MySQLDependenciesJob through its builder
// (log initializer, jars, day and Spark conf) and run it immediately.
case "mysql":
MySQLDependenciesJob.builder()
.logInitializer(logInitializer)
.jars(jarPath)
.day(day)
.conf(sparkConf)
.build()
.run();
break;
zipkin2.dependencies.mysql.MySQLDependenciesJob里面包含内部类Builder,分析run方法
/**
 * Aggregates one day of spans stored in MySQL into dependency links.
 *
 * <p>The span/annotation join is handed to the Spark JDBC source as the {@code dbtable}
 * sub-query. Rows are grouped by trace ID, each group is turned into {@code DependencyLink}s by
 * {@code RowsToDependencyLinks}, call and error counts are summed per (parent, child) pair, and
 * the collected links are passed to {@code saveToMySQL}.
 */
public void run() {
  // JDBC connection settings for the Spark JDBC data source.
  Map<String, String> options = new LinkedHashMap<>();
  options.put("driver", org.mariadb.jdbc.Driver.class.getName()); // prevents shade from skipping
  options.put("url", url);
  options.put("user", user);
  options.put("password", password);

  // NOTE(review): hasTraceIdHigh() presumably probes zipkin_spans for the trace_id_high column
  // ("select trace_id_high from zipkin_spans limit 1"), i.e. whether 128-bit trace IDs are
  // stored instead of 64-bit ones -- confirm against its implementation.
  boolean hasTraceIdHigh = hasTraceIdHigh();
  // When trace_id_high is selected it becomes column 0, which shifts trace_id to column 1.
  Function<Row, Long> rowTraceId = r -> r.getLong(hasTraceIdHigh ? 1 : 0);

  // Query window in microseconds. Assumes `day` is epoch milliseconds -- TODO confirm.
  long microsLower = day * 1000;
  long microsUpper = (day * 1000) + TimeUnit.DAYS.toMicros(1) - 1;

  // Columns selected per span/annotation row; the group-by list is the same minus parent_id.
  String fields = "s.trace_id, s.parent_id, s.id, a.a_key, a.endpoint_service_name, a.a_type";
  if (hasTraceIdHigh) fields = "s.trace_id_high, " + fields;
  String groupByFields = fields.replace("s.parent_id, ", "");
  String linksQuery = String.format(
      "select distinct %s "+
      "from zipkin_spans s left outer join zipkin_annotations a on " +
      " (s.trace_id = a.trace_id and s.id = a.span_id " +
      " and a.a_key in ('lc', 'ca', 'cs', 'sa', 'sr', 'ma', 'ms', 'mr', 'error')) " +
      "where s.start_ts between %s and %s group by %s",
      fields, microsLower, microsUpper, groupByFields);
  options.put("dbtable", "(" + linksQuery + ") as link_spans");

  log.info("Running Dependencies job for {}: start_ts between {} and {}", dateStamp, microsLower,
      microsUpper);

  JavaSparkContext sc = new JavaSparkContext(conf);
  List<DependencyLink> links;
  try {
    links = new SQLContext(sc).read()
        .format("org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider")
        .options(options)
        .load()
        // Work on the RDD (Spark's distributed collection abstraction) of result rows.
        .toJavaRDD()
        // Group rows by the trace_id column selected by rowTraceId.
        .groupBy(rowTraceId)
        // Turn the rows of each group into DependencyLinks.
        .flatMapValues(new RowsToDependencyLinks(logInitializer, hasTraceIdHigh))
        // Keep only the links; the trace ID key is no longer needed.
        .values()
        // Re-key every link by its (parent, child) pair.
        .mapToPair(l -> Tuple2.apply(Tuple2.apply(l.parent(), l.child()), l))
        // Merge links sharing a (parent, child) pair by summing call and error counts.
        .reduceByKey((l, r) -> DependencyLink.newBuilder()
            .parent(l.parent())
            .child(l.child())
            .callCount(l.callCount() + r.callCount())
            .errorCount(l.errorCount() + r.errorCount())
            .build())
        .values().collect();
  } finally {
    // Stop the Spark context even when the read or the aggregation fails.
    sc.stop();
  }

  log.info("Saving with day=" + dateStamp);
  // Persist the aggregated links back to MySQL.
  saveToMySQL(links);
  log.info("Done");
}
其中组装数据的重点在 new RowsToDependencyLinks(logInitializer, hasTraceIdHigh)
RowsToDependencyLinks 对应的接口实现
final class RowsToDependencyLinks
implements Serializable, Function<Iterable<Row>, Iterable<DependencyLink>>
关注 RowsToDependencyLinks 的call方法,把row信息整合成span,把span信息整合成DependencyLink
/**
 * Converts the rows of one group into dependency links: rows are assembled into spans, spans
 * are collected per trace, and every trace is fed to a {@code DependencyLinker}.
 *
 * @param rows the rows of one group produced by the upstream grouping
 * @return the links produced by {@code linker.link()}, or an empty list when there are no rows
 */
public Iterable call(Iterable rows) {
  if (logInitializer != null) logInitializer.run();
  // ByTraceId is a nested class of DependencyLinkSpanIterator. It implements
  // Iterator<Iterator<Span>>: each element iterates the spans of a single trace.
  Iterator<Iterator<Span>> traces =
      new DependencyLinkSpanIterator.ByTraceId(rows.iterator(), hasTraceIdHigh);
  if (!traces.hasNext()) return Collections.emptyList();

  DependencyLinker linker = new DependencyLinker();
  // Buffer reused for every trace; it is cleared after each putTrace call.
  List<Span> nextTrace = new ArrayList<>();
  while (traces.hasNext()) {
    // ByTraceId.next(): positions an iterator on the next trace.
    Iterator<Span> i = traces.next();
    // DependencyLinkSpanIterator.next(): merges rows of the same span into one Span.
    while (i.hasNext()) nextTrace.add(i.next());
    // Feed the spans of this trace to the linker.
    linker.putTrace(nextTrace);
    nextTrace.clear();
  }
  // The accumulated links (a List).
  return linker.link();
}
关注点放在 Iterator i = traces.next(),进入 DependencyLinkSpanIterator.ByTraceId 的 next 方法
/**
 * Returns an iterator over the spans of the trace that starts at the row currently at the head
 * of {@code delegate}. The head row is only peeked, not consumed.
 *
 * <p>The trace ID of that row is remembered in {@code currentTraceIdHi} and
 * {@code currentTraceIdLo} and also passed to the returned iterator.
 */
public Iterator next() {
  Row head = delegate.peek();
  // Column 0 carries the high 64 bits only when trace_id_high was selected; otherwise use 0.
  if (hasTraceIdHigh) {
    currentTraceIdHi = head.getLong(0);
  } else {
    currentTraceIdHi = 0L;
  }
  // The trace_id column sits at traceIdIndex.
  currentTraceIdLo = head.getLong(traceIdIndex);
  return new DependencyLinkSpanIterator(
      delegate, traceIdIndex, currentTraceIdHi, currentTraceIdLo);
}
另一个关注点在 while (i.hasNext()) nextTrace.add(i.next()),进入 DependencyLinkSpanIterator 的 next 方法
/**
 * Merges the consecutive rows that belong to the same span into a single {@code Span}.
 *
 * <p>Column layout relative to {@code traceIdIndex}: +1 parent_id, +2 id, +3 a_key,
 * +4 endpoint_service_name, +5 a_type. Each row contributes at most one annotation; the
 * service names seen for the span decide its kind and its local/remote endpoints.
 */
@Override
public Span next() {
  Row row = delegate.peek();
  // The span ID of the first row identifies which following rows belong to this span.
  long spanId = row.getLong(traceIdIndex + 2);
  // Error flag and the service name recorded for each annotation key.
  boolean error = false;
  String lcService = null, srService = null, csService = null, caService = null, saService = null,
      maService = null, mrService = null, msService = null;
  while (hasNext()) { // there are more values for this trace
    if (spanId != delegate.peek().getLong(traceIdIndex + 2) /* id */) {
      break; // if we are in a new span
    }
    Row next = delegate.next(); // row for the same span
    // Read the annotation from the row just consumed. Reading from `row` (the first row of the
    // span) would make every iteration see the same annotation and ignore the others.
    String key = emptyToNull(next, traceIdIndex + 3); // a_key
    String value = emptyToNull(next, traceIdIndex + 4); // a_service_name
    if (key == null || value == null) continue; // neither client nor server
    switch (key) {
      case "lc":
        lcService = value;
        break;
      case "ca":
        caService = value;
        break;
      case "cs":
        csService = value;
        break;
      case "sa":
        saService = value;
        break;
      case "ma":
        maService = value;
        break;
      case "mr":
        mrService = value;
        break;
      case "ms":
        msService = value;
        break;
      case "sr":
        srService = value;
        break;
      case "error":
        // a span is in error if it has a tag, not an annotation, of name "error"
        error = BINARY_ANNOTATION_TYPE_STRING == next.getInt(traceIdIndex + 5); // a_type
        break;
      default:
        break; // other keys do not affect the span
    }
  }

  // The client address is more authoritative than the client send owner.
  if (caService == null) caService = csService;

  // Finagle labels two sides of the same socket ("ca", "sa") with the same name.
  // Skip the client side, so it isn't mistaken for a loopback request
  if (saService != null && saService.equals(caService)) caService = null;

  // parent_id is at traceIdIndex + 1; a null parent is represented as 0.
  long parentId = row.isNullAt(traceIdIndex + 1) ? 0L : row.getLong(traceIdIndex + 1);
  Span.Builder result =
      Span.newBuilder().traceId(traceIdHi, traceIdLo).parentId(parentId).id(spanId);

  if (error) {
    result.putTag("error", "" /* actual value doesn't matter */);
  }

  // Choose kind and endpoints from the annotations that were present.
  if (srService != null) {
    return result
        .kind(Span.Kind.SERVER)
        .localEndpoint(ep(srService))
        .remoteEndpoint(ep(caService))
        .build();
  } else if (saService != null) {
    Endpoint localEndpoint = ep(caService);
    // When span.kind is missing, the local endpoint is "lc" and the remote endpoint is "sa"
    if (localEndpoint == null) localEndpoint = ep(lcService);
    return result
        .kind(csService != null ? Span.Kind.CLIENT : null)
        .localEndpoint(localEndpoint)
        .remoteEndpoint(ep(saService))
        .build();
  } else if (csService != null) {
    // NOTE(review): a span that only has "cs" is marked SERVER here; kept as in the original,
    // confirm against upstream before changing.
    return result.kind(Span.Kind.SERVER).localEndpoint(ep(caService)).build();
  } else if (mrService != null) {
    return result
        .kind(Span.Kind.CONSUMER)
        .localEndpoint(ep(mrService))
        .remoteEndpoint(ep(maService))
        .build();
  } else if (msService != null) {
    return result
        .kind(Span.Kind.PRODUCER)
        .localEndpoint(ep(msService))
        .remoteEndpoint(ep(maService))
        .build();
  }
  // No annotation determined a kind: return the span with IDs (and error tag) only.
  return result.build();
}
另一个关注点在 linker.putTrace(nextTrace),这个方法把一条 trace 的 Span 整合成依赖关系(DependencyLink),并累计到 DependencyLinker 中
/**
 * Adds the parent/child service links found in one trace to this linker.
 *
 * <p>The spans are arranged into a tree and visited breadth-first. For each span a
 * (parent, child) service pair is derived from its kind and its local/remote service names,
 * falling back to the nearest remote ancestor where needed, and recorded with {@code addLink}.
 *
 * @param spans the spans of a single trace; an empty list is ignored
 * @return this linker, to allow chaining
 */
public DependencyLinker putTrace(List spans) {
  if (spans.isEmpty()) return this;
  // Build a tree from the spans; each node holds a span, its parent node and its children.
  SpanNode traceTree = builder.build(spans);
  if (logger.isLoggable(FINE)) logger.fine("traversing trace tree, breadth-first");
  // traverse() walks the tree breadth-first.
  for (Iterator<SpanNode> i = traceTree.traverse(); i.hasNext(); ) {
    SpanNode current = i.next();
    Span currentSpan = current.span();
    if (logger.isLoggable(FINE)) {
      logger.fine("processing " + currentSpan);
    }
    Kind kind = currentSpan.kind();
    // When processing links to a client span, we prefer the server's name. If we have no child
    // spans, we proceed to use the name the client chose.
    if (Kind.CLIENT.equals(kind) && !current.children().isEmpty()) {
      continue;
    }
    String serviceName = currentSpan.localServiceName();
    String remoteServiceName = currentSpan.remoteServiceName();
    if (kind == null) {
      // Treat unknown type of span as a client span if we know both sides
      if (serviceName != null && remoteServiceName != null) {
        kind = Kind.CLIENT;
      } else {
        logger.fine("non remote span; skipping");
        continue;
      }
    }
    String child;
    String parent;
    switch (kind) {
      case SERVER:
      case CONSUMER:
        child = serviceName;
        parent = remoteServiceName;
        if (current == traceTree) { // we are the root-most span.
          if (parent == null) {
            logger.fine("root's client is unknown; skipping");
            continue;
          }
        }
        break;
      case CLIENT:
      case PRODUCER:
        parent = serviceName;
        child = remoteServiceName;
        break;
      default:
        logger.fine("unknown kind; skipping");
        continue;
    }
    boolean isError = currentSpan.tags().containsKey("error");
    // Messaging spans are linked directly, without looking at ancestors.
    if (kind == Kind.PRODUCER || kind == Kind.CONSUMER) {
      if (parent == null || child == null) {
        logger.fine("cannot link messaging span to its broker; skipping");
      } else {
        addLink(parent, child, isError);
      }
      continue;
    }
    // Local spans may be between the current node and its remote parent
    Span remoteAncestor = firstRemoteAncestor(current);
    String remoteAncestorName;
    if (remoteAncestor != null
        && (remoteAncestorName = remoteAncestor.localServiceName()) != null) {
      // Some users accidentally put the remote service name on client annotations.
      // Check for this and backfill a link from the nearest remote to that service as necessary.
      if (kind == Kind.CLIENT && serviceName != null && !remoteAncestorName.equals(serviceName)) {
        logger.fine("detected missing link to client span");
        addLink(remoteAncestorName, serviceName, false); // we don't know if there's an error here
      }
      if (kind == Kind.SERVER || parent == null) parent = remoteAncestorName;
      // When an RPC is split between spans, we skip the child (server side). If our parent is a
      // client, we need to check it for errors.
      if (!isError && Kind.CLIENT.equals(remoteAncestor.kind()) &&
          currentSpan.parentId() != null && currentSpan.parentId().equals(remoteAncestor.id())) {
        isError = remoteAncestor.tags().containsKey("error");
      }
    }
    if (parent == null || child == null) {
      logger.fine("cannot find remote ancestor; skipping");
      continue;
    }
    addLink(parent, child, isError);
  }
  return this;
}