Jaeger
Jaeger是Uber推出的一款调用链追踪系统,类似于Zipkin和Dapper,为微服务调用追踪而生。 其主要用于多个服务调用过程追踪分析,图形化服务调用轨迹,便于快速准确定位问题。
Jaeger组成
前端界面展示UI
数据存储Cassandra或Elasticsearch
数据查询Query
数据收集处理Collector
客户端代理Agent
Jaeger服务之间关系
jaeger各组件部署
为了方便统一管理,将jaeger所有组件放到jaeger的namespace中,并创建jaeger-agent这个服务账号,做好访问授权以方便各个节点上的jaeger-agent对default命名空间中各负载的链路调用情况及数据进行收集监控
[root@test-k8s-01 jaeger]# kubectl create namespace jaeger
[root@test-k8s-01 jaeger]# cat rbac.yml
# RBAC for the jaeger-agent DaemonSet: a ServiceAccount in the jaeger
# namespace, a read-only ClusterRole, and the ClusterRoleBinding joining them.
# The three objects are separate YAML documents, hence the "---" separators.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: jaeger-agent
  namespace: jaeger
---
# rbac.authorization.k8s.io/v1 has the same schema as v1beta1, which newer
# Kubernetes releases no longer serve.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: jaeger-agent
rules:
# Read-only access to resources of the core ("") API group.
- apiGroups: [""]
  resources:
  - services
  - namespaces
  - pods
  verbs: ["get", "list", "watch"]
# Deployments are not part of the core group; they are served by "apps"
# (and by "extensions" on older clusters), so they need a rule of their own.
- apiGroups: ["apps", "extensions"]
  resources:
  - deployments
  verbs: ["get", "list", "watch"]
---
# Grants the ClusterRole above to the jaeger-agent ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: jaeger-agent
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: jaeger-agent
subjects:
- kind: ServiceAccount
  name: jaeger-agent
  namespace: jaeger
elasticsearch部署:
测试环境下利用statefulset部署elasticsearch,正式环境将jaeger-collector端数据接入线上ELK集群,方便对索引统一管理
wget https://raw.githubusercontent.com/jaegertracing/jaeger-kubernetes/master/production-elasticsearch/elasticsearch.yml
[root@test-k8s-01 jaeger]#cat elasticsearch.yml
# Single-replica Elasticsearch StatefulSet plus its headless Service,
# both in the jaeger namespace.
apiVersion: v1
kind: List
items:
# NOTE: apps/v1beta1 is no longer served from Kubernetes 1.16 on; on such
# clusters switch to apps/v1 and add spec.selector.matchLabels.
- apiVersion: apps/v1beta1
  kind: StatefulSet
  metadata:
    name: elasticsearch
    namespace: jaeger
    labels:
      app: elasticsearch
      app.kubernetes.io/name: elasticsearch
      app.kubernetes.io/component: storage-backend
      app.kubernetes.io/part-of: jaeger
  spec:
    serviceName: elasticsearch
    replicas: 1
    template:
      metadata:
        labels:
          app: elasticsearch
          app.kubernetes.io/name: elasticsearch
          app.kubernetes.io/component: storage-backend
          app.kubernetes.io/part-of: jaeger
      spec:
        containers:
        - name: elasticsearch
          image: docker.elastic.co/elasticsearch/elasticsearch:5.6.0
          imagePullPolicy: Always
          command:
          - bin/elasticsearch
          args:
          - "-Ehttp.host=0.0.0.0"
          # Listen address for Elasticsearch transport, taken from the PODIP
          # env var below. With the default loopback address, the cron job
          # that periodically syncs service dependencies into Elasticsearch
          # runs into problems.
          - "-Etransport.host=$(PODIP)"
          volumeMounts:
          # NOTE(review): the args above do not set path.data; confirm that
          # Elasticsearch really writes its data under /data.
          - name: cce-sfs-kt-jaeger-data
            mountPath: /data
          env:
          # Pod IP injected through the downward API.
          - name: PODIP
            valueFrom:
              fieldRef:
                fieldPath: status.podIP
          # The original listing repeated initialDelaySeconds, periodSeconds
          # and timeoutSeconds after this probe; the duplicate keys are
          # dropped and the first, complete set of values is kept.
          readinessProbe:
            failureThreshold: 3
            initialDelaySeconds: 20
            periodSeconds: 10
            successThreshold: 1
            tcpSocket:
              port: 9200
            timeoutSeconds: 4
        # Persist Elasticsearch data through a PVC. This is the Huawei Cloud
        # way of mounting a PVC; for a custom setup see the StatefulSet
        # volumeClaimTemplates settings.
        volumes:
        - name: cce-sfs-kt-jaeger-data
          persistentVolumeClaim:
            claimName: cce-sfs-kt-jaeger-data
# Headless Service (clusterIP: None) in front of the StatefulSet pods.
- apiVersion: v1
  kind: Service
  metadata:
    name: elasticsearch
    namespace: jaeger
    labels:
      app: elasticsearch
      app.kubernetes.io/name: elasticsearch
      app.kubernetes.io/component: storage-backend
      app.kubernetes.io/part-of: jaeger
  spec:
    clusterIP: None
    selector:
      app.kubernetes.io/name: elasticsearch
      app.kubernetes.io/component: storage-backend
      app.kubernetes.io/part-of: jaeger
    ports:
    - port: 9200
      name: elasticsearch
    - port: 9300
      name: transport
各组件配置管理:
cat configmap.yml
# Shared configuration for the Jaeger components. The collector, query and
# agent keys each hold one YAML config file as a block scalar.
apiVersion: v1
kind: ConfigMap
metadata:
  name: jaeger-configuration
  namespace: jaeger
  labels:
    app: jaeger
    app.kubernetes.io/name: jaeger
data:
  span-storage-type: elasticsearch
  # server-urls points at an Elasticsearch node of the ELK cluster
  # (address redacted in the original write-up).
  collector: |
    es:
      server-urls: http://192.168.*.*:80**
    collector:
      zipkin:
        http-port: 9411
  # Same Elasticsearch endpoint as the collector; the original listing had a
  # doubled "http://http://" scheme here.
  query: |
    es:
      server-urls: http://192.168.*.*:80**
  agent: |
    collector:
      host-port: "jaeger-collector:14267"
jaeger-query:
我们规划将jaeger-query以及jaeger-collector两个组件的pods调度到test-jaeger-01这台节点之上,
所以通过配置pod的nodeAffinity以及对test-jaeger-01节点taints的容忍,将以上两个组件的pods强制分配到该节点。
[root@test-k8s-01 jumpserver]# kubectl taint node test-jaeger-01 type=jaeger:NoSchedule
[root@test-k8s-01 jumpserver]# kubectl describe node test-jaeger-01 |grep Taint
Taints: type=jaeger:NoSchedule
[root@test-k8s-01 jaeger]#cat jaeger-query.yml
# jaeger-query Deployment and the LoadBalancer Service exposing it.
# The two objects are separate YAML documents, hence the "---" separator.
#
# NOTE: extensions/v1beta1 Deployments are no longer served from Kubernetes
# 1.16 on; on such clusters switch to apps/v1 and add spec.selector.matchLabels.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: jaeger-query
  namespace: jaeger
  labels:
    app: jaeger
    app.kubernetes.io/name: jaeger
    app.kubernetes.io/component: query
spec:
  replicas: 2
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: jaeger
        app.kubernetes.io/name: jaeger
        app.kubernetes.io/component: query
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "16686"
    spec:
      containers:
      - image: jaegertracing/jaeger-query:1.9.0
        name: jaeger-query
        # query.yaml is projected from the jaeger-configuration ConfigMap
        # through the volume mounted at /conf.
        args: ["--config-file=/conf/query.yaml"]
        ports:
        - containerPort: 16686
          protocol: TCP
        readinessProbe:
          httpGet:
            path: "/"
            port: 16687
        volumeMounts:
        - name: jaeger-configuration-volume
          mountPath: /conf
        env:
        - name: SPAN_STORAGE_TYPE
          valueFrom:
            configMapKeyRef:
              name: jaeger-configuration
              key: span-storage-type
      volumes:
      - configMap:
          name: jaeger-configuration
          items:
          - key: query
            path: query.yaml
        name: jaeger-configuration-volume
      # Pin the pods to one node by hostname label
      # (value redacted in the original write-up).
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/hostname
                operator: In
                values:
                - 192.168.*.*
      # Tolerate the type=jaeger:NoSchedule taint so the pods can be
      # scheduled onto a node carrying it.
      tolerations:
      - key: "type"
        operator: "Equal"
        value: "jaeger"
        effect: "NoSchedule"
---
# LoadBalancer Service backed by a Huawei Cloud ELB. The plan is to put an
# nginx reverse proxy in front of it later as the single entry point.
apiVersion: v1
kind: Service
metadata:
  annotations:
    kubernetes.io/elb.class: union
    kubernetes.io/elb.id: f7713230-c6fe-465c-ba0a-***********
    kubernetes.io/elb.vpc.id: c289b36d-1321-*********
  name: jaeger-query
  namespace: jaeger
  labels:
    app: jaeger
    app.kubernetes.io/name: jaeger
    app.kubernetes.io/component: query
spec:
  # Address redacted in the original write-up.
  loadBalancerIP: 192.168.*.*
  ports:
  - name: jaeger-query
    port: 80
    protocol: TCP
    targetPort: 16686
  selector:
    app.kubernetes.io/name: jaeger
    app.kubernetes.io/component: query
  type: LoadBalancer
jaeger-collector以及jaeger-agent部署:
利用daemonset方式部署jaeger-agent,以节点为单位采集各个应用pod之间的span信息,也可以以sidecar的方式将agent注入到各个应用的pod中
[root@test-k8s-01 jaeger]# cat jaeger-production-template.yml
# jaeger-collector Deployment, its two Services (jaeger-collector and zipkin)
# and the jaeger-agent DaemonSet.
#
# NOTE: extensions/v1beta1 Deployments and DaemonSets are no longer served
# from Kubernetes 1.16 on; on such clusters switch to apps/v1 and add
# spec.selector.matchLabels.
apiVersion: v1
kind: List
items:
- apiVersion: extensions/v1beta1
  kind: Deployment
  metadata:
    name: jaeger-collector
    namespace: jaeger
    labels:
      app: jaeger
      app.kubernetes.io/name: jaeger
      app.kubernetes.io/component: collector
  spec:
    replicas: 2
    strategy:
      type: Recreate
    template:
      metadata:
        labels:
          app: jaeger
          app.kubernetes.io/name: jaeger
          app.kubernetes.io/component: collector
        annotations:
          prometheus.io/scrape: "true"
          prometheus.io/port: "14268"
      spec:
        # Tolerate the type=jaeger:NoSchedule taint so the pods can be
        # scheduled onto a node carrying it.
        tolerations:
        - key: "type"
          operator: "Equal"
          value: "jaeger"
          effect: "NoSchedule"
        # Pin the pods to one node by hostname label.
        affinity:
          nodeAffinity:
            requiredDuringSchedulingIgnoredDuringExecution:
              nodeSelectorTerms:
              - matchExpressions:
                - key: kubernetes.io/hostname
                  operator: In
                  values:
                  - 192.168.10.156
        containers:
        - image: jaegertracing/jaeger-collector:1.9.0
          name: jaeger-collector
          # collector.yaml is projected from the jaeger-configuration
          # ConfigMap through the volume mounted at /conf.
          args: ["--config-file=/conf/collector.yaml"]
          ports:
          - containerPort: 14267
            protocol: TCP
          - containerPort: 14268
            protocol: TCP
          - containerPort: 9411
            protocol: TCP
          readinessProbe:
            httpGet:
              path: "/"
              port: 14269
          volumeMounts:
          - name: jaeger-configuration-volume
            mountPath: /conf
          env:
          - name: SPAN_STORAGE_TYPE
            valueFrom:
              configMapKeyRef:
                name: jaeger-configuration
                key: span-storage-type
        volumes:
        - configMap:
            name: jaeger-configuration
            items:
            - key: collector
              path: collector.yaml
          name: jaeger-configuration-volume
- apiVersion: v1
  kind: Service
  metadata:
    name: jaeger-collector
    # The original listing omitted the namespace on this Service only; it is
    # set here so the Service lands next to the collector pods it selects.
    namespace: jaeger
    labels:
      app: jaeger
      app.kubernetes.io/name: jaeger
      app.kubernetes.io/component: collector
  spec:
    ports:
    - name: jaeger-collector-tchannel
      port: 14267
      protocol: TCP
      targetPort: 14267
    - name: jaeger-collector-http
      port: 14268
      protocol: TCP
      targetPort: 14268
    - name: jaeger-collector-zipkin
      port: 9411
      protocol: TCP
      targetPort: 9411
    selector:
      app.kubernetes.io/name: jaeger
      app.kubernetes.io/component: collector
    type: ClusterIP
# Second Service named "zipkin"; it selects the same collector pods and
# exposes only port 9411.
- apiVersion: v1
  kind: Service
  metadata:
    name: zipkin
    namespace: jaeger
    labels:
      app: jaeger
      app.kubernetes.io/name: jaeger
      app.kubernetes.io/component: zipkin
  spec:
    ports:
    - name: jaeger-collector-zipkin
      port: 9411
      protocol: TCP
      targetPort: 9411
    selector:
      app.kubernetes.io/name: jaeger
      app.kubernetes.io/component: collector
    type: ClusterIP
- apiVersion: extensions/v1beta1
  kind: DaemonSet
  metadata:
    name: jaeger-agent
    namespace: jaeger
    labels:
      app: jaeger
      app.kubernetes.io/name: jaeger
      app.kubernetes.io/component: agent
  spec:
    template:
      metadata:
        labels:
          app: jaeger
          app.kubernetes.io/name: jaeger
          app.kubernetes.io/component: agent
        annotations:
          prometheus.io/scrape: "true"
          prometheus.io/port: "5778"
      spec:
        containers:
        - name: jaeger-agent
          image: jaegertracing/jaeger-agent:1.9.0
          args: ["--config-file=/conf/agent.yaml"]
          volumeMounts:
          - name: jaeger-configuration-volume
            mountPath: /conf
          env:
          # Because jaeger-agent runs as a DaemonSet on the host network, the
          # node IP can be stored in an environment variable and handed to
          # applications in this way.
          - name: JAEGER_AGENT_HOST
            valueFrom:
              fieldRef:
                fieldPath: status.hostIP
          ports:
          - containerPort: 5775
            protocol: UDP
          - containerPort: 6831
            protocol: UDP
          - containerPort: 6832
            protocol: UDP
          - containerPort: 5778
            protocol: TCP
        hostNetwork: true
        dnsPolicy: ClusterFirstWithHostNet
        serviceAccountName: jaeger-agent
        volumes:
        - configMap:
            name: jaeger-configuration
            items:
            - key: agent
              path: agent.yaml
          name: jaeger-configuration-volume
创建一个cronjob,将各个应用的依赖关系导入elasticsearch,进行持久化存储,否则仅仅存储在缓存之中
# CronJob running jaegertracing/spark-dependencies on the schedule below,
# configured for Elasticsearch storage through the env vars.
# The original listing lost "kind: List" and the "jobTemplate:" level and
# fused schedule/image/name onto one line; they are restored here.
apiVersion: v1
kind: List
items:
- apiVersion: batch/v1beta1
  kind: CronJob
  metadata:
    labels:
      run: jaeger-spark-dependencies
    name: jaeger-spark-dependencies
    namespace: jaeger
  spec:
    schedule: "55 23 * * *"
    jobTemplate:
      spec:
        template:
          metadata:
            labels:
              run: jaeger-spark-dependencies
          spec:
            containers:
            - name: jaeger-spark-dependencies
              image: jaegertracing/spark-dependencies
              env:
              - name: STORAGE
                value: elasticsearch
              # When Elasticsearch is deployed inside the cluster, use
              #   elasticsearch-0.elasticsearch.jaeger.svc.cluster.local:9200
              # instead of the external address below (redacted in the
              # original write-up).
              - name: ES_NODES
                value: 192.168.*.*:80**
              # ES_USERNAME / ES_PASSWORD are not needed when Elasticsearch
              # has no access authentication configured.
              - name: ES_USERNAME
                value: elastic
              - name: ES_PASSWORD
                value: changeme
            # Not in the original listing: Job pod templates must set
            # restartPolicy to Never or OnFailure.
            # NOTE(review): confirm which of the two was actually used.
            restartPolicy: Never
[root@test-k8s-01 jaeger]# kubectl get pods -n jaeger
NAME READY STATUS RESTARTS AGE
elasticsearch-0 1/1 Running 0 2d
jaeger-agent-2fdzc 1/1 Running 0 16h
jaeger-agent-2g4km 1/1 Running 0 16h
jaeger-agent-64qf9 1/1 Running 0 16h
jaeger-agent-gvkxq 1/1 Running 0 16h
jaeger-agent-xj7cs 1/1 Running 0 16h
jaeger-agent-zw5gr 1/1 Running 0 16h
jaeger-collector-7fb4fff766-m79x7 1/1 Running 0 16h
jaeger-collector-7fb4fff766-t5thx 1/1 Running 0 16h
jaeger-query-888d478b8-fk527 1/1 Running 0 16h
jaeger-query-888d478b8-ht8xh 1/1 Running 0 16h
jaeger-spark-dependencies-1574265300-8rmbl 0/1 Completed 0 1d
jaeger-spark-dependencies-1574351700-txpjt 0/1 Completed 0 9h
agent采样率设置
参考文档:https://github.com/jaegertracing/jaeger-kubernetes