Elasticsearch is a real-time, distributed, scalable search engine that supports full-text and structured search. It is typically used to index and search large volumes of log data, but it can also search many other kinds of documents.
Elasticsearch is usually deployed together with Kibana, a powerful data-visualization dashboard for Elasticsearch that lets you browse the log data through a web interface.
Fluentd is a popular open-source data collector. We will install Fluentd on the Kubernetes cluster nodes, where it tails the container log files, filters and transforms the log data, and ships it to the Elasticsearch cluster, where it is indexed and stored.
If you already understand how EFK works and only want a quick test setup, you can directly install the addon manifests provided by the Kubernetes project: https://github.com/kubernetes/kubernetes/blob/master/cluster/addons/fluentd-elasticsearch/.
[root@master ~]# vim kube-logging.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: logging
[root@master ~]# kubectl create -f kube-logging.yaml
namespace/logging created
[root@master ~]# kubectl get ns
NAME              STATUS   AGE
default           Active   5d6h
kube-node-lease   Active   5d6h
kube-public       Active   5d6h
kube-system       Active   5d6h
logging           Active   5d6h
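The same namespace can also be created directly, without a manifest; the two forms are equivalent:

kubectl create namespace logging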
kind: Service
apiVersion: v1
metadata:
  name: elasticsearch
  namespace: logging
  labels:
    app: elasticsearch
spec:
  selector:
    app: elasticsearch
  clusterIP: None
  ports:
    - port: 9200
      name: rest
    - port: 9300
      name: inter-node
[root@master es]# kubectl create -f elasticsearch-svc.yaml
service/elasticsearch created
[root@master es]# kubectl get svc -n logging
NAME            TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)             AGE
elasticsearch   ClusterIP   None         <none>        9200/TCP,9300/TCP   55m
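Because the Service is headless (clusterIP: None), it does not load-balance; instead, cluster DNS resolves the name to the addresses of the individual es Pods once the StatefulSet below is running. You can check this later from a throwaway Pod (a quick sketch; busybox:1.28 is used because nslookup is broken in some newer busybox builds):

kubectl run -it --rm dns-test --image=busybox:1.28 -- nslookup elasticsearch.logging.svc.cluster.local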
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: es
  namespace: logging
spec:
  serviceName: elasticsearch
  replicas: 3
  selector:
    matchLabels:
      app: elasticsearch
  template:
    metadata:
      labels:
        app: elasticsearch
    spec:
      nodeSelector:
        es: log
      initContainers:
        - name: increase-vm-max-map
          image: busybox
          command: ["sysctl", "-w", "vm.max_map_count=262144"]
          securityContext:
            privileged: true
        - name: increase-fd-ulimit
          image: busybox
          command: ["sh", "-c", "ulimit -n 65536"]
          securityContext:
            privileged: true
      containers:
        - name: elasticsearch
          image: docker.elastic.co/elasticsearch/elasticsearch:7.6.2
          ports:
            - name: rest
              containerPort: 9200
            - name: inter
              containerPort: 9300
          resources:
            limits:
              cpu: 1000m
            requests:
              cpu: 1000m
          volumeMounts:
            - name: data
              mountPath: /usr/share/elasticsearch/data
          env:
            - name: cluster.name
              value: k8s-logs
            - name: node.name
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: cluster.initial_master_nodes
              value: "es-0,es-1,es-2"
            - name: discovery.zen.minimum_master_nodes
              value: "2"
            - name: discovery.seed_hosts
              value: "elasticsearch"
            - name: ES_JAVA_OPTS
              value: "-Xms512m -Xmx512m"
            - name: network.host
              value: "0.0.0.0"
  volumeClaimTemplates:
    - metadata:
        name: data
        labels:
          app: elasticsearch
      spec:
        accessModes: [ "ReadWriteOnce" ]
        storageClassName: es-data-db
        resources:
          requests:
            storage: 50Gi
1. replicas: 3 — run three replicas.
2. matchLabels is set to app=elasticsearch, so the Pod template section .spec.template.metadata.labels must also carry the app=elasticsearch label.
3. cluster.name: the name of the Elasticsearch cluster, here k8s-logs.
4. node.name: the node name, taken from metadata.name; it resolves to es-[0,1,2] according to each Pod's ordinal.
5. discovery.zen.minimum_master_nodes: set to (N/2) + 1, where N is the number of master-eligible nodes in the cluster; with our 3 nodes that gives 2. (In Elasticsearch 7.x this setting is deprecated and ignored; cluster bootstrap is governed by cluster.initial_master_nodes instead.)
6. ES_JAVA_OPTS: set to -Xms512m -Xmx512m, telling the JVM to use a minimum and maximum heap of 512 MB.
[root@master es]# kubectl label nodes <node-name> es=log
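Verify that the label was applied; only the nodes listed here are eligible to run the es Pods:

[root@master es]# kubectl get nodes -l es=log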
The StatefulSet references the es-data-db StorageClass, which does not exist yet; we back it with NFS-based dynamic provisioning. First set up an NFS server (here 192.168.241.129, matching the provisioner configuration below):
[root@master ~]# systemctl stop firewalld.service
[root@master ~]# yum -y install nfs-utils rpcbind
[root@master ~]# mkdir -p /data/k8s
[root@master ~]# chmod 755 /data/k8s
[root@master ~]# vim /etc/exports
/data/k8s 192.168.241.0/24(rw,sync,no_root_squash)
[root@master ~]# systemctl start rpcbind.service
[root@master ~]# systemctl start nfs.service
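Confirm that the export is active; the output should list /data/k8s for 192.168.241.0/24:

[root@master ~]# showmount -e localhost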
kind: Deployment
apiVersion: apps/v1
metadata:
  name: nfs-client-provisioner
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          image: quay.io/external_storage/nfs-client-provisioner:latest
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: fuseim.pri/ifs
            - name: NFS_SERVER
              value: 192.168.241.129
            - name: NFS_PATH
              value: /data/k8s
      volumes:
        - name: nfs-client-root
          nfs:
            server: 192.168.241.129
            path: /data/k8s
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-client-provisioner
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["list", "watch", "create", "update", "patch"]
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["create", "delete", "get", "list", "watch", "patch", "update"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    namespace: default
roleRef:
  kind: ClusterRole
  name: nfs-client-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: es-data-db
provisioner: fuseim.pri/ifs
[root@master ~]# kubectl create -f nfs-client.yaml
[root@master ~]# kubectl create -f nfs-client-sa.yaml
[root@master ~]# kubectl create -f elasticsearch-storageclass.yaml
[root@master ~]# kubectl get po
NAME                                      READY   STATUS    RESTARTS   AGE
nfs-client-provisioner-74f78549f6-h2xd8   1/1     Running   0          97m
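With the provisioner running, you can verify dynamic provisioning end to end with a small throwaway claim (test-pvc is a hypothetical name used only for this check):

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-pvc
spec:
  storageClassName: es-data-db
  accessModes: [ "ReadWriteOnce" ]
  resources:
    requests:
      storage: 1Mi

If kubectl get pvc test-pvc shows the claim Bound after a few seconds, the StorageClass is wired up correctly; delete the claim again before continuing.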
[root@master es]# kubectl create -f elasticsearch-statefulset.yaml
statefulset.apps/es created
[root@master es]# kubectl get sts -n logging
NAME   READY   AGE
es     3/3     80m
[root@master es]# kubectl get po -n logging
NAME   READY   STATUS    RESTARTS   AGE
es-0   1/1     Running   0          80m
es-1   1/1     Running   0          85m
es-2   1/1     Running   0          90m
[root@master es]# kubectl port-forward es-0 9200:9200 --namespace=logging
Forwarding from 127.0.0.1:9200 -> 9200
Forwarding from [::1]:9200 -> 9200
In a separate terminal, check that Elasticsearch responds (for the full cluster state, query curl http://localhost:9200/_cluster/state?pretty instead):
[root@master ~]# curl http://localhost:9200/
{
  "name" : "es-0",
  "cluster_name" : "k8s-logs",
  "cluster_uuid" : "9AFOqPu0S2-8rqwTec3drw",
  "version" : {
    "number" : "7.6.2",
    "build_flavor" : "default",
    "build_type" : "docker",
    "build_hash" : "ef48eb35cf30adf4db14086e8aabd07ef6fb113f",
    "build_date" : "2020-03-26T06:34:37.794943Z",
    "build_snapshot" : false,
    "lucene_version" : "8.4.0",
    "minimum_wire_compatibility_version" : "6.8.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}
apiVersion: v1
kind: Service
metadata:
  name: kibana
  namespace: logging
  labels:
    app: kibana
spec:
  ports:
    - port: 5601
  type: NodePort
  selector:
    app: kibana
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kibana
  namespace: logging
  labels:
    app: kibana
spec:
  selector:
    matchLabels:
      app: kibana
  template:
    metadata:
      labels:
        app: kibana
    spec:
      nodeSelector:
        es: log
      containers:
        - name: kibana
          image: docker.elastic.co/kibana/kibana:7.6.2
          resources:
            limits:
              cpu: 1000m
            requests:
              cpu: 1000m
          env:
            - name: ELASTICSEARCH_HOSTS
              value: http://elasticsearch:9200
          ports:
            - containerPort: 5601
[root@master kibana]# kubectl create -f kibana.yaml
service/kibana created
deployment.apps/kibana created
[root@master kibana]# kubectl get pods --namespace=logging
NAME                      READY   STATUS    RESTARTS   AGE
es-0                      1/1     Running   0          85m
es-1                      1/1     Running   0          84m
es-2                      1/1     Running   0          83m
kibana-5c565c47dd-xj4bd   1/1     Running   0          80m
[root@master kibana]# kubectl get svc -n logging
NAME            TYPE        CLUSTER-IP    EXTERNAL-IP   PORT(S)             AGE
elasticsearch   ClusterIP   None          <none>        9200/TCP,9300/TCP   41h
kibana          NodePort    10.99.98.23   <none>        5601:30417/TCP      41h
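Kibana is now reachable from outside the cluster at http://<node-ip>:30417. The node port is allocated by Kubernetes, so instead of reading it off the table it can be extracted directly:

kubectl get svc kibana -n logging -o jsonpath='{.spec.ports[0].nodePort}'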
Fluentd is an efficient log aggregator written in Ruby that scales well. For most organizations Fluentd is efficient enough and relatively light on resources. Fluent Bit is an even lighter alternative that uses fewer resources, but its plugin ecosystem is smaller than Fluentd's; overall Fluentd is more mature and more widely used, so we use Fluentd as the log collector here.
Fluentd scrapes log data from a given set of sources, processes it (converting it into a structured data format), and forwards it to other services such as Elasticsearch, object storage, and so on. In short, Fluentd:
1. gathers data from multiple log sources;
2. structures and tags that data;
3. routes the data to one or more target services based on matching tags, as sketched below.
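A minimal sketch of this source → tag → match flow (the file path and tag here are hypothetical, chosen only to show the routing idea):

<source>
  @type tail                      # tail a log file as the data source
  path /var/log/myapp.log         # hypothetical application log
  pos_file /var/log/myapp.log.pos
  tag myapp.access                # every event from this source is tagged myapp.access
  <parse>
    @type none
  </parse>
</source>

<match myapp.**>                  # events whose tag matches myapp.** ...
  @type elasticsearch             # ... are routed to Elasticsearch
  host elasticsearch
  port 9200
</match>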
For example, an output section like the following (host and port injected via environment variables) forwards every log event to Elasticsearch:

<match **>
  @id elasticsearch
  @type elasticsearch
  @log_level info
  include_tag_key true
  type_name fluentd
  host "#{ENV['OUTPUT_HOST']}"
  port "#{ENV['OUTPUT_PORT']}"
  logstash_format true
  <buffer>
    @type file
    path /var/log/fluentd-buffers/kubernetes.system.buffer
    flush_mode interval
    retry_type exponential_backoff
    flush_thread_count 2
    flush_interval 5s
    retry_forever
    retry_max_interval 30
    chunk_limit_size "#{ENV['OUTPUT_BUFFER_CHUNK_LIMIT']}"
    queue_limit_length "#{ENV['OUTPUT_BUFFER_QUEUE_LIMIT']}"
    overflow_action block
  </buffer>
</match>

# Drop attributes we don't need
<filter kubernetes.**>
  @type record_transformer
  remove_keys $.docker.container_id,$.kubernetes.container_image_id,$.kubernetes.pod_id,$.kubernetes.namespace_id,$.kubernetes.master_url,$.kubernetes.labels.pod-template-hash
</filter>

# Keep only logs from Pods carrying the logging=true label
<filter kubernetes.**>
  @id filter_log
  @type grep
  <regexp>
    key $.kubernetes.labels.logging
    pattern ^true$
  </regexp>
</filter>
The ConfigMap we actually deploy (fluentd-config in the logging namespace) looks like this:

kind: ConfigMap
apiVersion: v1
metadata:
  name: fluentd-config
  namespace: logging
data:
  system.conf: |-
    <system>
      root_dir /tmp/fluentd-buffers/
    </system>
  containers.input.conf: |-
    # Detect exceptions in the log output and forward them as a single log entry
    # https://github.com/GoogleCloudPlatform/fluent-plugin-detect-exceptions
    # Matches log events tagged raw.kubernetes.**
    <match raw.kubernetes.**>
      @id raw.kubernetes
      @type detect_exceptions   # use the detect-exceptions plugin to handle exception stack traces
      remove_tag_prefix raw     # strip the raw prefix
      message log
      stream stream
      multiline_flush_interval 5
      max_bytes 500000
      max_lines 1000
    </match>
    # Concatenate multi-line logs
    <filter **>
      @id filter_concat
      @type concat              # Fluentd filter plugin that joins a log entry split across multiple events
      key message
      multiline_end_regexp /\n$/   # join on the newline character "\n"
      separator ""
    </filter>
    # Enrich records with Kubernetes metadata
    <filter kubernetes.**>
      @id filter_kubernetes_metadata
      @type kubernetes_metadata
    </filter>
    # Fix JSON fields in ES
    # Plugin: https://github.com/repeatedly/fluent-plugin-multi-format-parser
    <filter kubernetes.**>
      @id filter_parser
      @type parser              # multi-format parser plugin
      key_name log              # the field in the record to parse
      reserve_data true         # keep the original key/value pairs in the parsed result
      remove_key_name_field true   # drop the key_name field once parsing succeeds
      <parse>
        @type multi_format
        <pattern>
          format json
        </pattern>
        <pattern>
          format none
        </pattern>
      </parse>
    </filter>
    # Drop attributes we don't need
    <filter kubernetes.**>
      @type record_transformer
      remove_keys $.docker.container_id,$.kubernetes.container_image_id,$.kubernetes.pod_id,$.kubernetes.namespace_id,$.kubernetes.master_url,$.kubernetes.labels.pod-template-hash
    </filter>
    # Keep only logs from Pods carrying the logging=true label
    <filter kubernetes.**>
      @id filter_log
      @type grep
      <regexp>
        key $.kubernetes.labels.logging
        pattern ^true$
      </regexp>
    </filter>
  ###### Listener configuration, typically used for log aggregation ######
  forward.input.conf: |-
    # Listen for messages sent over TCP
    <source>
      @id forward
      @type forward
    </source>
  output.conf: |-
    <match **>
      @id elasticsearch
      @type elasticsearch
      @log_level info
      include_tag_key true
      host elasticsearch
      port 9200
      logstash_format true
      logstash_prefix k8s    # prefix the index name with k8s
      request_timeout 30s
      <buffer>
        @type file
        path /var/log/fluentd-buffers/kubernetes.system.buffer
        flush_mode interval
        retry_type exponential_backoff
        flush_thread_count 2
        flush_interval 5s
        retry_forever
        retry_max_interval 30
        chunk_limit_size 2M
        queue_limit_length 8
        overflow_action block
      </buffer>
    </match>
The Fluentd DaemonSet schedules only onto nodes that carry the fluentd-ds-ready label:

nodeSelector:
  beta.kubernetes.io/fluentd-ds-ready: "true"

so label every node whose logs should be collected:

kubectl label nodes <node-name> beta.kubernetes.io/fluentd-ds-ready=true

To collect logs from tainted nodes (such as masters) as well, the manifest carries a blanket toleration:

tolerations:
  - operator: Exists
Fluentd tails the container log files under the Docker data directory on each node, so confirm where that directory is (the DaemonSet's hostPath mounts must match it):
[root@node2 ~]# docker info | grep 'Docker Root Dir'
Docker Root Dir: /var/lib/docker
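The fluentd-daemonset.yaml applied below is not reproduced in full here. The following is a minimal sketch of its essential parts, assuming the fluentd-es naming visible in the output below and the image from the official addon (both are assumptions; take the complete manifest, including the fluentd-es ServiceAccount, ClusterRole, and ClusterRoleBinding, from the addon repository linked at the top):

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: fluentd-es
  namespace: logging
spec:
  selector:
    matchLabels:
      app: fluentd-es
  template:
    metadata:
      labels:
        app: fluentd-es
    spec:
      serviceAccountName: fluentd-es      # bound to RBAC rules that allow reading Pod metadata
      nodeSelector:
        beta.kubernetes.io/fluentd-ds-ready: "true"
      tolerations:
        - operator: Exists                # run on tainted (e.g. master) nodes too
      containers:
        - name: fluentd-es
          image: quay.io/fluentd_elasticsearch/fluentd:v3.0.4   # assumed image; use your own build
          volumeMounts:
            - name: varlog
              mountPath: /var/log
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers   # must match the Docker Root Dir checked above
              readOnly: true
            - name: config-volume
              mountPath: /etc/fluent/config.d         # the ConfigMap created above
      volumes:
        - name: varlog
          hostPath:
            path: /var/log
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
        - name: config-volume
          configMap:
            name: fluentd-config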
[root@master fluentd]# kubectl create -f fluentd-configmap.yaml
configmap "fluentd-config" created
[root@master fluentd]# kubectl create -f fluentd-daemonset.yaml
serviceaccount "fluentd-es" created
clusterrole.rbac.authorization.k8s.io "fluentd-es" created
clusterrolebinding.rbac.authorization.k8s.io "fluentd-es" created
daemonset.apps "fluentd-es" created
[root@master fluentd]# kubectl get po -n logging
NAME                      READY   STATUS    RESTARTS   AGE
es-0                      1/1     Running   1          46h
es-1                      1/1     Running   1          46h
es-2                      1/1     Running   1          46h
fluentd-es-9b9c6          1/1     Running   0          6s
fluentd-es-jlr69          1/1     Running   0          6s
fluentd-es-psccl          1/1     Running   0          6s
kibana-749ccd7d79-bsfgd   1/1     Running   0          34m
apiVersion: v1
kind: Pod
metadata:
  name: counter
  labels:
    logging: "true"   # this label is what the grep filter selects on
spec:
  containers:
    - name: count
      image: busybox
      args: [/bin/sh, -c,
             'i=0; while true; do echo "$i: $(date)"; i=$((i+1)); sleep 1; done']
[root@master pod]# kubectl get po
NAME      READY   STATUS    RESTARTS   AGE
counter   1/1     Running   0          5h20m
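You can preview what Fluentd will pick up by tailing the Pod directly; each line becomes one log event:

[root@master pod]# kubectl logs counter --tail=3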
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx
  namespace: default
  labels:
    nginx: "true"
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
      release: stable
  template:
    metadata:
      labels:
        app: nginx
        nginx: "true"
        release: stable
    spec:
      containers:
        - name: nginx
          image: nginx
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: nginx
  namespace: default
  labels:
    app: nginx
    nginx: "true"
    release: stable
spec:
  type: NodePort
  selector:
    app: nginx
    nginx: "true"
  ports:
    - name: http
      port: 80
      targetPort: 80
      nodePort: 30002
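Once the Service is up, hitting the NodePort a few times generates nginx access-log lines for the pipeline to collect (replace <node-ip> with any node's address):

curl http://<node-ip>:30002/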
To collect the nginx Pod logs as well, two parts of fluentd-config need to change. First, the grep filter gets a second label match; multiple <regexp> blocks at the top level of a grep filter are ANDed together, so to accept Pods that carry either label, the two matches are wrapped in an <or> block:

<filter kubernetes.**>
  @id filter_log
  @type grep
  <or>
    <regexp>
      key $.kubernetes.labels.logging
      pattern ^true$
    </regexp>
    <regexp>
      key $.kubernetes.labels.nginx    # newly added: also match Pods labeled nginx=true
      pattern ^true$
    </regexp>
  </or>
</filter>

Second, in output.conf the index prefix is switched to nginx:

output.conf: |-
  <match **>
    @id elasticsearch
    @type elasticsearch
    @log_level info
    include_tag_key true
    host elasticsearch
    port 9200
    logstash_format true
    logstash_prefix nginx   # newly added index prefix (was k8s)
    request_timeout 30s
    <buffer>
      @type file
      path /var/log/fluentd-buffers/kubernetes.system.buffer
      flush_mode interval
      retry_type exponential_backoff
      flush_thread_count 2
      flush_interval 5s
      retry_forever
      retry_max_interval 30
      chunk_limit_size 2M
      queue_limit_length 8
      overflow_action block
    </buffer>
  </match>
[root@master fluentd]# kubectl apply -f fluentd-configmap.yaml
[root@master fluentd]# kubectl apply -f fluentd-daemonset.yaml
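Fluentd does not automatically reload a changed ConfigMap, so restart the DaemonSet Pods to pick up the new configuration (kubectl rollout restart needs kubectl v1.15+; on older versions, delete the Pods and let the DaemonSet recreate them):

kubectl rollout restart daemonset fluentd-es -n logging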
{"LOGLEVEL":"WARNING","serviceName":"msg-processor","serviceEnvironment":"staging","message":"WARNING client connection terminated unexpectedly."}
{"LOGLEVEL":"INFO","serviceName":"msg-processor","serviceEnvironment":"staging","message":"","eventsNumber":5}
{"LOGLEVEL":"INFO","serviceName":"msg-receiver-api":"msg-receiver-api","serviceEnvironment":"staging","volume":14,"message":"API received messages"}
{"LOGLEVEL":"ERROR","serviceName":"msg-receiver-api","serviceEnvironment":"staging","message":"ERROR Unable to upload files for processing"}
JSON-formatted logs are very easy to parse. Once logs are shipped to ES in structured form, we can query them by specific field values rather than by full-text search. Plain-text logs can also be structured, but then every application has its own format and each needs its own parsing rules, which is a lot of work; it is therefore best to standardize on JSON log output.
Our sample application periodically emits different types of log messages at different log levels (INFO/WARN/ERROR); one JSON line is one collected log message, which Fluentd picks up and ships to Elasticsearch. We rely on Fluentd's automatic JSON parsing here: by default, Fluentd sends each line of a log file as a field named log and adds further fields automatically, such as tag identifying the container and stream distinguishing stdout from stderr.
<filter kubernetes.**>
  @id filter_parser
  @type parser              # multi-format parser plugin
  key_name log              # the field in the record to parse
  reserve_data true         # keep the original key/value pairs in the parsed result
  remove_key_name_field true   # drop the key_name field once parsing succeeds
  <parse>
    @type multi_format
    <pattern>
      format json           # try JSON first
    </pattern>
    <pattern>
      format none           # fall back to passing the line through unparsed
    </pattern>
  </parse>
</filter>
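To make the effect concrete, here is roughly what one record from the ERROR sample above looks like before and after this filter (the stream field is added by Fluentd; the exact record layout is an approximation):

# Before: the JSON payload is a single opaque string in the "log" field
{"log":"{\"LOGLEVEL\":\"ERROR\",\"serviceName\":\"msg-receiver-api\",...}","stream":"stdout"}

# After: the JSON keys are promoted to top-level fields and "log" is removed
# (remove_key_name_field true), while the other fields are kept (reserve_data true)
{"LOGLEVEL":"ERROR","serviceName":"msg-receiver-api","serviceEnvironment":"staging","message":"ERROR Unable to upload files for processing","stream":"stdout"}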
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dummylogs
spec:
  replicas: 3
  selector:
    matchLabels:
      app: dummylogs
  template:
    metadata:
      labels:
        app: dummylogs
        logging: "true"   # this label is required for the logs to be collected
    spec:
      containers:
        - name: dummy
          image: cnych/dummylogs:latest
          args:
            - msg-processor
kubectl apply -f dummylogs.yaml
kubectl get pods -l logging=true
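Once the three replicas are running, spot-check that they emit the JSON lines shown earlier (kubectl logs accepts a label selector):

kubectl logs -l app=dummylogs --tail=2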