Deploying an ELK 7.14 Cluster on K8S to Collect Container Logs

Deployment Architecture

  • Filebeat, Kafka, Logstash, Elasticsearch, and Kibana are all deployed in K8S to collect container logs
  • The Elasticsearch master, data, and coordinating/client nodes are deployed separately
  • Elasticsearch has the X-Pack plugin enabled for username/password authentication

Deployment Process

The example environment is Alibaba Cloud Kubernetes Service (ACK). Other K8S environments will differ slightly; adjust the storageClassName and Service type in the yaml files accordingly.
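
You can first check which StorageClasses your cluster provides and pick the one to put into the yaml files (the examples below use alicloud-disk-ssd):

# kubectl get storageclass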

1. Create a dedicated namespace

# cat 00-ns.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: ns-elk
  labels:
    app: elasticsearch

# kubectl apply -f 00-ns.yaml

2. Generate the SSL certificates used when x-pack is enabled

// Reference: https://github.com/elastic/helm-charts/blob/master/elasticsearch/examples/security/Makefile#L24-L35
# docker run --name es-certutil -i -w /tmp docker.elastic.co/elasticsearch/elasticsearch:7.14.0 /bin/sh -c  \
    "elasticsearch-certutil ca --out /tmp/es-ca.p12 --pass '' && \
    elasticsearch-certutil cert --name security-master --dns \
    security-master --ca /tmp/es-ca.p12 --pass '' --ca-pass '' --out /tmp/elastic-certificates.p12"
# docker cp es-certutil:/tmp/elastic-certificates.p12 ./
// Create the secret elastic-certificates used for SSL authentication
# kubectl -n ns-elk create secret generic elastic-certificates --from-file=./elastic-certificates.p12
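
A quick check that the secret exists and carries the certificate file:

# kubectl -n ns-elk describe secret elastic-certificates
// the Data section should list elastic-certificates.p12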

3. Deploy the elasticsearch master nodes

# cat 01-es-master.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  namespace: ns-elk
  name: elasticsearch-master
  labels:
    app: elasticsearch
    role: master
spec:
  serviceName: elasticsearch-master
  replicas: 3
  selector:
    matchLabels:
      app: elasticsearch
      role: master
  template:
    metadata:
      labels:
        app: elasticsearch
        role: master
    spec:
      containers:
        - name: elasticsearch
          image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
          resources:
            requests:
              memory: 2Gi
              cpu: 1.0
            limits:
              memory: 4Gi
              cpu: 2.0
          command: ["bash", "-c", "ulimit -l unlimited && sysctl -w vm.max_map_count=262144 && chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/data && exec su elasticsearch docker-entrypoint.sh"]
          ports:
            - containerPort: 9200
              name: http
            - containerPort: 9300
              name: transport
          env:
            # Configure elasticsearch by overriding settings through environment variables
            - name: discovery.seed_hosts
              value: "elasticsearch-master.ns-elk.svc.cluster.local"
            - name: cluster.initial_master_nodes
              value: "elasticsearch-master-0,elasticsearch-master-1,elasticsearch-master-2"
            - name: ES_JAVA_OPTS
              value: -Xms2g -Xmx2g

            - name: node.master
              value: "true"
            - name: node.ingest
              value: "false"
            - name: node.data
              value: "false"

            - name: cluster.name
              value: "elasticsearch-cluster-v7"
            - name: node.name
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name

            - name: xpack.security.enabled
              value: "true"
            - name: xpack.security.transport.ssl.enabled
              value: "true"
            - name: xpack.monitoring.collection.enabled
              value: "true"
            - name: xpack.security.transport.ssl.verification_mode
              value: "certificate"
            - name: xpack.security.transport.ssl.keystore.path
              value: "/usr/share/elasticsearch/config/elastic-certificates.p12"
            - name: xpack.security.transport.ssl.truststore.path
              value: "/usr/share/elasticsearch/config/elastic-certificates.p12"

          volumeMounts:
           - mountPath: /usr/share/elasticsearch/data
             name: pv-storage-elastic-master
           - name: elastic-certificates
             readOnly: true
             mountPath: "/usr/share/elasticsearch/config/elastic-certificates.p12"
             subPath: elastic-certificates.p12
           - mountPath: /etc/localtime
             name: localtime
          # privileged is needed so the startup command above can run sysctl and chown
          securityContext:
            privileged: true

      volumes:
      - name: elastic-certificates
        secret:
          secretName: elastic-certificates
      - hostPath:
          path: /etc/localtime
        name: localtime

      # Pull image from private repo
      # imagePullSecrets:
      # - name: 
  volumeClaimTemplates:
  - metadata:
      name: pv-storage-elastic-master
    spec:
      accessModes: [ "ReadWriteOnce" ]
      storageClassName: "alicloud-disk-ssd"
      resources:
        requests:
          storage: 20Gi

# kubectl -n ns-elk apply -f 01-es-master.yaml
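
Before continuing, wait for the three master pods to reach the Running state, for example:

# kubectl -n ns-elk get pods -l app=elasticsearch,role=master -w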

4. Deploy the elasticsearch data nodes

# cat 02-es-data.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  namespace: ns-elk
  name: elasticsearch-data
  labels:
    app: elasticsearch
    role: data
spec:
  serviceName: elasticsearch-data
  replicas: 3
  selector:
    matchLabels:
      app: elasticsearch
      role: data
  template:
    metadata:
      labels:
        app: elasticsearch
        role: data
    spec:
      containers:
        - name: elasticsearch
          image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
          resources:
            requests:
              memory: 8Gi
              cpu: 4.0
            limits:
              memory: 16Gi
              cpu: 8.0
          command: ["bash", "-c", "ulimit -l unlimited && sysctl -w vm.max_map_count=262144 && chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/data && exec su elasticsearch docker-entrypoint.sh"]
          ports:
            - containerPort: 9200
              name: http
            - containerPort: 9300
              name: transport
          env:
            - name: discovery.seed_hosts
              value: "elasticsearch-master.ns-elk.svc.cluster.local"
            - name: ES_JAVA_OPTS
              value: -Xms8g -Xmx8g

            - name: node.master
              value: "false"
            - name: node.ingest
              value: "false"
            - name: node.data
              value: "true"

            - name: cluster.name
              value: "elasticsearch-cluster-v7"
            - name: node.name
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name

            - name: xpack.security.enabled
              value: "true"
            - name: xpack.security.transport.ssl.enabled
              value: "true"
            - name: xpack.monitoring.collection.enabled
              value: "true"
            - name: xpack.security.transport.ssl.verification_mode
              value: "certificate"
            - name: xpack.security.transport.ssl.keystore.path
              value: "/usr/share/elasticsearch/config/elastic-certificates.p12"
            - name: xpack.security.transport.ssl.truststore.path
              value: "/usr/share/elasticsearch/config/elastic-certificates.p12"

          volumeMounts:
           - mountPath: /usr/share/elasticsearch/data
             name: pv-storage-elastic-data
           - name: elastic-certificates
             readOnly: true
             mountPath: "/usr/share/elasticsearch/config/elastic-certificates.p12"
             subPath: elastic-certificates.p12
           - mountPath: /etc/localtime
             name: localtime

          # privileged is needed so the startup command above can run sysctl and chown
          securityContext:
            privileged: true

      volumes:
      - name: elastic-certificates
        secret:
          secretName: elastic-certificates
      - hostPath:
          path: /etc/localtime
        name: localtime

      # Pull image from private repo
      # imagePullSecrets:
      # - name: 
  volumeClaimTemplates:
  - metadata:
      name: pv-storage-elastic-data
    spec:
      accessModes: [ "ReadWriteOnce" ]
      storageClassName: "alicloud-disk-ssd"
      resources:
        requests:
          storage: 20Gi


# kubectl -n ns-elk apply -f 02-es-data.yaml

5. Deploy the elasticsearch client/ingest nodes

# cat 03-es-client.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  namespace: ns-elk
  name: elasticsearch-client
  labels:
    app: elasticsearch
    role: client
spec:
  serviceName: elasticsearch-client
  replicas: 3
  selector:
    matchLabels:
      app: elasticsearch
      role: client
  template:
    metadata:
      labels:
        app: elasticsearch
        role: client
    spec:
      containers:
        - name: elasticsearch
          image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
          resources:
            requests:
              memory: 4Gi
              cpu: 2.0
            limits:
              memory: 8Gi
              cpu: 4.0
          command: ["bash", "-c", "ulimit -l unlimited && sysctl -w vm.max_map_count=262144 && chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/data && exec su elasticsearch docker-entrypoint.sh"]
          ports:
            - containerPort: 9200
              name: http
            - containerPort: 9300
              name: transport
          env:
            - name: discovery.seed_hosts
              value: "elasticsearch-master.ns-elk.svc.cluster.local"
            - name: ES_JAVA_OPTS
              value: -Xms4g -Xmx4g

            - name: node.master
              value: "false"
            - name: node.ingest
              value: "true"
            - name: node.data
              value: "false"

            - name: cluster.name
              value: "elasticsearch-cluster-v7"
            - name: node.name
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # Set the passwords: kubectl -n ns-elk exec -it $(kubectl -n ns-elk get pods | grep elasticsearch-client | sed -n 1p | awk '{print $1}') -- bin/elasticsearch-setup-passwords auto -b
            - name: xpack.security.enabled
              value: "true"
            - name: xpack.security.transport.ssl.enabled
              value: "true"
            - name: xpack.monitoring.collection.enabled
              value: "true"
            - name: xpack.security.transport.ssl.verification_mode
              value: "certificate"
            - name: xpack.security.transport.ssl.keystore.path
              value: "/usr/share/elasticsearch/config/elastic-certificates.p12"
            - name: xpack.security.transport.ssl.truststore.path
              value: "/usr/share/elasticsearch/config/elastic-certificates.p12"

          volumeMounts:
           - mountPath: /usr/share/elasticsearch/data
             name: pv-storage-elastic-client
           - name: elastic-certificates
             readOnly: true
             mountPath: "/usr/share/elasticsearch/config/elastic-certificates.p12"
             subPath: elastic-certificates.p12
           - mountPath: /etc/localtime
             name: localtime

          # privileged is needed so the startup command above can run sysctl and chown
          securityContext:
            privileged: true

      volumes:
      - name: elastic-certificates
        secret:
          secretName: elastic-certificates
      - hostPath:
          path: /etc/localtime
        name: localtime

      # Pull image from private repo
      # imagePullSecrets:
      # - name: 
  volumeClaimTemplates:
  - metadata:
      name: pv-storage-elastic-client
    spec:
      accessModes: [ "ReadWriteOnce" ]
      storageClassName: "alicloud-disk-ssd"
      resources:
        requests:
          storage: 20Gi

# kubectl -n ns-elk apply -f 03-es-client.yaml
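
At this point there should be nine elasticsearch pods in total (3 master, 3 data, 3 client); a quick way to check:

# kubectl -n ns-elk get pods -l app=elasticsearch -o wide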

6. Expose the elasticsearch services

# cat 04-es-service.yaml
apiVersion: v1
kind: Service
metadata:
  namespace: ns-elk
  name: elasticsearch-master
  labels:
    app: elasticsearch
    role: master
spec:
  clusterIP: None
  selector:
    app: elasticsearch
    role: master
  ports:
    - port: 9200
      name: http
    - port: 9300
      name: node-to-node
---
apiVersion: v1
kind: Service
metadata:
  namespace: ns-elk
  name: elasticsearch-data
  labels:
    app: elasticsearch
    role: data
spec:
  clusterIP: None
  selector:
    app: elasticsearch
    role: data
  ports:
    - port: 9200
      name: http
    - port: 9300
      name: node-to-node
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-address-type: "intranet"
    # change alicloud-loadbalancer-id to the ID of your Alibaba Cloud SLB
    service.beta.kubernetes.io/alicloud-loadbalancer-id: "lb-wz96ujq51shspyxb5hpd7"
    service.beta.kubernetes.io/alicloud-loadbalancer-force-override-listeners: "true"
  namespace: ns-elk
  name: elasticsearch-service
  labels:
    app: elasticsearch
    role: client
spec:
  externalTrafficPolicy: Local
  sessionAffinity: None
  type: LoadBalancer
  ports:
    - port: 9200
      targetPort: 9200
  selector:
    app: elasticsearch
    role: client


# kubectl -n ns-elk apply -f 04-es-service.yaml
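
The two headless services are used for discovery inside the cluster, while elasticsearch-service exposes the client nodes through the SLB. To look up the address it was assigned:

# kubectl -n ns-elk get svc elasticsearch-service
// the EXTERNAL-IP column is the SLB address that serves elasticsearch on port 9200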

7. Set the elasticsearch passwords

// Remember to save these initial passwords
# kubectl -n ns-elk exec -it $(kubectl -n ns-elk get pods | grep elasticsearch-client | sed -n 1p | awk '{print $1}') -- bin/elasticsearch-setup-passwords auto -b
Changed password for user apm_system
PASSWORD apm_system = MxlMXbMah7x54c4YQjPj

Changed password for user kibana_system
PASSWORD kibana_system = dgPCuR2ayG9FPCYLHlav

Changed password for user kibana
PASSWORD kibana = dgPCuR2ayG9FPCYLHlav

Changed password for user logstash_system
PASSWORD logstash_system = KgynQ5D3pD3OXDmV5IMA

Changed password for user beats_system
PASSWORD beats_system = ZMTRWXeVkEsrKU3BPl27

Changed password for user remote_monitoring_user
PASSWORD remote_monitoring_user = Vp5WI34HRO8XfRgAzBrC

Changed password for user elastic
PASSWORD elastic = 03sWFWzGOjNOCioqcbV3

You can now log in to elasticsearch with the username elastic and the password 03sWFWzGOjNOCioqcbV3.
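
A quick way to verify the credentials is to query the cluster health from one of the client pods (elasticsearch-client-0 is the first pod of the StatefulSet above; the elasticsearch image ships with curl):

# kubectl -n ns-elk exec -it elasticsearch-client-0 -- curl -s -u elastic:03sWFWzGOjNOCioqcbV3 'http://localhost:9200/_cluster/health?pretty'
// a healthy cluster reports "status" : "green"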

8. Create the secret kibana uses to connect to elasticsearch

// Use the password 03sWFWzGOjNOCioqcbV3 generated above for the user elastic (this is also the username/password used to log in to kibana)
# kubectl -n ns-elk create secret generic elasticsearch-password --from-literal password=03sWFWzGOjNOCioqcbV3 

9. Deploy kibana

# cat 05-kibana.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: ns-elk
  name: kibana-config
  labels:
    app: kibana
data:
  kibana.yml: |-
    server.host: 0.0.0.0
    elasticsearch:
      hosts: ${ELASTICSEARCH_HOSTS}
      username: ${ELASTICSEARCH_USER}
      password: ${ELASTICSEARCH_PASSWORD}
---
kind: Deployment
apiVersion: apps/v1
metadata:
  labels:
    app: kibana
  name: kibana
  namespace: ns-elk
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: kibana
  template:
    metadata:
      labels:
        app: kibana
    spec:
      containers:
        - name: kibana
          image: docker.elastic.co/kibana/kibana:7.14.0
          ports:
            - containerPort: 5601
              protocol: TCP
          env:
            # substituted into elasticsearch.hosts in the kibana.yml ConfigMap above
            - name: ELASTICSEARCH_HOSTS
              value: "http://elasticsearch-service:9200"
            - name: ELASTICSEARCH_USER
              value: "elastic"
            - name: ELASTICSEARCH_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: elasticsearch-password
                  key: password
            - name: xpack.encryptedSavedObjects.encryptionKey
              value: "min-32-byte-long-strong-encryption-key"

          volumeMounts:
          - name: kibana-config
            mountPath: /usr/share/kibana/config/kibana.yml
            readOnly: true
            subPath: kibana.yml
          - mountPath: /etc/localtime
            name: localtime
      volumes:
      - name: kibana-config
        configMap:
          name: kibana-config
      - hostPath:
          path: /etc/localtime
        name: localtime

      # imagePullSecrets:
      # - name: 
      tolerations:
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
---
kind: Service
apiVersion: v1
metadata:
  annotations:
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-address-type: "intranet"
    # change alicloud-loadbalancer-id to the ID of your Alibaba Cloud SLB
    service.beta.kubernetes.io/alicloud-loadbalancer-id: "lb-wz96ujq51shspyxb5hpd7"
    service.beta.kubernetes.io/alicloud-loadbalancer-force-override-listeners: "true"
  labels:
    app: kibana
  name: kibana-service
  namespace: ns-elk
spec:
  externalTrafficPolicy: Local
  sessionAffinity: None
  type: LoadBalancer
  ports:
    - port: 5601
      targetPort: 5601
  selector:
    app: kibana


# kubectl -n ns-elk apply -f 05-kibana.yaml

You can now log in to kibana with the username elastic and the password 03sWFWzGOjNOCioqcbV3.
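
To confirm kibana is up and find the address to open in the browser:

# kubectl -n ns-elk get pods -l app=kibana
# kubectl -n ns-elk get svc kibana-service
// open http://<EXTERNAL-IP>:5601 and log in with the elastic user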

10. Deploy zookeeper

// Reference: https://github.com/bitnami/charts/tree/master/bitnami/zookeeper
# helm repo add bitnami https://charts.bitnami.com/bitnami
# helm search repo zookeeper -l
# helm pull bitnami/zookeeper --version 5.0.3
# tar xvf zookeeper-5.0.3.tgz

# cat 06-zk.yaml
## Adjust the replica count replicaCount as needed; here it is set to 3
## Adjust storageClass as needed; here Alibaba Cloud's alicloud-disk-ssd is used
## Adjust the SSD disk size as needed (alicloud-disk-ssd requires a minimum of 20Gi)


## Global Docker image parameters
## Please, note that this will override the image parameters, including dependencies, configured to use the global value
## Current available global Docker image parameters: imageRegistry and imagePullSecrets
##
# global:
#   imageRegistry: myRegistryName
#   imagePullSecrets:
#     - myRegistryKeySecretName
#   storageClass: myStorageClass

## Bitnami Zookeeper image version
## ref: https://hub.docker.com/r/bitnami/zookeeper/tags/
##
image:
  registry: docker.io
  repository: bitnami/zookeeper
  tag: 3.5.5-debian-9-r123
  ## Specify a imagePullPolicy
  ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent'
  ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images
  ##
  pullPolicy: IfNotPresent
  ## Optionally specify an array of imagePullSecrets.
  ## Secrets must be manually created in the namespace.
  ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
  ##
  # pullSecrets:
  #   - myRegistryKeySecretName
  ## Set to true if you would like to see extra information on logs
  ## It turns BASH and NAMI debugging in minideb
  ## ref:  https://github.com/bitnami/minideb-extras/#turn-on-bash-debugging
  ##
  debug: false

## String to partially override zookeeper.fullname template (will maintain the release name)
# nameOverride:

## String to fully override zookeeper.fullname template
# fullnameOverride:

## Init containers parameters:
## volumePermissions: Change the owner and group of the persistent volume mountpoint to runAsUser:fsGroup values from the securityContext section.
##
volumePermissions:
  enabled: false
  image:
    registry: docker.io
    repository: bitnami/minideb
    tag: stretch
    pullPolicy: Always
    ## Optionally specify an array of imagePullSecrets.
    ## Secrets must be manually created in the namespace.
    ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
    ##
    # pullSecrets:
    #   - myRegistryKeySecretName
  resources: {}

## StatefulSet controller supports automated updates. There are two valid update strategies: RollingUpdate and OnDelete
## ref: https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#updating-statefulsets
##
updateStrategy: RollingUpdate

## Limits the number of pods of the replicated application that are down simultaneously from voluntary disruptions
## The PDB will only be created if replicaCount is greater than 1
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions
##
podDisruptionBudget:
  maxUnavailable: 1

## Partition update strategy
## https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#partitions
##
# rollingUpdatePartition:

# Cluster domain
clusterDomain: cluster.local

## StatefulSet controller supports relax its ordering guarantees while preserving its uniqueness and identity guarantees. There are two valid pod management policies: OrderedReady and Parallel
## ref: https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#pod-management-policy
##
podManagementPolicy: Parallel

## Number of ZooKeeper nodes
##
replicaCount: 3

## Basic time unit in milliseconds used by ZooKeeper for heartbeats
##
tickTime: 2000

## ZooKeeper uses to limit the length of time the ZooKeeper servers in quorum have to connect to a leader
##
initLimit: 10

## How far out of date a server can be from a leader
##
syncLimit: 5

## Limits the number of concurrent connections that a single client may make to a single member of the ZooKeeper ensemble
##
maxClientCnxns: 60

## A list of comma separated Four Letter Words commands to use
##
fourlwCommandsWhitelist: srvr, mntr

## Allow to accept connections from unauthenticated users
##
allowAnonymousLogin: true

auth:
  ## Use existing secret (ignores previous password)
  ##
  # existingSecret:
  ## Enable Zookeeper auth. It uses SASL/Digest-MD5
  ##
  enabled: false
  ## User that will use Zookeeper clients to auth
  ##
  # clientUser:
  ## Password that will use Zookeeper clients to auth
  ##
  # clientPassword:
  ## Comma, semicolon or whitespace separated list of user to be created. Specify them as a string, for example: "user1,user2,admin"
  ##
  # serverUsers: ""
  ## Comma, semicolon or whitespace separated list of passwords to assign to users when created. Specify them as a string, for example: "pass4user1, pass4user2, pass4admin"
  ##
  # serverPasswords: ""

## Size in MB for the Java Heap options (Xmx and XMs). This env var is ignored if Xmx an Xms are configured via JVMFLAGS
##
heapSize: 1024

## Log level for the Zookeeper server. ERROR by default. Have in mind if you set it to INFO or WARN the ReadinessProve will produce a lot of logs.
##
logLevel: ERROR

## Default JVMFLAGS for the ZooKeeper process
##
# jvmFlags:

## Configure ZooKeeper with a custom zoo.cfg file
##
# config:

## Kubernetes configuration
## For minikube, set this to NodePort, elsewhere use LoadBalancer
##
service:
  type: ClusterIP
  port: 2181
  followerPort: 2888
  electionPort: 3888
  publishNotReadyAddresses: true

## Zookeeper Pod Security Context
securityContext:
  enabled: true
  fsGroup: 1001
  runAsUser: 1001

## Zookeeper data Persistent Volume Storage Class
## If defined, storageClassName: 
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
##   set, choosing the default provisioner.  (gp2 on AWS, standard on
##   GKE, AWS & OpenStack)
##
persistence:
  enabled: true
  storageClass: "alicloud-disk-ssd"
  accessModes:
    - ReadWriteOnce
  size: 20Gi
  annotations: {}

## Node labels for pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}

## Tolerations for pod assignment
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []

## Affinity for pod assignment
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
##
affinity: {}

## Configure resource requests and limits
## ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources:
  requests:
    memory: 256Mi
    cpu: 250m

## Configure extra options for liveness and readiness probes
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes)
##
livenessProbe:
  enabled: true
  initialDelaySeconds: 30
  periodSeconds: 10
  timeoutSeconds: 5
  failureThreshold: 6
  successThreshold: 1

readinessProbe:
  enabled: true
  initialDelaySeconds: 5
  periodSeconds: 10
  timeoutSeconds: 5
  failureThreshold: 6
  successThreshold: 1

## Prometheus Exporter / Metrics
##
metrics:
  enabled: false

  image:
    registry: docker.io
    repository: bitnami/zookeeper-exporter
    tag: 0.1.1-debian-9-r58
    pullPolicy: IfNotPresent
    ## Optionally specify an array of imagePullSecrets.
    ## Secrets must be manually created in the namespace.
    ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
    ##
    # pullSecrets:
    #   - myRegistryKeySecretName

  ## Metrics exporter pod Annotation and Labels
  podAnnotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9141"
  podLabels: {}

  timeoutSeconds: 3

  ## Metrics exporter resource requests and limits
  ## ref: http://kubernetes.io/docs/user-guide/compute-resources/
  ##
  resources: {}

  ## Metrics exporter labels and tolerations for pod assignment
  # nodeSelector: {"beta.kubernetes.io/arch": "amd64"}
  # tolerations: []


# helm -n ns-elk install zookeeper -f 06-zk.yaml ./zookeeper
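
To confirm the zookeeper ensemble started (pod and service names come from the helm release name, here zookeeper):

# kubectl -n ns-elk get pods | grep zookeeper
# kubectl -n ns-elk get svc | grep zookeeper
// zookeeper.ns-elk.svc.cluster.local:2181 is the address referenced by kafka below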

11. Deploy kafka

// Reference: https://github.com/bitnami/charts/tree/master/bitnami/kafka
// filebeat has requirements on the kafka version; see: https://www.elastic.co/guide/en/beats/filebeat/current/kafka-output.html#kafka-compatibility
# helm search repo kafka  -l
# helm pull bitnami/kafka --version 3.0.9
# tar xvf kafka-3.0.9.tgz
// !!! Change the statefulset API version from apiVersion: apps/v1beta2 to apiVersion: apps/v1
# vim kafka/templates/statefulset.yaml
# vim kafka/charts/zookeeper/templates/statefulset.yaml

# cat 07-kafka.yaml
## Adjust the replica count replicaCount as needed; here it is set to 3
## Adjust storageClass as needed; here Alibaba Cloud's alicloud-disk-ssd is used
## Adjust the SSD disk size as needed (alicloud-disk-ssd requires a minimum of 20Gi)
## Set externalZookeeper.servers to the zookeeper address


## Global Docker image parameters
## Please, note that this will override the image parameters, including dependencies, configured to use the global value
## Current available global Docker image parameters: imageRegistry and imagePullSecrets
##
# global:
#   imageRegistry: myRegistryName
#   imagePullSecrets:
#     - myRegistryKeySecretName

## Bitnami Kafka image version
## ref: https://hub.docker.com/r/bitnami/kafka/tags/
##
image:
  registry: docker.io
  repository: bitnami/kafka
  tag: 2.2.1-debian-9-r12
  ## Specify a imagePullPolicy
  ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent'
  ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images
  ##
  pullPolicy: IfNotPresent
  ## Optionally specify an array of imagePullSecrets.
  ## Secrets must be manually created in the namespace.
  ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
  ##
  # pullSecrets:
  #   - myRegistryKeySecretName

  ## Set to true if you would like to see extra information on logs
  ## It turns BASH and NAMI debugging in minideb
  ## ref:  https://github.com/bitnami/minideb-extras/#turn-on-bash-debugging
  debug: false

## String to partially override kafka.fullname template (will maintain the release name)
# nameOverride:

 ## String to fully override kafka.fullname template
# fullnameOverride:

## StatefulSet controller supports automated updates. There are two valid update strategies: RollingUpdate and OnDelete
## ref: https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#updating-statefulsets
##
updateStrategy: RollingUpdate

## Partition update strategy
## https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#partitions
##
# rollingUpdatePartition:

## Limits the number of pods of the replicated application that are down simultaneously from voluntary disruptions
## The PDB will only be created if replicaCount is greater than 1
## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions
##
podDisruptionBudget:
  maxUnavailable: 1

replicaCount: 3

config: |-
#  broker.id=-1
#  listeners=PLAINTEXT://:9092
#  advertised.listeners=PLAINTEXT://KAFKA_IP:9092
#  num.network.threads=3
#  num.io.threads=8
#  socket.send.buffer.bytes=102400
#  socket.receive.buffer.bytes=102400
#  socket.request.max.bytes=104857600
#  log.dirs=/opt/bitnami/kafka/data
#  num.partitions=1
#  num.recovery.threads.per.data.dir=1
#  offsets.topic.replication.factor=1
#  transaction.state.log.replication.factor=1
#  transaction.state.log.min.isr=1
#  log.flush.interval.messages=10000
#  log.flush.interval.ms=1000
#  log.retention.hours=168
#  log.retention.bytes=1073741824
#  log.segment.bytes=1073741824
#  log.retention.check.interval.ms=300000
#  zookeeper.connect=ZOOKEEPER_SERVICE_NAME
#  zookeeper.connection.timeout.ms=6000
#  group.initial.rebalance.delay.ms=0

## Kafka docker image available customizations
## https://github.com/bitnami/bitnami-docker-kafka#configuration
##
## Allow to use the PLAINTEXT listener.
allowPlaintextListener: true

## The address the socket server listens on.
# listeners:

## Hostname and port the broker will advertise to producers and consumers.
# advertisedListeners:

## The protocol->listener mapping
# listenerSecurityProtocolMap:

## The listener that the brokers should communicate on
# interBrokerListenerName:

## ID of the Kafka node.
brokerId: -1

## Switch to enable topic deletion or not.
deleteTopicEnable: false

## Kafka's Java Heap size.
heapOpts: -Xmx1024m -Xms1024m

## The number of messages to accept before forcing a flush of data to disk.
logFlushIntervalMessages: 10000

## The maximum amount of time a message can sit in a log before we force a flush.
logFlushIntervalMs: 1000

## A size-based retention policy for logs.
logRetentionBytes: _1073741824

## The interval at which log segments are checked to see if they can be deleted.
logRetentionCheckIntervalMs: 300000

## The minimum age of a log file to be eligible for deletion due to age.
logRetentionHours: 168

## The maximum size of a log segment file. When this size is reached a new log segment will be created.
logSegmentBytes: _1073741824

## Log message format version
logMessageFormatVersion: ""

## A comma separated list of directories under which to store log files.
logsDirs: /opt/bitnami/kafka/data

## The largest record batch size allowed by Kafka
maxMessageBytes: _1000012

## Default replication factors for automatically created topics
defaultReplicationFactor: 1

## The replication factor for the offsets topic
offsetsTopicReplicationFactor: 1

## The replication factor for the transaction topic
transactionStateLogReplicationFactor: 1

## Overridden min.insync.replicas config for the transaction topic
transactionStateLogMinIsr: 1

## The number of threads doing disk I/O.
numIoThreads: 8

## The number of threads handling network requests.
numNetworkThreads: 3

## The default number of log partitions per topic.
numPartitions: 1

## The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
numRecoveryThreadsPerDataDir: 1

## The receive buffer (SO_RCVBUF) used by the socket server.
socketReceiveBufferBytes: 102400

## The maximum size of a request that the socket server will accept (protection against OOM).
socketRequestMaxBytes: _104857600

## The send buffer (SO_SNDBUF) used by the socket server.
socketSendBufferBytes: 102400

## Timeout in ms for connecting to zookeeper.
zookeeperConnectionTimeoutMs: 6000

## The endpoint identification algorithm to validate server hostname using server certificate.
sslEndpointIdentificationAlgorithm: https

## All the parameters from the configuration file can be overwritten by using environment variables with this format: KAFKA_CFG_{KEY}
##
# extraEnvVars:
#   - name: KAFKA_CFG_BACKGROUND_THREADS
#     value: "10"

## Authentication parameteres
## https://github.com/bitnami/bitnami-docker-kafka#security
##
auth:
  ## Switch to enable the kafka authentication.
  enabled: false

  ## Name of the existing secret containing credentials for brokerUser, interBrokerUser and zookeeperUser.
  #existingSecret:

  ## Name of the existing secret containing the certificate files that will be used by Kafka.
  #certificatesSecret:

  ## Password for the above certificates if they are password protected.
  #certificatesPassword:

  ## Kafka client user.
  brokerUser: user

  ## Kafka client password.
  # brokerPassword:

  ## Kafka inter broker communication user.
  interBrokerUser: admin

  ## Kafka inter broker communication password.
  # interBrokerPassword:

  ## Kafka Zookeeper user.
  #zookeeperUser:

  ## Kafka Zookeeper password.
  #zookeeperPassword:

## Kubernetes Security Context
## https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
##
securityContext:
  enabled: true
  fsGroup: 1001
  runAsUser: 1001

## Kubernetes configuration
## For minikube, set this to NodePort, elsewhere use LoadBalancer
##
service:
  type: ClusterIP
  port: 9092

  ## Specify the NodePort value for the LoadBalancer and NodePort service types.
  ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport
  ##
  # nodePort:

  ## Use loadBalancerIP to request a specific static IP,
  # loadBalancerIP:

  ## Service annotations done as key:value pairs
  annotations:

## Kafka data Persistent Volume Storage Class
## If defined, storageClassName: 
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
##   set, choosing the default provisioner.  (gp2 on AWS, standard on
##   GKE, AWS & OpenStack)
##
persistence:
  enabled: true
  ## A manually managed Persistent Volume and Claim
  ## If defined, PVC must be created manually before volume will be bound
  ## The value is evaluated as a template, so, for example, the name can depend on .Release or .Chart
  ##
  # existingClaim:

  storageClass: "alicloud-disk-ssd"
  accessModes:
    - ReadWriteOnce
  size: 20Gi
  annotations: {}

## Node labels for pod assignment
## Ref: https://kubernetes.io/docs/user-guide/node-selection/
##
nodeSelector: {}

## Tolerations for pod assignment
## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
##
tolerations: []

## Affinity for pod assignment
## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
##
affinity: {}

## Configure resource requests and limits
## ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources:
#  limits:
#    cpu: 200m
#    memory: 1Gi
#  requests:
#    memory: 256Mi
#    cpu: 250m

## Configure extra options for liveness and readiness probes
## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes)
livenessProbe:
  enabled: true
  initialDelaySeconds: 10
  periodSeconds: 10
  timeoutSeconds: 5
  failureThreshold: 2
  successThreshold: 1

readinessProbe:
  enabled: true
  initialDelaySeconds: 5
  periodSeconds: 10
  timeoutSeconds: 5
  failureThreshold: 6
  successThreshold: 1

## Prometheus Exporters / Metrics
##
metrics:
  ## Prometheus Kafka Exporter: exposes complimentary metrics to JMX Exporter
  kafka:
    enabled: false

    image:
      registry: docker.io
      repository: danielqsj/kafka-exporter
      tag: v1.0.1
      pullPolicy: IfNotPresent
      ## Optionally specify an array of imagePullSecrets.
      ## Secrets must be manually created in the namespace.
      ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
      ##
      # pullSecrets:
      #   - myRegistryKeySecretName

    ## Interval at which Prometheus scrapes metrics, note: only used by Prometheus Operator
    interval: 10s

    ## Port kafka-exporter exposes for Prometheus to scrape metrics
    port: 9308

    ## Resource limits
    resources: {}
#      limits:
#        cpu: 200m
#        memory: 1Gi
#      requests:
#        cpu: 100m
#        memory: 100Mi

  ## Prometheus JMX Exporter: exposes the majority of Kafkas metrics
  jmx:
    enabled: false

    image:
      registry: docker.io
      repository: solsson/kafka-prometheus-jmx-exporter@sha256
      tag: a23062396cd5af1acdf76512632c20ea6be76885dfc20cd9ff40fb23846557e8
      pullPolicy: IfNotPresent
      ## Optionally specify an array of imagePullSecrets.
      ## Secrets must be manually created in the namespace.
      ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
      ##
      # pullSecrets:
      #   - myRegistryKeySecretName

    ## Interval at which Prometheus scrapes metrics, note: only used by Prometheus Operator
    interval: 10s

    ## Port jmx-exporter exposes Prometheus format metrics to scrape
    exporterPort: 5556

    resources: {}
      # limits:
      #   cpu: 200m
      #   memory: 1Gi
      # requests:
      #   cpu: 100m
      #   memory: 100Mi

    ## Credits to the incubator/kafka chart for the JMX configuration.
    ## https://github.com/helm/charts/tree/master/incubator/kafka
    ##
    ## Rules to apply to the Prometheus JMX Exporter.  Note while lots of stats have been cleaned and exposed,
    ## there are still more stats to clean up and expose, others will never get exposed.  They keep lots of duplicates
    ## that can be derived easily.  The configMap in this chart cleans up the metrics it exposes to be in a Prometheus
    ## format, eg topic, broker are labels and not part of metric name. Improvements are gladly accepted and encouraged.
    configMap:
      ## Allows disabling the default configmap, note a configMap is needed
      enabled: true
      ## Allows setting values to generate confimap
      ## To allow all metrics through (warning its crazy excessive) comment out below `overrideConfig` and set
      ## `whitelistObjectNames: []`
      overrideConfig: {}
        # jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi
        # lowercaseOutputName: true
        # lowercaseOutputLabelNames: true
        # ssl: false
        # rules:
        # - pattern: ".*"
      ## If you would like to supply your own ConfigMap for JMX metrics, supply the name of that
      ## ConfigMap as an `overrideName` here.
      overrideName: ""
    ## Port the jmx metrics are exposed in native jmx format, not in Prometheus format
    jmxPort: 5555
    ## JMX Whitelist Objects, can be set to control which JMX metrics are exposed.  Only whitelisted
    ## values will be exposed via JMX Exporter.  They must also be exposed via Rules.  To expose all metrics
    ## (warning its crazy excessive and they aren't formatted in a prometheus style) (1) `whitelistObjectNames: []`
    ## (2) commented out above `overrideConfig`.
    whitelistObjectNames:  # []
    - kafka.controller:*
    - kafka.server:*
    - java.lang:*
    - kafka.network:*
    - kafka.log:*

##
## Zookeeper chart configuration
##
## https://github.com/bitnami/charts/blob/master/bitnami/zookeeper/values.yaml
##
zookeeper:
  enabled: false

externalZookeeper:
  ## This value is only used when zookeeper.enabled is set to false

  ## Server or list of external zookeeper servers to use.
  servers: zookeeper.ns-elk.svc.cluster.local:2181


# helm -n ns-elk install kafka -f 07-kafka.yaml ./kafka
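
To confirm the brokers came up and can reach zookeeper, you can list topics from inside one of the broker pods (kafka-0 is the first pod of the chart's StatefulSet; kafka-topics.sh is on the PATH in the bitnami image):

# kubectl -n ns-elk exec -it kafka-0 -- kafka-topics.sh --zookeeper zookeeper.ns-elk.svc.cluster.local:2181 --list
// the filebeat topic should show up here once filebeat starts publishing (topic auto-creation is enabled by default)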

12. Deploy filebeat

# cat 08-filebeat.yaml
---
apiVersion: v1
kind: ConfigMap
metadata:
  # Reference: https://www.elastic.co/guide/en/beats/filebeat/current/running-on-kubernetes.html
  name: filebeat-config
  namespace: ns-elk
  labels:
    k8s-app: filebeat
data:
  filebeat.yml: |-
    filebeat.inputs:
    - type: container
      paths:
        - '/var/lib/docker/containers/*/*.log'
      processors:
        - add_kubernetes_metadata:
            host: ${NODE_NAME}
            matchers:
            - logs_path:
                logs_path: "/var/lib/docker/containers/"

    # To enable hints based autodiscover, remove `filebeat.inputs` configuration and uncomment this:
    #filebeat.autodiscover:
    #  providers:
    #    - type: kubernetes
    #      node: ${NODE_NAME}
    #      hints.enabled: true
    #      hints.default_config:
    #        type: container
    #        paths:
    #          - /var/log/containers/*${data.kubernetes.container.id}.log

    processors:
      - add_cloud_metadata:
      - add_host_metadata:

    # Reference: https://www.elastic.co/guide/en/beats/filebeat/current/kafka-output.html
    output:
      kafka:
        enabled: true
        hosts: ["kafka.ns-elk.svc.cluster.local:9092"]
        topic: filebeat
        max_message_bytes: 5242880
        partition.round_robin:
          reachable_only: true
        keep_alive: 120
        required_acks: 1
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: filebeat
  namespace: ns-elk
  labels:
    k8s-app: filebeat
spec:
  selector:
    matchLabels:
      k8s-app: filebeat
  template:
    metadata:
      labels:
        k8s-app: filebeat
    spec:
      serviceAccountName: filebeat
      terminationGracePeriodSeconds: 30
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
      - name: filebeat
        image: docker.elastic.co/beats/filebeat:7.14.0
        args: [
          "-c", "/etc/filebeat.yml",
          "-e",
        ]
        env:
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        securityContext:
          runAsUser: 0
          # If using Red Hat OpenShift uncomment this:
          #privileged: true
        resources:
          limits:
            memory: 200Mi
          requests:
            cpu: 100m
            memory: 100Mi
        volumeMounts:
        - name: config
          mountPath: /etc/filebeat.yml
          readOnly: true
          subPath: filebeat.yml
        - name: data
          mountPath: /usr/share/filebeat/data
        - name: varlibdockercontainers
          mountPath: /var/lib/docker/containers
          readOnly: true
      volumes:
      - name: config
        configMap:
          defaultMode: 0640
          name: filebeat-config
      - name: varlibdockercontainers
        hostPath:
          path: /var/lib/docker/containers
      # data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart
      - name: data
        hostPath:
          # When filebeat runs as non-root user, this directory needs to be writable by group (g+w).
          path: /var/lib/filebeat-data
          type: DirectoryOrCreate
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: filebeat
subjects:
- kind: ServiceAccount
  name: filebeat
  namespace: ns-elk
roleRef:
  kind: ClusterRole
  name: filebeat
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: filebeat
  namespace: ns-elk
subjects:
  - kind: ServiceAccount
    name: filebeat
    namespace: ns-elk
roleRef:
  kind: Role
  name: filebeat
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: filebeat-kubeadm-config
  namespace: ns-elk
subjects:
  - kind: ServiceAccount
    name: filebeat
    namespace: ns-elk
roleRef:
  kind: Role
  name: filebeat-kubeadm-config
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: filebeat
  labels:
    k8s-app: filebeat
rules:
- apiGroups: [""] # "" indicates the core API group
  resources:
  - namespaces
  - pods
  - nodes
  verbs:
  - get
  - watch
  - list
- apiGroups: ["apps"]
  resources:
    - replicasets
  verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: filebeat
  # should be the namespace where filebeat is running
  namespace: ns-elk
  labels:
    k8s-app: filebeat
rules:
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs: ["get", "create", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: filebeat-kubeadm-config
  namespace: ns-elk
  labels:
    k8s-app: filebeat
rules:
  - apiGroups: [""]
    resources:
      - configmaps
    resourceNames:
      - kubeadm-config
    verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: filebeat
  namespace: ns-elk
  labels:
    k8s-app: filebeat

# kubectl -n ns-elk apply -f 08-filebeat.yaml
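
Check that a filebeat pod is running on every node and that the pods ship to kafka without errors:

# kubectl -n ns-elk get ds filebeat
# kubectl -n ns-elk logs -l k8s-app=filebeat --tail=20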

13. Deploy logstash

# cat 09-logstach.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: logstash-configmap
  namespace: ns-elk
data:
  logstash.yml: |
    http.host: "0.0.0.0"
    path.config: /usr/share/logstash/pipeline
  logstash.conf: |
    # Reference: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-kafka.html
    input {
      kafka {
        bootstrap_servers => "kafka.ns-elk.svc.cluster.local:9092"
        topics => ["filebeat"]
        # keep the json structure of the container logs
        codec => "json"
      }
    }
    filter {
      date {
        match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ]
      }
    }
    # Reference: https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html
    output {
      elasticsearch {
        hosts => ["elasticsearch-service:9200"]
        user => "elastic"
        password => "03sWFWzGOjNOCioqcbV3"
        # Reference: https://discuss.elastic.co/t/separate-indexes-for-each-kubernetes-namespace/169131/3
        # create a separate index per pod based on the kubernetes.pod.name field
        index => "%{[kubernetes][pod][name]}-%{+YYYY.MM.dd}"
      }
    }

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: logstash-deployment
  namespace: ns-elk
spec:
  selector:
    matchLabels:
      app: logstash
  replicas: 3
  template:
    metadata:
      labels:
        app: logstash
    spec:
      containers:
      - name: logstash
        image: docker.elastic.co/logstash/logstash:7.14.0
        ports:
        - containerPort: 5044
        volumeMounts:
          - name: config-volume
            mountPath: /usr/share/logstash/config
          - name: logstash-pipeline-volume
            mountPath: /usr/share/logstash/pipeline
          - mountPath: /etc/localtime
            name: localtime
      volumes:
      - name: config-volume
        configMap:
          name: logstash-configmap
          items:
            - key: logstash.yml
              path: logstash.yml
      - name: logstash-pipeline-volume
        configMap:
          name: logstash-configmap
          items:
            - key: logstash.conf
              path: logstash.conf
      - hostPath:
          path: /etc/localtime
        name: localtime
---
kind: Service
apiVersion: v1
metadata:
  name: logstash-service
  namespace: ns-elk
spec:
  selector:
    app: logstash
  ports:
  - protocol: TCP
    port: 5044
    targetPort: 5044

# kubectl -n ns-elk apply -f 09-logstach.yaml

At this point the logs can be searched from kibana.
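
If the logs do not show up, a useful check is whether the per-pod indices are being created in elasticsearch:

# kubectl -n ns-elk exec -it elasticsearch-client-0 -- curl -s -u elastic:03sWFWzGOjNOCioqcbV3 'http://localhost:9200/_cat/indices?v'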


Details of the collected logs


14. Miscellaneous

  • If you cannot pull images from docker.elastic.co, you can use the corresponding images under https://hub.docker.com/u/awker
  • The complete project files are at https://github.com/py-k8s/kube-elk
