企业级实战 Spring Boot + K8S 中的滚动发布、优雅停机、弹性伸缩、应用监控、配置分离

下面为大家介绍我司在生产环境中使用了 3 年的基于 K8S 的 DevOps 配置:通过 K8S + Spring Cloud 实现零宕机发版与优雅重启,涵盖健康检查、滚动更新、优雅停机、弹性伸缩、Prometheus 监控以及配置分离(镜像复用)。

汇总配置

业务层面

项目依赖 pom.xml

使用 spring-boot-starter-actuator 进行存活、就绪检查

使用 prometheus 进行应用监控



    <!-- Actuator: provides the health (liveness/readiness) and shutdown endpoints. -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-actuator</artifactId>
    </dependency>
    <!-- Micrometer Prometheus registry: backs the /actuator/prometheus metrics endpoint. -->
    <dependency>
        <groupId>io.micrometer</groupId>
        <artifactId>micrometer-registry-prometheus</artifactId>
    </dependency>

定义访问端口、路径及权限 application.yaml

spring:
  application:
    name: project-sample
  profiles:
    # Build-time placeholder (presumably replaced by Maven resource filtering).
    # Quoted because a plain YAML scalar must not start with '@'.
    active: '@profileActive@'
  lifecycle:
    # Upper bound for each graceful-shutdown phase; after 30s the application
    # stops regardless of in-flight work.
    timeout-per-shutdown-phase: 30s

server:
  port: 8080
  # Default is "immediate" (stop at once); "graceful" drains in-flight requests.
  shutdown: graceful

management:
  server:
    # Dedicated management port, kept separate from business traffic on 8080.
    port: 50000
  metrics:
    tags:
      application: ${spring.application.name}
  # Enable the shutdown, service-registry (used to deregister from Nacos) and
  # health-probe endpoints.
  endpoint:
    shutdown:
      enabled: true
    serviceregistry:
      enabled: true
    health:
      probes:
        enabled: true
  endpoints:
    web:
      # Context path for all actuator endpoints. This key belongs directly
      # under "web"; nested under "exposure" it is silently ignored.
      base-path: /actuator
      exposure:
        # "serviceregistry" must be exposed as well, otherwise the preStop
        # hook cannot reach it over HTTP.
        include: health,shutdown,metrics,prometheus,serviceregistry

运维层面

  • 使用 Dockerfile 构建我们自己的基础镜像
# Shared base image for all services: JDK 8 on Alpine plus common tooling.
FROM openjdk:8-jdk-alpine

LABEL Author="andanyang"

# Container time zone.
ENV TZ=Asia/Shanghai
# Extra JVM options. Empty by default; meant to be overridden at deploy time.
# ENV requires a value, so an explicit empty string is used.
ENV JAVA_OPT=""

WORKDIR /root
# SkyWalking agent; drop this line if SkyWalking is not used.
COPY skywalking-agent /root/agent

# curl/bash: tooling used by container hooks and debugging.
# ttf-dejavu: fixes the missing-font problem in EasyExcel.
# tzdata: provides /usr/share/zoneinfo, which the symlink below points into.
# tini: minimal init process used as the entrypoint.
# --no-cache avoids leaving an apk index behind, so no manual cleanup is needed.
RUN apk add --no-cache curl bash ttf-dejavu tzdata tini && \
  ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Start the application under tini (per the original note, this also lets
# tools such as jps work inside the container). "--" stops tini from parsing
# the child command's arguments as its own options.
ENTRYPOINT ["tini", "--"]
# Build the base image from the Dockerfile in the current directory and tag it common/jdk8.
docker build -t common/jdk8 .
  • 打包各个服务的镜像

    # Per-service image built on top of the shared base image.
    FROM common/jdk8
    # Service name, supplied at build time (--build-arg app_name=...).
    # Re-exported as ENV so the shell-form CMD can still expand it at runtime.
    ARG app_name
    ENV app_name=${app_name}

    COPY ${app_name}-impl/target/${app_name}-impl.jar ${app_name}.jar

    # Remove the -javaagent option if SkyWalking is not used.
    # "exec" replaces the shell with the JVM so signals reach Java directly.
    CMD exec /usr/bin/java -Djava.security.egd=file:/dev/./urandom -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/root/log/ -javaagent:/root/agent/skywalking-agent.jar ${JAVA_OPT} -jar ${app_name}.jar
    
    

k8s部署模版deployment.yaml

# Deployment template. {PLACEHOLDER} tokens are substituted before applying.
# String placeholders are quoted so the rendered value is always a string;
# integer placeholders ({REPLICAS}, {APP_PORT}) are left unquoted on purpose.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: "{APP_NAME}"
  labels:
    app: "{APP_NAME}"
spec:
  selector:
    matchLabels:
      app: "{APP_NAME}"
  # Number of pod replicas.
  replicas: {REPLICAS}
  strategy:
    # Rolling update: start one extra pod first, never drop below the
    # desired replica count.
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  template:
    metadata:
      name: "{APP_NAME}"
      labels:
        app: "{APP_NAME}"
      annotations:
        # Annotation values must be strings; an all-digit timestamp would
        # otherwise render as a number and be rejected.
        timestamp: "{TIMESTAMP}"
        # Must be a literal value; it cannot be assigned dynamically.
        prometheus.io/port: "50000"
        prometheus.io/path: /actuator/prometheus
        # Pod-based service discovery.
        prometheus.io/scrape: "true"
    spec:
      # Scheduling preference: spread replicas across hosts / zones.
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - "{APP_NAME}"
                # For multi-zone spreading use "topology.kubernetes.io/zone".
                topologyKey: kubernetes.io/hostname
      # Grace period before SIGKILL. It also covers the preStop hook, so it
      # must exceed the hook's 10s sleep plus the application's 30s
      # timeout-per-shutdown-phase.
      terminationGracePeriodSeconds: 60
      containers:
        - name: "{APP_NAME}"
          image: "{IMAGE_URL}"
          imagePullPolicy: Always
          ports:
            - containerPort: {APP_PORT}
            # Application management (actuator) port.
            - name: management-port
              containerPort: 50000
          # Readiness probe.
          readinessProbe:
            httpGet:
              path: /actuator/health/readiness
              port: management-port
            initialDelaySeconds: 30      # delay before the first probe
            periodSeconds: 10            # interval between probes
            timeoutSeconds: 1            # per-probe timeout
            successThreshold: 1          # successes needed to be ready
            failureThreshold: 9          # failures needed to be unready
          # Liveness probe.
          livenessProbe:
            httpGet:
              path: /actuator/health/liveness
              port: management-port
            initialDelaySeconds: 30      # delay before the first probe
            periodSeconds: 10            # interval between probes
            timeoutSeconds: 1            # per-probe timeout
            successThreshold: 1          # successes needed to be healthy
            failureThreshold: 6          # failures before a restart
          # Container resource management.
          resources:
            # Hard limits.
            limits:
              cpu: 500m
              memory: 1Gi
            # Minimum guaranteed resources, used for scheduling.
            requests:
              cpu: 100m
              memory: 200Mi
          lifecycle:
            # Pre-stop hook. With Spring Cloud the registry may not remove
            # the instance promptly, so first mark it DOWN, wait for the
            # change to propagate, then trigger the graceful shutdown.
            preStop:
              exec:
                command:
                  - /bin/sh
                  - "-c"
                  - |-
                    curl -X "POST" "http://localhost:50000/actuator/service-registry?status=DOWN" -H "Content-Type: application/vnd.spring-boot.actuator.v2+json;charset=UTF-8"
                    sleep 10s
                    curl -X "POST" "http://localhost:50000/actuator/shutdown" -H "Content-Type: application/vnd.spring-boot.actuator.v2+json;charset=UTF-8"
          env:
            - name: TZ
              value: Asia/Shanghai
            # JVM start-up options. The heap must stay below the 1Gi memory
            # limit above, otherwise the container is OOM-killed.
            - name: JAVA_OPT
              value: "-Xmx768m -Xms768m"
---
# Horizontal Pod Autoscaler for the Deployment of the same name.
# autoscaling/v2 is the stable API (Kubernetes 1.23+); v2beta2 was removed in
# 1.26. On clusters older than 1.23, use autoscaling/v2beta2 instead.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: "{APP_NAME}"
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: "{APP_NAME}"
  # Scaling range. {REPLICAS} must not exceed maxReplicas.
  minReplicas: {REPLICAS}
  maxReplicas: 6
  metrics:
    - type: Resource
      resource:
        # Scale on average CPU utilization, measured relative to the
        # container's CPU request.
        name: cpu
        target:
          type: Utilization
          averageUtilization: 50

友情导读

k8s学习系列

Prometheus 实战系列

Prometheus Operator实战—— Prometheus、Alertmanager、Grafana 监控Springboot服务

基于K8S构建企业级Jenkins CI/CD平台实战(一) 之 环境搭建

基于K8S构建企业级Jenkins CI/CD平台实战(二) 之 kubernetes-plugin 插件使用

Jenkins优化Kubernetes部署流水线

你可能感兴趣的:(k8s,spring,boot,kubernetes,java)