1、部署准备
说明:所有的容器组都运行在monitoring 命名空间
本文参考https://github.com/coreos/kube-prometheus
由于官方维护的版本在现有的部署环境中出现问题,所以下面做了一些修改及变更,不影响整体效果。
2、准备相关yaml
# Clone the upstream kube-prometheus repository.
# git clone creates the kube-prometheus directory itself; the original
# mkdir + cd produced a redundant nested kube-prometheus/kube-prometheus.
git clone https://github.com/coreos/kube-prometheus
3、custom-metrics-api 部署
# Apply the custom-metrics-api RBAC / APIService / ConfigMap manifests
# into the monitoring namespace, in the same order as before.
cd kube-prometheus/experimental/custom-metrics-api/
for manifest in \
  custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml \
  custom-metrics-apiservice.yaml \
  custom-metrics-cluster-role.yaml \
  custom-metrics-configmap.yaml \
  hpa-custom-metrics-cluster-role-binding.yaml; do
  kubectl apply -n monitoring -f "$manifest"
done
4、prometheus-adapter 准备
4.1、整理prometheus-adapter yaml
# Collect all prometheus-adapter manifests into their own directory.
cd kube-prometheus/manifests/
mkdir prometheus-adapter
mv prometheus-adapter*.yaml prometheus-adapter
cd prometheus-adapter
# Note: custom-metrics-api already ships its own ConfigMap, which must not
# be overwritten — drop the adapter's copy here.
# -f is sufficient for a single regular file; recursive -r is not needed.
rm -f -- prometheus-adapter-configMap.yaml
4.2、创建prometheus-adapter ca 证书
cd kube-prometheus/manifests/prometheus-adapter
# Write the CSR config for the adapter's serving certificate.
# The here-doc delimiter is quoted so nothing inside is shell-expanded
# (the JSON body is literal data).
# NOTE(review): "hosts" is [""] — a single empty string. This presumably
# works because the cert is only reached through the in-cluster Service;
# confirm whether real SANs (service DNS names / IPs) are needed here.
cat <<'EOF' | tee /apps/work/k8s/cfssl/k8s/apiserver.json
{
  "CN": "apiserver",
  "hosts": [""],
  "key": {
    "algo": "rsa",
    "size": 2048
  },
  "names": [
    {
      "C": "CN",
      "ST": "GuangDong",
      "L": "GuangZhou",
      "O": "niuke",
      "OU": "niuke"
    }
  ]
}
EOF
### Generate the serving certificate, signed by the cluster's k8s CA,
### using the CSR config written above.
cfssl gencert -ca=/apps/work/k8s/cfssl/pki/k8s/k8s-ca.pem -ca-key=/apps/work/k8s/cfssl/pki/k8s/k8s-ca-key.pem \
-config=/apps/work/k8s/cfssl/ca-config.json \
-profile=kubernetes /apps/work/k8s/cfssl/k8s/apiserver.json | cfssljson -bare ./apiserver
### Rename the generated files to the names the Secret below expects
### (apiserver.crt / apiserver.key).
mv apiserver-key.pem apiserver.key
mv apiserver.pem apiserver.crt
### Create the TLS Secret that the prometheus-adapter Deployment mounts.
kubectl create secret generic volume-serving-cert --from-file=apiserver.crt --from-file=apiserver.key -n monitoring
kubectl get secret -n monitoring | grep volume-serving-cert
# Inspect the Secret. (The original command passed the secret name twice,
# which is invalid; it appears only once here.)
kubectl get secret volume-serving-cert -n monitoring -o yaml
4.3、执行prometheus-adapter yaml
# Apply every remaining prometheus-adapter manifest in the current directory.
kubectl apply -f .
5、验证prometheus-adapter 部署是否正常
kubectl api-versions| grep custom
[root@jenkins prometheus-adapter]# kubectl api-versions| grep custom
custom.metrics.k8s.io/v1beta1
[root@jenkins prometheus-adapter]# kubectl get pods -n monitoring -o wide | grep prometheus-adapter
prometheus-adapter-c6c6b84dc-tvljm 1/1 Running 1 35d 10.65.1.36 node02
https://10.65.1.36:6443/
可以看到api 接口
[root@jenkins prometheus-adapter]# kubectl get service -n monitoring | grep prometheus-adapter
prometheus-adapter ClusterIP 10.64.97.41 443/TCP 35d
https://10.64.97.41/
一样可以打开看到api接口
kubectl get --raw "/apis/custom.metrics.k8s.io" | jq .
[root@jenkins prometheus-adapter]# kubectl get --raw "/apis/custom.metrics.k8s.io" | jq .
{
"kind": "APIGroup",
"apiVersion": "v1",
"name": "custom.metrics.k8s.io",
"versions": [
{
"groupVersion": "custom.metrics.k8s.io/v1beta1",
"version": "v1beta1"
}
],
"preferredVersion": {
"groupVersion": "custom.metrics.k8s.io/v1beta1",
"version": "v1beta1"
}
}
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq .
[root@jenkins prometheus-adapter]# kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . | more
{
"kind": "APIResourceList",
"apiVersion": "v1",
"groupVersion": "custom.metrics.k8s.io/v1beta1",
"resources": [
{
"name": "persistentvolumes/kube_persistentvolume_info",
"singularName": "",
"namespaced": false,
"kind": "MetricValueList",
"verbs": [
"get"
]
},
{
"name": "pods/node_filesystem_readonly",
"singularName": "",
"namespaced": true,
"kind": "MetricValueList",
"verbs": [
"get"
]
},
{
"name": "jobs.batch/grafana_api_user_signup_completed",
"singularName": "",
"namespaced": true,
"kind": "MetricValueList",
"verbs": [
"get"
]
},
{
"name": "services/prometheus_sd_dns_lookups",
"singularName": "",
"namespaced": true,
"kind": "MetricValueList",
"verbs": [
"get"
]
},
{
"name": "services/etcd_request_cache_add_latencies_summary_sum",
"singularName": "",
"namespaced": true,
"kind": "MetricValueList",
"verbs": [
"get"
]
如果有这些数据证明prometheus-adapter 部署正常
6、使用官方测试hpa 项目测试自定义接口扩容
# Deploy the upstream sample-app (Deployment + Service + HPA) used to
# exercise the custom-metrics API.
cd kube-prometheus/experimental/custom-metrics-api/
kubectl apply -f sample-app.yaml
[root@jenkins custom-metrics-api]# kubectl get pod | grep sample-app
sample-app-855d8f8998-qr7w2 1/1 Running 1 5d15h
[root@jenkins custom-metrics-api]# kubectl get service | grep sample-app
sample-app ClusterIP 10.64.108.86 8080/TCP 5d15h
[root@jenkins custom-metrics-api]# kubectl get hpa | grep sample-app
sample-app Deployment/sample-app 399m/500m 1 10 1 5d15h
http://10.64.108.86:8080/metrics
得到监控值http_requests_total
注意:所有以 _total 结尾的监控指标,在该 custom-metrics 接口中都会去掉 _total 后缀(例如 http_requests_total 对应 http_requests)。
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/default/pods/*/http_requests" | jq .
[root@jenkins custom-metrics-api]# kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/default/pods/*/http_requests" | jq .
{
"kind": "MetricValueList",
"apiVersion": "custom.metrics.k8s.io/v1beta1",
"metadata": {
"selfLink": "/apis/custom.metrics.k8s.io/v1beta1/namespaces/default/pods/%2A/http_requests"
},
"items": [
{
"describedObject": {
"kind": "Pod",
"namespace": "default",
"name": "sample-app-855d8f8998-qr7w2",
"apiVersion": "/v1"
},
"metricName": "http_requests",
"timestamp": "2019-06-26T01:32:21Z",
"value": "399m"
}
]
}
测试自动伸缩
# Install the hey HTTP load-generation tool.
go get -u github.com/rakyll/hey
# Fire 10000 requests at the sample-app Service: 5 concurrent workers (-c),
# each rate-limited to 5 queries per second (-q).
hey -n 10000 -q 5 -c 5 http://10.64.108.86:8080
等待几分钟
kubectl get pod | grep sample-app
查看 sample-app 的 Pod 数是否增加,如果增加则证明自动扩容成功
7、创建coredns hpa
删除旧 hpa
kubectl get hpa -n kube-system | grep coredns
[root@jenkins custom-metrics-api]# kubectl get hpa -n kube-system | grep coredns
coredns Deployment/coredns 14%/80% 2 10 2 68d
[root@jenkins custom-metrics-api]# kubectl delete hpa coredns -n kube-system
horizontalpodautoscaler.autoscaling "coredns" deleted
[root@jenkins custom-metrics-api]# kubectl get hpa -n kube-system | grep coredns
No resources found.
[root@jenkins custom-metrics-api]# kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/kube-system/pods/*/coredns_dns_request_count" | jq .
{
"kind": "MetricValueList",
"apiVersion": "custom.metrics.k8s.io/v1beta1",
"metadata": {
"selfLink": "/apis/custom.metrics.k8s.io/v1beta1/namespaces/kube-system/pods/%2A/coredns_dns_request_count"
},
"items": [
{
"describedObject": {
"kind": "Pod",
"namespace": "kube-system",
"name": "coredns-6cbf85dbc6-sjnjt",
"apiVersion": "/v1"
},
"metricName": "coredns_dns_request_count",
"timestamp": "2019-06-26T01:42:41Z",
"value": "4466m"
},
{
"describedObject": {
"kind": "Pod",
"namespace": "kube-system",
"name": "coredns-6cbf85dbc6-wcqmd",
"apiVersion": "/v1"
},
"metricName": "coredns_dns_request_count",
"timestamp": "2019-06-26T01:42:41Z",
"value": "1866m"
}
]
}
创建 coredns hpa yaml
# Create the CoreDNS HPA manifest. The original listed the YAML flat
# (no indentation), which is not valid YAML; the properly nested form is
# written here via a quoted here-doc instead of an interactive vi session.
cat > hpa-coredns.yaml <<'EOF'
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
  name: coredns
  namespace: kube-system
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: coredns
  minReplicas: 2
  maxReplicas: 10
  metrics:
  - type: Pods
    pods:
      metricName: coredns_dns_request_count
      targetAverageValue: 1000
EOF
kubectl apply -f hpa-coredns.yaml
等待几分钟
[root@jenkins custom-metrics-api]# kubectl get hpa -n kube-system | grep coredns
coredns Deployment/coredns 4666m/1k 3 10 3 8m14s
[root@jenkins custom-metrics-api]# kubectl describe hpa coredns -n kube-system
Name: coredns
Namespace: kube-system
Labels:
Annotations: kubectl.kubernetes.io/last-applied-configuration:
{"apiVersion":"autoscaling/v2beta1","kind":"HorizontalPodAutoscaler","metadata":{"annotations":{},"name":"coredns","namespace":"kube-syste...
CreationTimestamp: Wed, 26 Jun 2019 09:41:02 +0800
Reference: Deployment/coredns
Metrics: ( current / target )
"coredns_dns_request_count" on pods: 4666m / 1k
Min replicas: 3
Max replicas: 10
Deployment pods: 3 current / 3 desired
Conditions:
Type Status Reason Message
---- ------ ------ -------
AbleToScale True ReadyForNewScale recommended size matches current size
ScalingActive True ValidMetricFound the HPA was able to successfully calculate a replica count from pods metric coredns_dns_request_count
ScalingLimited True TooFewReplicas the desired replica count is increasing faster than the maximum scale rate
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal SuccessfulRescale 70s horizontal-pod-autoscaler New size: 3; reason: Current number of replicas below Spec.MinReplicas
查看pod 是否增加
[root@jenkins custom-metrics-api]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-6cbf85dbc6-sjnjt 1/1 Running 1 4d3h
coredns-6cbf85dbc6-sqzhh 0/1 ContainerCreating 0 3m21s
coredns-6cbf85dbc6-wcqmd 1/1 Running 1 47d
下一篇: Kubernetes 生产环境安装部署 基于 Kubernetes v1.14.0 之 vpa 部署