k3s&helm
curl -sfL https://rancher-mirror.oss-cn-beijing.aliyuncs.com/k3s/k3s-install.sh | INSTALL_K3S_MIRROR=cn sh -
yum -y install docker-ce-19.03.7-3.el7
systemctl enable docker && systemctl start docker && systemctl status docker
#优化
cat >/etc/docker/daemon.json <<EOF
{
"registry-mirrors": [
"http://hub-mirror.c.163.com",
"https://docker.mirrors.ustc.edu.cn",
"https://registry.docker-cn.com"
]
}
EOF
[root@vm ~]# docker pull redis:5.0.6-alpine
#报错一
Get https://registry-1.docker.io/v2/: x509: certificate has expired or is not yet valid
[root@vm ~]# hwclock --show
Wed 28 Sep 2022 10:39:04 AM CST -0.951797 seconds
[root@vm ~]# hwclock --set --date '05/15/2023 14:46:30'
[root@vm ~]# date
Wed Sep 28 10:39:56 CST 2022
[root@vm ~]# hwclock --hctosys
[root@vm ~]# clock -w
[root@vm ~]# reboot
#报错二
error pulling image configuration: Get https://production.cloudflare.docker.com/registry-v2/docker/...
[root@vm ~]# vim /etc/resolv.conf
nameserver 8.8.8.8
systemctl daemon-reload && systemctl restart docker
#docker pull registry.cn-beijing.aliyuncs.com/ilemonrain/pause-amd64:3.1
#docker tag registry.cn-beijing.aliyuncs.com/ilemonrain/pause-amd64:3.1 k8s.gcr.io/pause:3.1
vim /etc/profile
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
source /etc/profile
https://get.helm.sh/helm-v3.10.0-linux-amd64.tar.gz
[root@vm ~]# tar xf helm-v3.10.0-linux-amd64.tar.gz
[root@vm ~]# mv linux-amd64/helm /usr/local/bin/
[root@vm ~]# helm repo add stable http://mirror.azure.cn/kubernetes/charts
"stable" has been added to your repositories
[root@vm ~]# helm search repo nginx
https://github.com/chaosblade-io/chaosblade-operator/tags
[root@vm ~]# helm install --namespace kube-system --name-template chaosblade-operator chaosblade-operator-1.7.0.tgz
[root@vm ~]# helm list -n kube-system
NAME NAMESPACE REVISION UPDATED
chaosblade-operator kube-system 1
[root@vm ~]# helm uninstall chaosblade-operator -n kube-system #卸载
[root@vm ~]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
chaosblade-tool-zc6p9 1/1 Running 0 5m24s
chaosblade-operator-f8996b8b4-m8gmw 1/1 Running 0 5m24s
[root@vm ~]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
chaosbladeio/chaosblade-tool 1.7.0 11a45c766467 8 months ago 80.4MB
chaosbladeio/chaosblade-operator 1.7.0 c46846d5e185 8 months ago 150MB
#ChaosBlade Operator 启动后会在每个节点部署 chaosblade-tool Pod
[root@vm ~]# kubectl get all -n kube-system | grep chaos|awk '{print $1}'
pod/chaosblade-tool-zc6p9
pod/chaosblade-operator-f8996b8b4-m8gmw
service/chaosblade-webhook-server
daemonset.apps/chaosblade-tool
deployment.apps/chaosblade-operator
replicaset.apps/chaosblade-operator-f8996b8b4
[root@vm ~]# blade create k8s --help
Kubernetes experiment, for example kill pod
Usage:
blade create k8s [flags]
blade create k8s [command]
Available Commands:
container-container Execute a docker experiment
container-cpu Cpu experiment
container-disk Disk experiment
container-druid Experiment with the Druid
container-dubbo Experiment with the Dubbo
container-es ElasticSearch experiment!
container-file File experiment
container-gateway gateway experiment!
container-hbase hbase experiment!
container-http http experiment
container-jedis jedis experiment
container-jvm Experiment with the JVM
container-kafka kafka experiment
container-lettuce redis client lettuce experiment
container-log log experiment
container-mem Mem experiment
container-mongodb MongoDB experiment
container-mysql mysql experiment
container-network Network experiment
container-process Process experiment
container-psql Postgrelsql experiment
container-rabbitmq rabbitmq experiment
container-redisson redisson experiment
container-rocketmq Rocketmq experiment,can make message send or pull delay and exception
container-script Script chaos experiment
container-servlet java servlet experiment
container-tars tars experiment
node-cpu Cpu experiment
node-disk Disk experiment
node-file File experiment
node-mem Mem experiment
node-network Network experiment
node-process Process experiment
node-script Script chaos experiment
pod-cpu Cpu experiment
pod-disk Disk experiment
pod-file File experiment
pod-mem Mem experiment
pod-network Network experiment
pod-pod Pod experiments
pod-process Process experiment
pod-script Script chaos experiment
Flags:
-h, --help help for k8s
--kubeconfig string kubeconfig file
--waiting-time string Waiting time for invoking, default value is 20s
Global Flags:
-a, --async whether to create asynchronously, default is false
-d, --debug Set client to DEBUG mode
-e, --endpoint string the create result reporting address. It takes effect only when the async value is true and the value is not empty
-n, --nohup used to internal async create, no need to config
--uid string Set Uid for the experiment, adapt to docker
Use "blade create k8s [command] --help" for more information about a command.
[root@vm ~]# cat pod.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: my-nginx
spec:
  containers:
  - name: my-nginx
    image: nginx
    imagePullPolicy: IfNotPresent
  restartPolicy: Always
[root@vm ~]# kubectl apply -f pod.yaml
[root@vm ~]# kubectl get pod -owide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
my-nginx 1/1 Running 0 89s 10.42.0.200 vm <none> <none>
[root@vm ~]# curl 10.42.0.200
<h1>Welcome to nginx!</h1>
blade create k8s node-cpu
[root@vm ~]# blade create k8s node-cpu fullload --help
[root@vm ~]# ln -s /etc/rancher/k3s/k3s.yaml /root/.kube/config
[root@vm ~]# kubectl label node vm cpu-load=test #给node打个标签
#准备终端查看
[root@vm ~]# blade create k8s node-cpu fullload --labels cpu-load=test --cpu-percent 80 --kubeconfig ./.kube/config
{"code":200,"success":true,"result":"90dfadd971e07737"}
[root@vm ~]# blade destroy 90dfadd971e07737
#blade create k8s node-cpu fullload --names vm --cpu-percent 80 --kubeconfig ./.kube/config
#执行失败,可能与k3s有关,待正式集群测试
#也可以使用yaml实现
#参考https://github.com/chaosblade-io/chaosblade-operator/tree/v0.0.1/examples
[root@vm ~]# cat node-cpu.yaml
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: cpu-load
spec:
  experiments:
  - scope: node
    target: cpu
    action: fullload
    desc: "increase node cpu load by labels"
    matchers:
    - name: "labels"
      value:
      - "cpu-load=test"
    - name: cpu-percent
      value:
      - "80"
[root@vm ~]# kubectl apply -f node-cpu.yaml
chaosblade.chaosblade.io/cpu-load created
[root@vm ~]# kubectl delete chaosblade cpu-load #停止实验
[root@vm ~]# kubectl delete -f node-cpu.yaml
blade create k8s node-network
blade create k8s node-network delay/loss/dns
--evict-count string 限制实验生效的数量
--evict-percent string 限制实验生效数量的百分比,不包含 %
--labels string 节点资源标签
--names string 节点资源名,多个资源名之间使用逗号分隔
--kubeconfig string kubeconfig 文件全路径(仅限使用 blade 命令调用时使用)
--waiting-time string 实验结果等待时间,默认为 20s,参数值要包含单位,例如 10s,1m
[root@vm ~]# cat loss-node.yaml
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: loss-node-network-by-names
spec:
  experiments:
  - scope: node
    target: network
    action: loss
    desc: "node network loss"
    matchers:
    - name: "labels"
      value:
      - "cpu-load=test"
    - name: percent
      value: ["60"]
    - name: interface
      value: ["eth0"]
    - name: local-port
      value: ["80"]
[root@vm ~]# kubectl apply -f loss-node.yaml
[root@vm ~]# blade create k8s node-network loss --percent 60 --interface eth0 --local-port 80 --labels cpu-load=test --kubeconfig ./.kube/config
#待做丢包验证
blade create k8s node-process
blade create k8s node-process kill/stop
[root@vm ~]# tar xf apache-tomcat-8.5.88.tar.gz -C /opt/
[root@vm ~]# cd /opt/apache-tomcat-8.5.88/
[root@vm apache-tomcat-8.5.88]# bin/c
catalina.sh ciphers.sh configtest.sh
[root@vm apache-tomcat-8.5.88]# bin/catalina.sh start
[root@vm apache-tomcat-8.5.88]# ps -ef | grep tomcat
root 13592 1 55 19:49 pts/3 00:00:03 /usr/bin/java -Djava.util.loggi....
[root@vm ~]# blade create k8s node-process kill --process tomcat --signal 9 --labels cpu-load=test --kubeconfig ./.kube/config
[root@vm ~]# ps -ef | grep tomcat
root 17819 16746 0 19:52 pts/3 00:00:00 grep --color=auto tomcat
blade create k8s node-disk
blade create k8s node-disk fill/burn
[root@vm ~]# df -h|head
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/centos-root 17G 5.3G 12G 32% /
[root@vm ~]# blade c k8s node-disk fill --labels cpu-load=test --percent 50 --kubeconfig ~/.kube/config
[root@vm ~]# df -h|head
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/centos-root 17G 8.6G 8.5G 51% /
blade create k8s pod-pod
--namespace string Pod 所属的命名空间,只能填写一个值,必填项
--evict-count string 限制实验生效的数量
--evict-percent string 限制实验生效数量的百分比,不包含 %
--labels string Pod 资源标签,多个标签之间是或的关系
--names string Pod 资源名
--kubeconfig string kubeconfig 文件全路径(仅限使用 blade 命令调用时使用)
--waiting-time string 实验结果等待时间,默认为 20s,参数值要包含单位,例如 10s,1m
blade create k8s pod-pod delete 删除 POD
[root@vm ~]# kubectl run nginx-pod --image=nginx:1.14 --port=80
pod/nginx-pod created
[root@vm ~]# kubectl get pod
NAME READY STATUS RESTARTS AGE
nginx-pod 1/1 Running 0 2s
[root@vm ~]# blade create k8s pod-pod delete --namespace default --names nginx-pod --kubeconfig ~/.kube/config
{"code":200,"success":true,"result":"b60319c88af750fa"}
[root@vm ~]# kubectl get pod
No resources found in default namespace.
blade create k8s pod-pod IO
实验准备
需先通过deployment部署测试pod,并在pod的annotation里面指定需要注入I/O异常的volume以及子目录。
[root@vm ~]# mkdir -p /mnt/conf && echo delay > /mnt/conf/delay.txt
[root@vm ~]# cat /mnt/conf/delay.txt
delay
[root@vm ~]# cat io.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: test
  name: test
  namespace: test
spec:
  replicas: 1
  selector:
    matchLabels:
      app: test
  template:
    metadata:
      annotations: #加如下注解,chaosblade webhook会根据pod的annotation,注入fuse的sidecar容器
        chaosblade/inject-volume: data
        chaosblade/inject-volume-subpath: conf
      labels:
        app: test
    spec:
      containers:
      - command: ["/bin/sh", "-c", "while true; do sleep 10000; done"]
        image: busybox
        imagePullPolicy: IfNotPresent
        name: test
        volumeMounts:
        - mountPath: /data
          mountPropagation: HostToContainer ##需要加上这个配置
          name: data
      volumes:
      - hostPath:
          path: /mnt
        name: data
[root@vm ~]# kubectl create namespace test
[root@vm ~]# kubectl apply -f io.yaml
[root@vm ~]# kubectl get pod -ntest
NAME READY STATUS RESTARTS AGE
test-5d9479fcbb-fkhtr 2/2 Running 0 1m
[root@vm ~]# kubectl get pod -ntest -oyaml | grep image:
image: chaosbladeio/chaosblade-tool:1.7.0
image: busybox
实验测试
[root@vm ~]# blade create k8s pod-pod IO -h #查看帮助
#做了个80% 故障实验
[root@vm ~]# blade create k8s pod-pod IO --method read --delay 1000 --path /data --percent 80 --errno 28 --labels "app=test" --namespace test --kubeconfig ~/.kube/config
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat /data/conf/delay.txt
cat: read error: No space left on device #异常
command terminated with exit code 1
real 0m3.254s
user 0m0.129s
sys 0m0.052s
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat /data/conf/delay.txt
delay
real 0m2.265s
user 0m0.121s
sys 0m0.070s
#2秒延迟,不指定percent
[root@vm ~]# blade create k8s pod-pod IO --method read --delay 2000 --path /data --errno 28 --labels "app=test" --namespace test --kubeconfig ~/.kube/config
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat /data/conf/delay.txt
cat: read error: No space left on device
command terminated with exit code 1
real 0m6.284s
user 0m0.126s
sys 0m0.041s
[root@vm ~]# blade create k8s pod-pod IO --method read --delay 1000 --path /data --percent 100 --errno 28 --labels "app=test" --namespace test --kubeconfig ~/.kube/config
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat /data/conf/delay.txt
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
delay
real 0m0.273s #不知为什么100%了,竟然出现未delay
user 0m0.129s
sys 0m0.048s
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat /data/conf/delay.txt
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
delay
real 0m2.219s #触发delay
user 0m0.103s
sys 0m0.046s
[root@vm ~]# cat read-delay.yaml
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: inject-pod-by-labels
spec:
  experiments:
  - scope: pod
    target: pod
    action: IO
    desc: "Pod IO Exception by labels"
    matchers:
    - name: labels
      value:
      - "app=test"
    - name: namespace
      value:
      - "test"
    - name: method
      value:
      - "read"
    - name: delay
      value:
      - "5000"
    - name: path
      value:
      - "/data"
    - name: percent
      value:
      - "50"
    - name: errno
      value:
      - "28"
[root@vm ~]# kubectl apply -f read-delay.yaml
blade create k8s pod-network
[root@vm ~]# kubectl run nginx-pod --image=nginx:1.14 --port=80
pod/nginx-pod created
[root@vm ~]# blade create k8s pod-network delay --time 3000 --offset 1000 --interface eth0 --local-port 80 --names nginx-pod --namespace default --kubeconfig ~/.kube/config
[root@vm ~]# kubectl get pod -owide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE
nginx-pod 1/1 Running 0 157m 10.42.0.211 vm <none>
[root@vm ~]# curl -sIL -w "%{http_code}\n" -o /dev/null --connect-timeout 2 10.42.0.211
000
[root@vm ~]# curl -sIL -w "%{http_code}\n" -o /dev/null --connect-timeout 20 10.42.0.211
200
#delay-pod-network.yaml 参考
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: delay-pod-network-by-names
spec:
  experiments:
  - scope: pod
    target: network
    action: delay
    desc: "delay pod network by names"
    matchers:
    - name: names
      value:
      - "nginx-pod"
    - name: namespace
      value:
      - "default"
    - name: local-port
      value: ["80"]
    - name: interface
      value: ["eth0"]
    - name: time
      value: ["3000"]
    - name: offset
      value: ["1000"]
blade create k8s container-cpu
blade create k8s container-cpu fullload --cpu-percent 100 --names nginx-pod --namespace default --kubeconfig ~/.kube/config
[root@vm ~]# blade create k8s container-cpu load -h
[root@vm ~]# kubectl run centos --image=centos:7.9.2009 /usr/sbin/init
#--container-ids 后的参数是容器ID,与pod里containerID的前12位是同一个值
[root@vm ~]# docker ps | grep centos
cf189d672699 eeb6ee3f44bd ...
[root@vm ~]# kubectl get pod -oyaml | grep -i containerid
- containerID: docker://cf189d6726991a3e6476026e9717b7f817e334cb6...
[root@vm ~]# blade create k8s container-cpu fullload --cpu-percent 100 --names centos --container-ids cf189d672699 --namespace default --kubeconfig ~/.kube/config
#cgroups load failed, cgroups: cgroup deleted 问题 待定 # 用systemd不知道会怎样
#注:原先执行时 --container-ids 误填了镜像ID eeb6ee3f44bd(docker ps 第二列),应填容器ID cf189d672699;上述报错可能与此有关,待用正确的容器ID重新验证
#[root@vm ~]# blade create docker cpu fullload --cpu-percent 50 --chaosblade-release /root/chaosblade-1.3.0.tar.gz --container-id cf189d672699
#上面的执行,把包解压到了容器的opt目录里,k8s的操作问题还不清楚
#[root@centos /]# ls /opt/chaosblade/
#bin blade chaosblade.dat lib logs yaml
blade create k8s container-network
blade create k8s container-network delay/loss/dns
[root@vm ~]# docker ps | grep centos # 获取 --container-ids
cf189d672699 ...
[root@vm ~]# kubectl get pod # 获取 --names
NAME READY STATUS RESTARTS AGE
centos 1/1 Running 0 64m
[root@vm ~]# blade create k8s container-network dns --domain www.baidu.com --ip 10.0.0.1 --names centos --namespace default --container-ids cf189d672699 --kubeconfig ~/.kube/config
{"code":200,"success":true,"result":"907eee9daabc2a3a"}
[root@vm ~]# kubectl exec -it centos -- ping www.baidu.com
PING www.baidu.com (10.0.0.1) 56(84) bytes of data.
^C
--- www.baidu.com ping statistics ---
5 packets transmitted, 0 received, 100% packet loss, time 4032ms
blade create k8s container-process
blade create k8s container-process kill/stop
# 新开终端
[root@vm ~]# kubectl exec -it centos -- top
top - 16:54:07 up 6:53, 0 users, load average: 0.72, 0.57, 0.46
Tasks: 3 total, 1 running, 2 sleeping, 0 stopped, 0 zombie
... #执行下面的操作后显示terminated
command terminated with exit code 137
[root@vm ~]# blade create k8s container-process kill --process top --signal 9 --names centos --container-ids cf189d672699 --namespace default --kubeconfig ~/.kube/config
blade create k8s container-container
#新开终端
[root@vm ~]# kubectl get pod -w
NAME READY STATUS RESTARTS AGE
centos 1/1 Running 1 90m #执行下面的remove命令
centos 0/1 ContainerCreating 2 90m
centos 1/1 Running 2 90m
[root@vm ~]# blade create k8s container-container remove --force --names centos --namespace default --container-ids cf189d672699 --kubeconfig ~/.kube/config
#pod被杀死重启,并非真正的删掉
[root@vm ~]# kubectl get all
NAME READY STATUS RESTARTS AGE
pod/centos 1/1 Running 2 91m
#yaml
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: remove-container-by-id
spec:
  experiments:
  - scope: container
    target: container
    action: remove
    desc: "remove container by id"
    matchers:
    - name: container-ids
      value: ["cf189d672699"]
    - name: names
      value: ["centos"]
    - name: namespace
      value: ["default"]