chaosblade&k8s

k3s&helm

curl -sfL https://rancher-mirror.oss-cn-beijing.aliyuncs.com/k3s/k3s-install.sh | INSTALL_K3S_MIRROR=cn sh -

yum -y install docker-ce-19.03.7-3.el7
systemctl enable docker && systemctl start docker  && systemctl status docker

#优化
cat >/etc/docker/daemon.json <<EOF 
{
    "registry-mirrors": [
        "http://hub-mirror.c.163.com",
        "https://docker.mirrors.ustc.edu.cn",
        "https://registry.docker-cn.com"
    ]
}
EOF


[root@vm ~]# docker pull redis:5.0.6-alpine
#报错一
Get https://registry-1.docker.io/v2/: x509: certificate has expired or is not yet valid
[root@vm ~]# hwclock --show
Wed 28 Sep 2022 10:39:04 AM CST  -0.951797 seconds
[root@vm ~]# hwclock --set --date '05/15/2023 14:46:30'
[root@vm ~]# date
Wed Sep 28 10:39:56 CST 2022
[root@vm ~]# hwclock --hctosys
[root@vm ~]# clock -w
[root@vm ~]# reboot
#报错二
error pulling image configuration: Get https://production.cloudflare.docker.com/registry-v2/docker/...
[root@vm ~]# vim /etc/resolv.conf
nameserver 8.8.8.8


systemctl daemon-reload  && systemctl restart docker


systemctl daemon-reload  && systemctl restart docker

#docker pull registry.cn-beijing.aliyuncs.com/ilemonrain/pause-amd64:3.1
#docker tag registry.cn-beijing.aliyuncs.com/ilemonrain/pause-amd64:3.1 k8s.gcr.io/pause:3.1


vim /etc/profile
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
source /etc/profile


https://get.helm.sh/helm-v3.10.0-linux-amd64.tar.gz
[root@vm ~]# tar xf helm-v3.10.0-linux-amd64.tar.gz
[root@vm ~]# mv linux-amd64/helm /usr/local/bin/
[root@vm ~]# helm repo add stable http://mirror.azure.cn/kubernetes/charts
"stable" has been added to your repositories
[root@vm ~]# helm search repo  nginx
https://github.com/chaosblade-io/chaosblade-operator/tags

[root@vm ~]# helm install --namespace kube-system --name-template  chaosblade-operator chaosblade-operator-1.7.0.tgz

[root@vm ~]# helm list -n kube-system
NAME                    NAMESPACE       REVISION        UPDATED                           
chaosblade-operator     kube-system     1         
[root@vm ~]# helm uninstall chaosblade-operator  -n kube-system  #卸载
[root@vm ~]# kubectl get pod -n kube-system
NAME                                      READY   STATUS      RESTARTS       AGE
chaosblade-tool-zc6p9                     1/1     Running     0              5m24s
chaosblade-operator-f8996b8b4-m8gmw       1/1     Running     0              5m24s
[root@vm ~]# docker images
REPOSITORY                          TAG            IMAGE ID       CREATED         SIZE
chaosbladeio/chaosblade-tool       1.7.0          11a45c766467   8 months ago    80.4MB
chaosbladeio/chaosblade-operator   1.7.0          c46846d5e185   8 months ago    150MB

#ChaosBlade Operator 启动后会在每个节点部署 chaosblade-tool Pod 
[root@vm ~]# kubectl get all -n kube-system | grep chaos|awk '{print $1}'
pod/chaosblade-tool-zc6p9
pod/chaosblade-operator-f8996b8b4-m8gmw
service/chaosblade-webhook-server
daemonset.apps/chaosblade-tool
deployment.apps/chaosblade-operator
replicaset.apps/chaosblade-operator-f8996b8b4
[root@vm ~]# blade create k8s --help
Kubernetes experiment, for example kill pod

Usage:
  blade create k8s [flags]
  blade create k8s [command]

Available Commands:
  container-container Execute a docker experiment
  container-cpu       Cpu experiment
  container-disk      Disk experiment
  container-druid     Experiment with the Druid
  container-dubbo     Experiment with the Dubbo
  container-es        ElasticSearch experiment!
  container-file      File experiment
  container-gateway   gateway experiment!
  container-hbase     hbase experiment!
  container-http      http experiment
  container-jedis     jedis experiment
  container-jvm       Experiment with the JVM
  container-kafka     kafka experiment
  container-lettuce   redis client lettuce experiment
  container-log       log experiment
  container-mem       Mem experiment
  container-mongodb   MongoDB experiment
  container-mysql     mysql experiment
  container-network   Network experiment
  container-process   Process experiment
  container-psql      Postgrelsql experiment
  container-rabbitmq  rabbitmq experiment
  container-redisson  redisson experiment
  container-rocketmq  Rocketmq experiment,can make message send or pull delay and exception
  container-script    Script chaos experiment
  container-servlet   java servlet experiment
  container-tars      tars experiment
  node-cpu            Cpu experiment
  node-disk           Disk experiment
  node-file           File experiment
  node-mem            Mem experiment
  node-network        Network experiment
  node-process        Process experiment
  node-script         Script chaos experiment
  pod-cpu             Cpu experiment
  pod-disk            Disk experiment
  pod-file            File experiment
  pod-mem             Mem experiment
  pod-network         Network experiment
  pod-pod             Pod experiments
  pod-process         Process experiment
  pod-script          Script chaos experiment

Flags:
  -h, --help                  help for k8s
      --kubeconfig string     kubeconfig file
      --waiting-time string   Waiting time for invoking, default value is 20s

Global Flags:
  -a, --async             whether to create asynchronously, default is false
  -d, --debug             Set client to DEBUG mode
  -e, --endpoint string   the create result reporting address. It takes effect only when the async value is true and the value is not empty
  -n, --nohup             used to internal async create, no need to config
      --uid string        Set Uid for the experiment, adapt to docker

Use "blade create k8s [command] --help" for more information about a command.

[root@vm ~]# cat pod.yaml
---
kind: Pod
apiVersion: v1
metadata:
  name: my-nginx
spec:
  containers:
  - name: my-nginx
    image: nginx
    imagePullPolicy: IfNotPresent
  restartPolicy: Always
  
[root@vm ~]# kubectl apply -f pod.yaml
[root@vm ~]# kubectl get pod -owide
NAME       READY   STATUS    RESTARTS   AGE   IP            NODE   NOMINATED NODE   READINESS GATES
my-nginx   1/1     Running   0          89s   10.42.0.200   vm     <none>           <none>
[root@vm ~]# curl 10.42.0.200
<h1>Welcome to nginx!</h1>

blade create k8s node-cpu

[root@vm ~]# blade create k8s node-cpu  fullload --help
[root@vm ~]# ln -s /etc/rancher/k3s/k3s.yaml /root/.kube/config
[root@vm ~]# kubectl label node vm cpu-load=test   #给node打个标签
#准备终端查看
[root@vm ~]# blade create k8s node-cpu fullload --labels cpu-load=test --cpu-percent 80 --kubeconfig ./.kube/config
{"code":200,"success":true,"result":"90dfadd971e07737"}
[root@vm ~]# blade destroy 90dfadd971e07737

#blade create k8s node-cpu fullload --names vm --cpu-percent 80 --kubeconfig ./.kube/config   
#执行失败,可能与k3s有关,待正式集群测试

#也可以使用yaml实现  
#参考https://github.com/chaosblade-io/chaosblade-operator/tree/v0.0.1/examples
[root@vm ~]# cat node-cpu.yaml
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: cpu-load
spec:
  experiments:
  - scope: node
    target: cpu
    action: fullload
    desc: "increase node cpu load by names"
    matchers:
    - name: "labels"
      value:
      - "cpu-load=test"
    - name: cpu-percent
      value:
      - "80"

[root@vm ~]# kubectl apply -f node-cpu.yaml
chaosblade.chaosblade.io/cpu-load created
[root@vm ~]# kubectl delete chaosblade cpu-load  #停止实验
[root@vm ~]# kubectl delete -f node-cpu.yaml

blade create k8s node-network

blade create k8s node-network delay/loss/dns
--evict-count string 限制实验生效的数量
--evict-percent string 限制实验生效数量的百分比,不包含 %
--labels string 节点资源标签
--names string 节点资源名,多个资源名之间使用逗号分隔
--kubeconfig string kubeconfig 文件全路径(仅限使用 blade 命令调用时使用)
--waiting-time string 实验结果等待时间,默认为 20s,参数值要包含单位,例如 10s,1m

11

apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: loss-node-network-by-names
spec:
  experiments:
  - scope: node
    target: network
    action: loss
    desc: "node network loss"
    matchers:
    - name: "labels"
      value:
      - "cpu-load=test"
    - name: percent
      value: ["60"]
    - name: interface
      value: ["eth0"]
    - name: local-port
      value: ["80"]
      
      
[root@vm ~]# kubectl apply -f loss-node.yaml
[root@vm ~]# blade create k8s node-network loss --percent 60 --interface eth0 --local-port 80  --labels cpu-load=test --kubeconfig ./.kube/config

#待做丢包验证

blade create k8s node-process

blade create k8s node-process kill/stop

[root@vm ~]# tar xf apache-tomcat-8.5.88.tar.gz -C /opt/
[root@vm ~]# cd /opt/apache-tomcat-8.5.88/
[root@vm apache-tomcat-8.5.88]# bin/c
catalina.sh    ciphers.sh     configtest.sh
[root@vm apache-tomcat-8.5.88]# bin/catalina.sh start
[root@vm apache-tomcat-8.5.88]# ps -ef | grep tomcat
root     13592     1 55 19:49 pts/3    00:00:03 /usr/bin/java -Djava.util.loggi....

[root@vm ~]# blade create k8s node-process kill --process tomcat --signal 9  --labels cpu-load=test --kubeconfig ./.kube/config
[root@vm ~]# ps -ef | grep tomcat
root     17819 16746  0 19:52 pts/3    00:00:00 grep --color=auto tomcat


blade create k8s node-disk

blade create k8s node-disk fill/burn
[root@vm ~]# df -h|head
Filesystem               Size  Used Avail Use% Mounted on
/dev/mapper/centos-root   17G  5.3G   12G  32% /

[root@vm ~]# blade c k8s node-disk fill --labels cpu-load=test --percent 50 --kubeconfig ~/.kube/config

[root@vm ~]# df -h|head
Filesystem               Size  Used Avail Use% Mounted on
/dev/mapper/centos-root   17G  8.6G  8.5G  51% /

blade create k8s pod-pod

--namespace string Pod 所属的命名空间,只能填写一个值,必填项
--evict-count string 限制实验生效的数量
--evict-percent string 限制实验生效数量的百分比,不包含 %
--labels string Pod 资源标签,多个标签之间是或的关系
--names string Pod 资源名
--kubeconfig string kubeconfig 文件全路径(仅限使用 blade 命令调用时使用)
--waiting-time string 实验结果等待时间,默认为 20s,参数值要包含单位,例如 10s,1m
blade create k8s pod-pod delete 删除 POD

[root@vm ~]# kubectl run nginx-pod --image=nginx:1.14 --port=80
pod/nginx-pod created
[root@vm ~]# kubectl get pod
NAME        READY   STATUS    RESTARTS   AGE
nginx-pod   1/1     Running   0          2s
[root@vm ~]#  blade create k8s pod-pod delete --namespace default --names nginx-pod --kubeconfig ~/.kube/config
{"code":200,"success":true,"result":"b60319c88af750fa"}
[root@vm ~]# kubectl get pod
No resources found in default namespace.

blade create k8s pod-IO

实验准备

需先通过deployment部署测试pod,并在pod的annotation里面指定需要注入I/O异常的volume以及子目录。

[root@vm ~]# echo delay > /mnt/delay.txt
[root@vm ~]# cat /mnt/delay.txt
delay

[root@vm ~]# cat io.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: test
  name: test
  namespace: test
spec:
  replicas: 1
  selector:
    matchLabels:
      app: test
  template:
    metadata:
      annotations: #加如下注解,chaosblade webhook会根据pod的annotation,注入fuse的sidecar容器
        chaosblade/inject-volume: data
        chaosblade/inject-volume-subpath: conf
      labels:
        app: test
    spec:
      containers:
      - command: ["/bin/sh", "-c", "while true; do sleep 10000; done"]
        image: busybox
        imagePullPolicy: IfNotPresent
        name: test
        volumeMounts:
        - mountPath: /data
          mountPropagation: HostToContainer  ##需要加上这个配置
          name: data
      volumes:
      - hostPath:
          path: /mnt
        name: data
[root@vm ~]# kubectl create namespace test
[root@vm ~]# kubectl apply -f io.yaml
[root@vm ~]# kubectl get pod -ntest
NAME                    READY   STATUS    RESTARTS   AGE
test-5d9479fcbb-fkhtr   2/2     Running   0          1m
[root@vm ~]# kubectl get pod -ntest -oyaml | grep image:
      image: chaosbladeio/chaosblade-tool:1.7.0
      image: busybox

实验测试

[root@vm ~]# blade create k8s pod-pod IO -h      #查看帮助
#做了个80%  故障实验
[root@vm ~]# blade create k8s pod-pod IO --method read --delay 1000 --path /data --percent 80 --errno 28 --labels "app=test" --namespace test --kubeconfig ~/.kube/config

[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat   /data/conf/delay.txt
cat: read error: No space left on device  #异常
command terminated with exit code 1

real    0m3.254s
user    0m0.129s
sys     0m0.052s
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat   /data/conf/delay.txt
delay

real    0m2.265s
user    0m0.121s
sys     0m0.070s


#2秒延迟,不指定percent
[root@vm ~]#  blade create k8s pod-pod IO --method read --delay 2000 --path /data --errno 28 --labels "app=test" --namespace test --kubeconfig ~/.kube/config
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat   /data/conf/delay.txt

cat: read error: No space left on device
command terminated with exit code 1

real    0m6.284s
user    0m0.126s
sys     0m0.041s



[root@vm ~]#  blade create k8s pod-pod IO --method read --delay 1000 --path /data --percent 100 --errno 28 --labels "app=test" --namespace test --kubeconfig ~/.kube/config
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat   /data/conf/delay.txt
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
delay

real    0m0.273s   #不知为什么100%了,竟然出现未delay
user    0m0.129s
sys     0m0.048s
[root@vm ~]# time kubectl exec -it test-5d9479fcbb-jpjph -n test -c test cat   /data/conf/delay.txt
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
delay

real    0m2.219s  #触发delay
user    0m0.103s
sys     0m0.046s

[root@vm ~]# cat read-delay.yaml
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: inject-pod-by-labels
spec:
  experiments:
  - scope: pod
    target: pod
    action: IO
    desc: "Pod IO Exception by labels"
    matchers:
    - name: labels
      value:
      - "app=test"
    - name: namespace
      value:
      - "test"
    - name: method
      value:
      - "read"
    - name: delay
      value:
      - "5000"
    - name: path
      value:
      - "/data"
    - name: percent
      value:
      - "50"
    - name: errno
      value:
      - "28"

[root@vm ~]# kubectl apply -f read-delay.yaml

blade create k8s pod-network

[root@vm ~]# kubectl run nginx-pod --image=nginx:1.14 --port=80
pod/nginx-pod created
[root@vm ~]# blade create k8s pod-network delay --time 3000 --offset 1000 --interface eth0 --local-port 80 --names nginx-pod  --namespace default  --kubeconfig ~/.kube/config

[root@vm ~]# kubectl get pod -owide
NAME        READY   STATUS    RESTARTS   AGE    IP            NODE   NOMINATED NODE   
nginx-pod   1/1     Running   0          157m   10.42.0.211   vm     <none>           
[root@vm ~]# curl -sIL -w "%{http_code}\n" -o /dev/null  --connect-timeout 2 10.42.0.211
000
[root@vm ~]# curl -sIL -w "%{http_code}\n" -o /dev/null  --connect-timeout 20 10.42.0.211
200
#delay-pod-network.yaml   参考
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: delay-pod-network-by-names
spec:
  experiments:
  - scope: pod
    target: network
    action: delay
    desc: "delay pod network by names"
    matchers:
    - name: names
      value:
      - "nginx-pod"
    - name: namespace
      value:
      - "default"
    - name: local-port
      value: ["80"]
    - name: interface
      value: ["eth0"]
    - name: time
      value: ["3000"]
    - name: offset
      value: ["1000"]

blade create k8s container-cpu

blade create k8s container-cpu fullload --cpu-percent 100  --names nginx-pod  --namespace default  --kubeconfig ~/.kube/config
[root@vm ~]# blade create k8s container-cpu load -h
[root@vm ~]# kubectl run centos --image=centos:7.9.2009  /usr/sbin/init

#--container-ids 后的参数是容器ID(docker ps 的第一列),与pod里containerID的前12位是同一个值
[root@vm ~]# docker ps | grep centos
cf189d672699   eeb6ee3f44bd    ...
[root@vm ~]# kubectl get pod -oyaml | grep -i containerid
    - containerID: docker://cf189d6726991a3e6476026e9717b7f817e334cb6...

[root@vm ~]# blade create k8s container-cpu fullload --cpu-percent 100  --names centos  --container-ids cf189d672699 --namespace default  --kubeconfig ~/.kube/config

#cgroups load failed, cgroups: cgroup deleted  问题 待定   #  用systemd不知道会怎样
#[root@vm ~]# blade create docker cpu fullload --cpu-percent 50 --chaosblade-release /root/chaosblade-1.3.0.tar.gz --container-id cf189d672699   
#上面的执行,把包解压到了容器的opt目录里,k8s的操作问题还不清楚
#[root@centos /]# ls /opt/chaosblade/
#bin  blade  chaosblade.dat  lib  logs  yaml


blade create k8s container-network

blade create k8s container-network delay/loss/dns

[root@vm ~]# docker ps | grep centos   #  获取  --container-ids 
cf189d672699  ... 
[root@vm ~]# kubectl get pod     #  获取 --names
NAME     READY   STATUS    RESTARTS   AGE
centos   1/1     Running   0          64m
[root@vm ~]# blade create k8s container-network dns --domain www.baidu.com --ip 10.0.0.1 --names centos --namespace default --container-ids cf189d672699 --kubeconfig ~/.kube/config
{"code":200,"success":true,"result":"907eee9daabc2a3a"}
[root@vm ~]# kubectl exec -it centos -- ping www.baidu.com
PING www.baidu.com (10.0.0.1) 56(84) bytes of data.
^C
--- www.baidu.com ping statistics ---
5 packets transmitted, 0 received, 100% packet loss, time 4032ms

blade create k8s container-process

blade create k8s container-process kill/stop

# 新开终端
[root@vm ~]# kubectl exec -it centos  -- top
top - 16:54:07 up  6:53,  0 users,  load average: 0.72, 0.57, 0.46
Tasks:   3 total,   1 running,   2 sleeping,   0 stopped,   0 zombie
...   #执行下面的操作后显示terminated
command terminated with exit code 137

[root@vm ~]# blade create k8s container-process kill --process top --signal 9 --names centos --container-ids cf189d672699 --namespace default --kubeconfig ~/.kube/config

blade create k8s container-container

#新开终端
[root@vm ~]# kubectl get pod  -w
NAME     READY   STATUS    RESTARTS   AGE
centos   1/1     Running   1          90m  #执行下面的remove命令
centos   0/1     ContainerCreating   2          90m
centos   1/1     Running             2          90m


[root@vm ~]# blade create k8s container-container  remove --force --names centos --namespace default --container-ids cf189d672699 --kubeconfig ~/.kube/config

#pod被杀死重启,并非真正的删掉  
[root@vm ~]# kubectl get all
NAME         READY   STATUS    RESTARTS   AGE
pod/centos   1/1     Running   2          91m

#yaml
apiVersion: chaosblade.io/v1alpha1
kind: ChaosBlade
metadata:
  name: remove-container-by-id
spec:
  experiments:
  - scope: container
    target: container
    action: remove
    desc: "remove container by id"
    matchers:
    - name: container-ids
      value: ["cf189d672699"]
    - name: names
      value: ["centos"]
    - name: namespace
      value: ["default"]

你可能感兴趣的:(混沌测试,kubernetes,容器,云原生)