跟踪IO请求

1 目的

跟踪IO请求,分析磁盘读写性能瓶颈

2 工具

2.1 磁盘 geometry

# hdparm -g /dev/sdl
/dev/sdl:
 geometry      = 1191062/255/63, sectors = 19134414848, start = 0

2.2 读写请求

打开裸块设备
./block_syscall -device /dev/sdl
从offsett 0 开始读取64MB (offect sector 0; size sector 131072)
r 0 131072

// block_syscall.go
// go build block_syscall.go
package main

import (
    "bufio"
    "flag"
    "fmt"
    "log"
    "os"
    "strconv"
    "strings"
    "syscall"
    "time"
)

var (
    flagDevice = flag.String("device", "", "block device")
)

func init() {
    log.SetFlags(log.Lshortfile | log.LstdFlags)
    flag.Parse()
}
func main() {
    fd, err := syscall.Open(*flagDevice, syscall.O_DIRECT|os.O_RDWR, 0666)
    if err != nil {
        log.Println(err)
        return
    }
    inputReader := bufio.NewReader(os.Stdin)

    for {
        fmt.Printf(">")
        input, err := inputReader.ReadString('\n')
        if err != nil {
            log.Println(err)
            break
        }
        input = strings.Trim(input, "\n")
        param := strings.Split(input, " ")
        if len(param) < 3 {
            log.Printf("err param count:%s", input)
            continue
        }
        if param[0] != "r" && param[0] != "w" {
            log.Println("not support cmd")
            continue
        }
        offectSector, err := strconv.Atoi(param[1])
        if err != nil {
            log.Println(err)
            continue
        }
        sizeSector, err := strconv.Atoi(param[2])
        if err != nil {
            log.Println(err)
            continue
        }

        opStart := time.Now()
        _, err = syscall.Seek(fd, int64(offectSector*512), 0)
        if err != nil {
            log.Println(err)
            continue
        }
        buf := make([]byte, sizeSector*512)
        switch param[0] {
        case "r":
            n, err := syscall.Read(fd, buf)
            opEnd := time.Now()
            if err != nil {
                log.Println(err)
                break
            }
            log.Printf("complete:%s %.2fms", greek(int64(n), 2), opEnd.Sub(opStart).Seconds()*1000)
        case "w":
            n, err := syscall.Write(fd, buf)
            opEnd := time.Now()
            if err != nil {
                log.Println(err)
                break
            }
            log.Printf("complete:%s %.2fms", greek(int64(n), 2), opEnd.Sub(opStart).Seconds()*1000)
        }
    }

}

const (
    B  = int64(1)
    KB = 1024 * B
    MB = 1024 * KB
    GB = 1024 * MB
    TB = 1024 * GB
    PB = 1024 * TB
)

var unit = []struct {
    Cap  int64
    Name string
}{{PB, "P"}, {TB, "T"}, {GB, "G"}, {MB, "M"}, {KB, "K"}, {B, "B"}}

func greek(v int64, precision int) string {
    for _, s := range unit {
        if v >= s.Cap {
            if precision < 0 {
                return fmt.Sprintf("%f%s", float64(v)/float64(s.Cap), s.Name)
            } else {
                format := fmt.Sprintf("%%.%df%%s", precision)
                return fmt.Sprintf(format, float64(v)/float64(s.Cap), s.Name)
            }
        }
    }
    return ""
}

2.3 抓取IO协议栈日志

btrace /dev/sdl > cap.log

2.4 分析日志

对于磁盘驱动器来说是每1024个sector一个请求,64MB/512KB=128 所以生成了128个请求,与iostat看到的r/s一致

grep Q sdl.log |wc -l
>128
iostat -dx /dev/sdl 1 | grep sdl
Device:  rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await r_await w_await  svctm  %util
sdl      0.00     0.00      128.00   0.00   65536.00  0.00  1024.00  20.81     162.60  162.60 0.00    2.28  29.20


128个IO请求用了24ms已经全部到了磁盘控制器

grep D cap.log | sort -n -k 4 | awk '{print $4*1000}'

磁盘控制器的处理耗时,发现每隔8*512KB=4MB,会有10+ms的间隔,怀疑是磁头在做seek操作

grep -E "D|C" cap.log | sort -rk 6 |awk '{if($6=="D") s[$8"+"$10]=$4 e;else s[$8"+"$10]=($4-s[$8"+"$10])*1000}END{for(j in s)print j,s[j]}' | sort -t '+' -n | sort -nk 2


跟踪IO请求_第1张图片
image.png

你可能感兴趣的:(跟踪IO请求)