shell + go + mysql nginx日志统计 (二) :统计及存入数据库

开始

首先把我们要采集的日志拿出来,大概这个样子

123.131.xx.xxx 307 0.012 [2018-01-16T10:42:50+08:00] POST /login HTTP/1.1 - 0 .......
121.19.xx.xx 200 0.010 [2018-01-16T10:42:51+08:00] GET / HTTP/1.1 - 4228 ........
120.221.xxx.xx 200 0.007 [2018-01-16T10:42:56+08:00] GET / HTTP/1.1 - 4227 .........

而我所接触的服务中,一个服务大概每天产生90万条访问日志,而类似的服务有6个左右。其他一些零散的服务每个每天大概产生日志30-40万条。再来看看我的机器性能:4核8G、带宽1M的一台机器,上面运行了zabbix、jenkins、mysql等程序,白天有日志查看需求的时候,带宽占用也比较大。

所以一次性运行完,而且还要让统计后的结果尽量的小,就成了需要思考的问题。不是收集所有日志而只是把相同的统计到一起,所以时间粒度也就需要放大一点,这里我统计的每个小时不同URL的访问时间,IP,状态码等。当然如果需要更加精确的统计数据比如说秒,分也是可以做的,这个放到后面再说。

为什么用go?

1.我刚好开始学go语言,才把语法弄清楚了一些。
2.看到一篇讲词频统计的代码片段(地址 https://studygolang.com/articles/3393 ),觉得这个刚好能解决我的问题,就照着写了。
3.一次编译到处运行,这一点是我觉得最爽的地方

首先是引入需要用到的包

package main

import (
    "bufio"
    "fmt"
    "os"
    "strings"
    "flag"
    "github.com/astaxie/beego/orm"
    _ "github.com/go-sql-driver/mysql"
    "strconv"
    "time"
)

之后定义数据的格式

// ngx_res is the ORM model for hourly nginx response-time statistics.
// One row describes one project/URL/hour combination; the counters are the
// histogram buckets that main copies out of time_res via Add_res.
// NOTE(review): thresholds are compared against the third log field,
// presumably nginx $request_time in seconds — confirm against the log_format.
type ngx_res struct {
    Id        int64
    Date      time.Time // hour the counters belong to
    Url       string
    Project   string
    Xiaoyu10  int // requests with response time <= 0.01
    Xiaoyu50  int // requests with response time in (0.01, 0.05]
    Xiaoyu100 int // requests with response time in (0.05, 0.1]
    Xiaoyu500 int // requests with response time in (0.1, 0.5]
    Dayu500   int // requests with response time > 0.5
}
// ngx_ip is the ORM model for the number of requests a client IP made to a
// project within one hour.
type ngx_ip struct {
    Id      int64
    Date    time.Time // hour the counter belongs to
    Project string
    Ip      string
    // NOTE(review): Add_ip inserts an integer count into this column while the
    // model declares it as a string (ngx_access.Times is an int) — confirm
    // whether the column type should be numeric.
    Times string
}
// ngx_access is the ORM model for hourly access counts, grouped by project,
// HTTP status code and URL.
type ngx_access struct {
    Id      int64
    Date    time.Time // hour the counter belongs to
    Project string
    Code    int64 // HTTP status code
    Url     string
    Times   int // number of matching requests in that hour
}
// time_res is the in-memory response-time histogram kept per "hour:url" key.
// Field order matters: values are built with positional composite literals.
type time_res struct {
    times_10, times_50, times_100, times_500, times_dayu_500 int
}

然后定义三个map 之后会把统计的东西放进去

// Aggregation tables filled by read and written to the database by main.
var (
    // hourmap counts requests per "hour:url:status" key.
    hourmap = make(map[string]int)
    // resmap holds the response-time histogram per "hour:url" key.
    resmap = make(map[string]time_res)
    // ipmap counts requests per "hour:ip" key.
    ipmap = make(map[string]int)
)

定义一个读取及统计文件的函数

// read scans the nginx access log at filename line by line and accumulates
// hourly statistics into the package-level maps hourmap, ipmap and resmap.
//
// Each line is expected to be space separated in the form
//
//     ip status response_time [timestamp] method url ...
//
// Lines that are too short or whose fields cannot be parsed are skipped
// instead of aborting the whole run. If the file cannot be opened the error
// is printed and nothing is counted.
func read(filename string) {
    fi, err := os.Open(filename)
    if err != nil {
        fmt.Printf("Error: %s\n", err)
        return
    }
    defer fi.Close()

    br := bufio.NewReader(fi)
    for {
        line, err := br.ReadString('\n')
        // ReadString returns whatever it read together with the error, so a
        // final line without a trailing newline still has to be counted.
        if line != "" {
            countLine(line)
        }
        if err != nil {
            break
        }
    }
}

// countLine parses a single access-log line and updates hourmap, ipmap and
// resmap; malformed lines are ignored.
func countLine(line string) {
    split := strings.Split(strings.TrimRight(line, "\r\n"), " ")
    if len(split) < 6 {
        return
    }
    ip := split[0]
    code := split[1]

    // "[2018-01-16T10:42:50+08:00]" -> "2018-01-16T10" (date plus hour).
    bracket := strings.Split(split[3], "[")
    if len(bracket) < 2 {
        return
    }
    date_string := strings.Split(bracket[1], ":")[0]

    // Drop the query string and anything after "=" so that parameterised
    // requests collapse onto one URL.
    url := strings.Split(strings.Split(split[5], "?")[0], "=")[0]

    resp, err := strconv.ParseFloat(split[2], 64)
    if err != nil {
        return
    }

    hourmap[date_string+":"+url+":"+code]++
    ipmap[date_string+":"+ip]++

    // A missing key yields the zero-value histogram, so new and existing keys
    // share one bucketing path and cannot drift apart.
    key := date_string + ":" + url
    v := resmap[key]
    switch {
    case resp <= 0.01:
        v.times_10++
    case resp <= 0.05:
        v.times_50++
    case resp <= 0.1:
        v.times_100++
    case resp <= 0.5:
        v.times_500++
    default:
        v.times_dayu_500++
    }
    resmap[key] = v
}

写入数据库

需要在这里说一下:如果你统计的是完全不相干的项目的日志,我认为不放在同一个表里面比较好。做法是修改上面数据格式(结构体)的名称,在下面初始化数据库的时候修改new()中的内容,然后再改一下后面sql中的表名。

这里写入数据库我使用beego提供的orm,事实上我只会这一种方式。选用的数据库是mariadb。这里有个坑:mariadb的timezone CST 是美国中部时间。

初始化数据库

// RegisterDb registers the MySQL driver, the "default" database alias and
// the three statistics models with the beego ORM.
//
// The arguments are joined into the DSN
// "uname:passwd@tcp(ipaddr:port)/databasename?charset=utf8".
// Registration errors are printed instead of being dropped, so a bad
// connection setting is visible before the first query fails.
func RegisterDb(uname string, passwd string, ipaddr string, port string, databasename string) {
    if err := orm.RegisterDriver("mysql", orm.DRMySQL); err != nil {
        fmt.Printf("Error: register mysql driver: %s\n", err)
    }

    dsn := uname + ":" + passwd + "@tcp(" + ipaddr + ":" + port + ")/" + databasename + "?charset=utf8"
    // NOTE(review): the trailing 10 is beego's optional numeric parameter,
    // presumably the max idle connection count — confirm against the ORM docs.
    if err := orm.RegisterDataBase("default", "mysql", dsn, 10); err != nil {
        fmt.Printf("Error: register database: %s\n", err)
    }

    orm.RegisterModel(new(ngx_access), new(ngx_ip), new(ngx_res))
}

定义插入数据的函数

// Add_access inserts one row into ngx_access.
// code is the HTTP status as text; it is converted to an integer first and a
// conversion failure is returned without touching the database.
func Add_access(project string, date string, code string, url string, times int) error {
    const insertSQL = "INSERT INTO `ngx_access` (`date`, `project`, `code`, `url`, `times`) VALUES (?, ?, ?, ?, ?);"

    ormer := orm.NewOrm()

    status, err := strconv.ParseInt(code, 10, 64)
    if err != nil {
        return err
    }

    _, err = ormer.Raw(insertSQL, date, project, status, url, times).Exec()
    return err
}
// Add_ip inserts one row into ngx_ip recording how many requests ip made to
// project during the hour given by date. It returns the error from Exec.
func Add_ip(project string, date string, ip string, times int) error {
    const insertSQL = "INSERT INTO `ngx_ip` (`date`, `project`, `ip`,`times`) VALUES (?, ?, ?, ?);"

    ormer := orm.NewOrm()
    _, err := ormer.Raw(insertSQL, date, project, ip, times).Exec()
    return err
}
// Add_res inserts one row into ngx_res with the five response-time bucket
// counters for url within the hour given by date. It returns the error from
// Exec.
func Add_res(project string, date string, url string, xiaoyu10 int, xiaoyu50 int, xiaoyu100 int, xiaoyu500 int, dayu500 int) error {
    const insertSQL = "INSERT INTO `ngx_res` (`date`, `project`,`url`,`xiaoyu10`,`xiaoyu50`,`xiaoyu100`,`xiaoyu500`,`dayu500`) VALUES (?, ?, ?, ?, ?, ?, ?, ?);"

    ormer := orm.NewOrm()
    _, err := ormer.Raw(
        insertSQL,
        date, project, url,
        xiaoyu10, xiaoyu50, xiaoyu100, xiaoyu500, dayu500,
    ).Exec()
    return err
}

再定义一个时间替换函数,作用是把字符串转换为时间类型

// time_tihuan converts a string such as "2018-01-16T10:00:00 +0800" into a
// time.Time. When the string does not match the layout the parse error is
// printed and the zero time is returned.
func time_tihuan(date_hour string) time.Time {
    const layout = "2006-01-02T15:04:05 -0700"

    parsed, parseErr := time.Parse(layout, date_hour)
    if parseErr == nil {
        return parsed
    }
    fmt.Println(parseErr)
    return parsed
}

初始化数据库填入数据库的连接信息

// init registers the database connection before main runs.
// Replace the placeholder user, password, host, port and database name with
// real connection settings.
func init() {
    RegisterDb("username", "password", "xxx.xxx.xxx.xxx", "xxxx", "databasename")
}

主函数定义

// main aggregates one nginx access log and stores the hourly statistics in
// the database.
//
// The log is selected with -filename and must be named <date>_<project>.log;
// the project name is the text between the first "_" and the first "." of
// the file's base name.
func main() {
    var filename string
    flag.StringVar(&filename, "filename", "2017-12-35_xxxxx.log", "nginx access log filename!")
    flag.Parse()

    // Work on the base name so that directories (and dots inside them) do not
    // disturb the split, and reject names without "_" instead of panicking.
    // NOTE(review): only "/" is treated as a path separator here.
    base := filename[strings.LastIndex(filename, "/")+1:]
    nameParts := strings.Split(strings.Split(base, ".")[0], "_")
    if len(nameParts) < 2 {
        fmt.Printf("Error: filename %q must look like <date>_<project>.log\n", filename)
        os.Exit(1)
    }
    project := nameParts[1]

    // read fills hourmap, ipmap and resmap.
    read(filename)

    orm.Debug = true
    if err := orm.RunSyncdb("default", false, true); err != nil {
        fmt.Printf("Error: sync database: %s\n", err)
    }

    // URLs hit exactly once in an hour (typically scanners) are not stored
    // individually; they are folded into one "oneurl"/200 row per hour.
    singles := make(map[string]int)
    for k, v := range hourmap {
        hour, url, code, ok := splitAccessKey(k)
        if !ok {
            continue
        }
        if v == 1 {
            singles[hour]++
            continue
        }
        if err := Add_access(project, hourToDate(hour), code, url, v); err != nil {
            fmt.Printf("Error: insert ngx_access %q: %s\n", k, err)
        }
    }
    for hour, v := range singles {
        if err := Add_access(project, hourToDate(hour), "200", "oneurl", v); err != nil {
            fmt.Printf("Error: insert ngx_access %q: %s\n", hour, err)
        }
    }

    // Only IPs with more than 5 requests in an hour are stored.
    for k, v := range ipmap {
        if v <= 5 {
            continue
        }
        // SplitN keeps an address containing ":" (IPv6) intact.
        parts := strings.SplitN(k, ":", 2)
        if len(parts) != 2 {
            continue
        }
        if err := Add_ip(project, hourToDate(parts[0]), parts[1], v); err != nil {
            fmt.Printf("Error: insert ngx_ip %q: %s\n", k, err)
        }
    }

    for k, v := range resmap {
        // SplitN keeps a URL containing ":" intact.
        parts := strings.SplitN(k, ":", 2)
        if len(parts) != 2 {
            continue
        }
        if err := Add_res(project, hourToDate(parts[0]), parts[1], v.times_10, v.times_50, v.times_100, v.times_500, v.times_dayu_500); err != nil {
            fmt.Printf("Error: insert ngx_res %q: %s\n", k, err)
        }
    }
}

// hourToDate turns an hour key such as "2018-01-16T10" into the string that
// is written to the date column.
func hourToDate(hour string) string {
    return time_tihuan(hour + ":00:00 +0800").Format("2006-01-02 15:04:05 -0700")
}

// splitAccessKey splits an "hour:url:code" key. The hour ends at the first
// ":" and the code starts after the last one, so the URL may contain ":".
func splitAccessKey(key string) (hour, url, code string, ok bool) {
    first := strings.Index(key, ":")
    last := strings.LastIndex(key, ":")
    if first < 0 || last <= first {
        return "", "", "", false
    }
    return key[:first], key[first+1 : last], key[last+1:], true
}

完整的代码

package main

import (
    "bufio"
    "fmt"
    "os"
    "strings"
    //"sort"
    "flag"
    "github.com/astaxie/beego/orm"
    _ "github.com/go-sql-driver/mysql"
    "strconv"
    "time"
)

// ngx_res is the ORM model for hourly nginx response-time statistics: one
// row per project/URL/hour, carrying the five histogram counters that main
// copies out of time_res via Add_res.
type ngx_res struct {
    Id           int64
    Date         time.Time
    Url, Project string

    // Bucket counters, fastest to slowest; Dayu500 is the overflow bucket.
    Xiaoyu10, Xiaoyu50, Xiaoyu100, Xiaoyu500 int
    Dayu500                                  int
}
// ngx_ip is the ORM model for the number of requests a client IP made to a
// project within one hour.
type ngx_ip struct {
    Id          int64
    Date        time.Time
    Project, Ip string
    // NOTE(review): Add_ip inserts an integer count here although the field
    // is declared as a string — confirm the intended column type.
    Times string
}
// ngx_access is the ORM model for hourly access counts, grouped by project,
// HTTP status code and URL. Rows are written by Add_access.
type ngx_access struct {
    Id      int64
    // Date is the hour the counter belongs to.
    Date    time.Time
    Project string
    // Code is the HTTP status code, parsed from the log by Add_access.
    Code    int64
    Url     string
    // Times is the number of matching requests in that hour.
    Times   int
}
// time_res is the in-memory response-time histogram kept per "hour:url" key.
// Field order matters: values are built with positional composite literals.
type time_res struct {
    times_10, times_50, times_100, times_500, times_dayu_500 int
}

// Add_access inserts one row into ngx_access.
// code is the HTTP status as text; it is converted to an integer first and a
// conversion failure is returned without touching the database.
func Add_access(project string, date string, code string, url string, times int) error {
    const insertSQL = "INSERT INTO `ngx_access` (`date`, `project`, `code`, `url`, `times`) VALUES (?, ?, ?, ?, ?);"

    ormer := orm.NewOrm()

    status, err := strconv.ParseInt(code, 10, 64)
    if err != nil {
        return err
    }

    _, err = ormer.Raw(insertSQL, date, project, status, url, times).Exec()
    return err
}
// Add_ip inserts one row into ngx_ip recording how many requests ip made to
// project during the hour given by date. It returns the error from Exec.
func Add_ip(project string, date string, ip string, times int) error {
    const insertSQL = "INSERT INTO `ngx_ip` (`date`, `project`, `ip`,`times`) VALUES (?, ?, ?, ?);"

    ormer := orm.NewOrm()
    _, err := ormer.Raw(insertSQL, date, project, ip, times).Exec()
    return err
}
// Add_res inserts one row into ngx_res with the five response-time bucket
// counters for url within the hour given by date. It returns the error from
// Exec.
func Add_res(project string, date string, url string, xiaoyu10 int, xiaoyu50 int, xiaoyu100 int, xiaoyu500 int, dayu500 int) error {
    const insertSQL = "INSERT INTO `ngx_res` (`date`, `project`,`url`,`xiaoyu10`,`xiaoyu50`,`xiaoyu100`,`xiaoyu500`,`dayu500`) VALUES (?, ?, ?, ?, ?, ?, ?, ?);"

    ormer := orm.NewOrm()
    _, err := ormer.Raw(
        insertSQL,
        date, project, url,
        xiaoyu10, xiaoyu50, xiaoyu100, xiaoyu500, dayu500,
    ).Exec()
    return err
}

// RegisterDb registers the MySQL driver, the "default" database alias and
// the three statistics models with the beego ORM.
//
// The arguments are joined into the DSN
// "uname:passwd@tcp(ipaddr:port)/databasename?charset=utf8".
// Registration errors are printed instead of being dropped, so a bad
// connection setting is visible before the first query fails.
func RegisterDb(uname string, passwd string, ipaddr string, port string, databasename string) {
    if err := orm.RegisterDriver("mysql", orm.DRMySQL); err != nil {
        fmt.Printf("Error: register mysql driver: %s\n", err)
    }

    dsn := uname + ":" + passwd + "@tcp(" + ipaddr + ":" + port + ")/" + databasename + "?charset=utf8"
    // NOTE(review): the trailing 10 is beego's optional numeric parameter,
    // presumably the max idle connection count — confirm against the ORM docs.
    if err := orm.RegisterDataBase("default", "mysql", dsn, 10); err != nil {
        fmt.Printf("Error: register database: %s\n", err)
    }

    orm.RegisterModel(new(ngx_access), new(ngx_ip), new(ngx_res))
}

// Aggregation tables filled by read and written to the database by main.
var (
    // hourmap counts requests per "hour:url:status" key.
    hourmap = make(map[string]int)
    // resmap holds the response-time histogram per "hour:url" key.
    resmap = make(map[string]time_res)
    // ipmap counts requests per "hour:ip" key.
    ipmap = make(map[string]int)
)

// read scans the nginx access log at filename line by line and accumulates
// hourly statistics into the package-level maps hourmap, ipmap and resmap.
//
// Each line is expected to be space separated in the form
//
//     ip status response_time [timestamp] method url ...
//
// Lines that are too short or whose fields cannot be parsed are skipped
// instead of aborting the whole run. If the file cannot be opened the error
// is printed and nothing is counted.
func read(filename string) {
    fi, err := os.Open(filename)
    if err != nil {
        fmt.Printf("Error: %s\n", err)
        return
    }
    defer fi.Close()

    br := bufio.NewReader(fi)
    for {
        line, err := br.ReadString('\n')
        // ReadString returns whatever it read together with the error, so a
        // final line without a trailing newline still has to be counted.
        if line != "" {
            countLine(line)
        }
        if err != nil {
            break
        }
    }
}

// countLine parses a single access-log line and updates hourmap, ipmap and
// resmap; malformed lines are ignored.
func countLine(line string) {
    split := strings.Split(strings.TrimRight(line, "\r\n"), " ")
    if len(split) < 6 {
        return
    }
    ip := split[0]
    code := split[1]

    // "[2018-01-16T10:42:50+08:00]" -> "2018-01-16T10" (date plus hour).
    bracket := strings.Split(split[3], "[")
    if len(bracket) < 2 {
        return
    }
    date_string := strings.Split(bracket[1], ":")[0]

    // Drop the query string and anything after "=" so that parameterised
    // requests collapse onto one URL.
    url := strings.Split(strings.Split(split[5], "?")[0], "=")[0]

    resp, err := strconv.ParseFloat(split[2], 64)
    if err != nil {
        return
    }

    hourmap[date_string+":"+url+":"+code]++
    ipmap[date_string+":"+ip]++

    // A missing key yields the zero-value histogram, so new and existing keys
    // share one bucketing path and cannot drift apart.
    key := date_string + ":" + url
    v := resmap[key]
    switch {
    case resp <= 0.01:
        v.times_10++
    case resp <= 0.05:
        v.times_50++
    case resp <= 0.1:
        v.times_100++
    case resp <= 0.5:
        v.times_500++
    default:
        v.times_dayu_500++
    }
    resmap[key] = v
}

// time_tihuan converts a string such as "2018-01-16T10:00:00 +0800" into a
// time.Time. When the string does not match the layout the parse error is
// printed and the zero time is returned.
func time_tihuan(date_hour string) time.Time {
    const layout = "2006-01-02T15:04:05 -0700"

    parsed, parseErr := time.Parse(layout, date_hour)
    if parseErr == nil {
        return parsed
    }
    fmt.Println(parseErr)
    return parsed
}

// init registers the database connection before main runs.
// The values below are placeholders for the real connection settings.
func init() {
    const (
        user     = "username"
        password = "password"
        host     = "ipaddr"
        port     = "port"
        database = "databasename"
    )
    RegisterDb(user, password, host, port, database)
}
// main aggregates one nginx access log and stores the hourly statistics in
// the database.
//
// The log is selected with -filename and must be named <date>_<project>.log;
// the project name is the text between the first "_" and the first "." of
// the file's base name.
func main() {
    var filename string
    flag.StringVar(&filename, "filename", "2017-12-35_mobile.log", "nginx access log filename!")
    flag.Parse()

    // Work on the base name so that directories (and dots inside them) do not
    // disturb the split, and reject names without "_" instead of panicking.
    // NOTE(review): only "/" is treated as a path separator here.
    base := filename[strings.LastIndex(filename, "/")+1:]
    nameParts := strings.Split(strings.Split(base, ".")[0], "_")
    if len(nameParts) < 2 {
        fmt.Printf("Error: filename %q must look like <date>_<project>.log\n", filename)
        os.Exit(1)
    }
    project := nameParts[1]

    // read fills hourmap, ipmap and resmap.
    read(filename)

    orm.Debug = true
    if err := orm.RunSyncdb("default", false, true); err != nil {
        fmt.Printf("Error: sync database: %s\n", err)
    }

    // URLs hit exactly once in an hour (typically scanners) are not stored
    // individually; they are folded into one "oneurl"/200 row per hour.
    singles := make(map[string]int)
    for k, v := range hourmap {
        hour, url, code, ok := splitAccessKey(k)
        if !ok {
            continue
        }
        if v == 1 {
            singles[hour]++
            continue
        }
        if err := Add_access(project, hourToDate(hour), code, url, v); err != nil {
            fmt.Printf("Error: insert ngx_access %q: %s\n", k, err)
        }
    }
    for hour, v := range singles {
        if err := Add_access(project, hourToDate(hour), "200", "oneurl", v); err != nil {
            fmt.Printf("Error: insert ngx_access %q: %s\n", hour, err)
        }
    }

    // Only IPs with more than 5 requests in an hour are stored.
    for k, v := range ipmap {
        if v <= 5 {
            continue
        }
        // SplitN keeps an address containing ":" (IPv6) intact.
        parts := strings.SplitN(k, ":", 2)
        if len(parts) != 2 {
            continue
        }
        if err := Add_ip(project, hourToDate(parts[0]), parts[1], v); err != nil {
            fmt.Printf("Error: insert ngx_ip %q: %s\n", k, err)
        }
    }

    for k, v := range resmap {
        // SplitN keeps a URL containing ":" intact.
        parts := strings.SplitN(k, ":", 2)
        if len(parts) != 2 {
            continue
        }
        if err := Add_res(project, hourToDate(parts[0]), parts[1], v.times_10, v.times_50, v.times_100, v.times_500, v.times_dayu_500); err != nil {
            fmt.Printf("Error: insert ngx_res %q: %s\n", k, err)
        }
    }
}

// hourToDate turns an hour key such as "2018-01-16T10" into the string that
// is written to the date column.
func hourToDate(hour string) string {
    return time_tihuan(hour + ":00:00 +0800").Format("2006-01-02 15:04:05 -0700")
}

// splitAccessKey splits an "hour:url:code" key. The hour ends at the first
// ":" and the code starts after the last one, so the URL may contain ":".
func splitAccessKey(key string) (hour, url, code string, ok bool) {
    first := strings.Index(key, ":")
    last := strings.LastIndex(key, ":")
    if first < 0 || last <= first {
        return "", "", "", false
    }
    return key[:first], key[first+1 : last], key[last+1:], true
}

你可能感兴趣的:(shell + go + mysql nginx日志统计 (二) :统计及存入数据库)