Fetching URL status codes with Go goroutines

I had always used wget or curl in shell scripts to check status codes, but that approach is slow, so I decided to take advantage of Go's concurrency and write a small tool for the job. A quick test showed a clear speedup over the single-threaded version. The one thing that is hard to guarantee is whether hitting a site from many goroutines at once can make requests fail, so that a page that really returns 200 gets reported with a non-200 code; that problem is still open (see the retry sketch after the code).

The code is as follows:


package main

import (
    "bufio"
    "fmt"
    "net/http"
    "os"
    "strings"
    "sync"
)

var urlChan chan string
var wg sync.WaitGroup

func main() {
    // GOMAXPROCS has defaulted to the number of CPUs since Go 1.5,
    // so no manual runtime tuning is needed here.
    fmt.Println("start read")
    urlChan = make(chan string, 5000)

    // Start the workers before feeding the channel, so an input file
    // larger than the channel buffer cannot deadlock main.
    for i := 0; i < 10; i++ {
        wg.Add(1)
        go fetchUrl()
    }

    file, err := os.Open("C:\\Users\\chant\\Desktop\\addNewUrl")
    if err != nil {
        fmt.Println(err)
        return
    }
    defer file.Close()

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        urlChan <- scanner.Text() // one URL per line
    }
    if err := scanner.Err(); err != nil {
        fmt.Println(err)
    }
    // Closing the channel lets each worker's range loop exit once
    // every URL has been consumed.
    close(urlChan)

    wg.Wait()
    fmt.Println("DONE")
}

func fetchUrl() {
    defer wg.Done() // runs even if the worker exits early

    // Ranging over the channel replaces the racy len(urlChan) check:
    // the loop ends only when the channel is closed and drained.
    for url := range urlChan {
        response, err := http.Get(url)
        if err != nil {
            fmt.Printf("%s   %s\n", url, err)
            continue // skip this URL; don't kill the whole worker
        }
        // Only the status codes are needed, so close the body right
        // away instead of deferring (defers inside a loop pile up).
        response.Body.Close()

        // Walk the redirect chain backwards: each Response's Request
        // points at the Response that triggered it, ending at nil.
        codeArr := make([]int, 0, 10)
        for response != nil {
            code := response.StatusCode
            req := response.Request
            if req != nil && strings.Contains(req.URL.Path, "error.htm") {
                code = 404 // the target site redirects dead links to error.htm
            }
            codeArr = append(codeArr, code)
            if req == nil {
                break
            }
            response = req.Response
        }

        // The chain was collected newest-first; print it in request order.
        var result string
        for i := len(codeArr) - 1; i >= 0; i-- {
            result = fmt.Sprintf("%s  %d", result, codeArr[i])
        }
        fmt.Println(url, result)
    }
}
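
One possible answer to the open question above is to retry before trusting a bad result: a page that genuinely serves 200 is unlikely to fail several times in a row, while a truly broken one fails every time. Below is a minimal sketch of that idea; the fetchWithRetry helper, the attempt count, and the linear backoff are my own assumptions, not part of the tool above.

package main

import (
    "fmt"
    "net/http"
    "time"
)

// fetchWithRetry reports the last status code seen for url, retrying a
// few times so that a single transient failure under heavy concurrency
// does not turn a healthy 200 page into a false non-200 report.
func fetchWithRetry(url string, attempts int) (int, error) {
    lastCode := 0
    var lastErr error
    for i := 0; i < attempts; i++ {
        resp, err := http.Get(url)
        if err != nil {
            lastErr = err
        } else {
            resp.Body.Close() // only the status code is needed
            lastCode, lastErr = resp.StatusCode, nil
            if lastCode == http.StatusOK {
                break // healthy page, stop retrying
            }
        }
        // Simple linear backoff between attempts (an assumption; tune as needed).
        time.Sleep(time.Duration(i+1) * 500 * time.Millisecond)
    }
    return lastCode, lastErr
}

func main() {
    code, err := fetchWithRetry("http://example.com", 3)
    fmt.Println(code, err)
}

Swapping such a helper in for the plain http.Get inside fetchUrl would change the meaning of a reported non-200 from "failed once" to "failed several times in a row", at the cost of a few extra requests for genuinely broken URLs.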
