// 2017-12-29 go dygod

// DATE: 2017-12-29
// The sequential version takes about 12.39 seconds (Total time: 12.393550402).
// The goroutine version takes only about 0.5 to 1.13 seconds ([0.5, 1.134134881]).
//

package main

import (
    "fmt"
    "github.com/PuerkitoBio/goquery"
    "log"
    "bytes"
    "io/ioutil"
    "golang.org/x/text/transform"
    "golang.org/x/text/encoding/simplifiedchinese"
    "strconv"
    "time"
)

const (
    // host is the site origin. getPage prefixes it to the hrefs it extracts
    // from the list page before storing them.
    host = "http://www.ygdy8.net"
    // url is the list-page URL prefix. getPage appends the page index and
    // ".html" to it to build the address it fetches.
    url = "http://www.ygdy8.net/html/gndy/dyzz/list_23_"
)

// pages holds the page URLs appended by getPage; main starts one getBTLink
// goroutine per entry.
var pages []string
// main scrapes list page 1, fetches every collected page concurrently, prints
// each download link as it arrives, and finally prints the total elapsed time
// in seconds.
func main() {
    start := time.Now()
    getPage(1)

    ch := make(chan string)
    done := make(chan struct{})
    for _, page := range pages {
        // Pass page as an argument so each goroutine gets its own copy.
        go func(page string) {
            getBTLink(page, ch)
            // ch is unbuffered, so every link this worker sent has already
            // been received by the time this completion signal is delivered.
            done <- struct{}{}
        }(page)
    }

    // A single page may yield zero, one or many links, so the number of
    // received links cannot be used to decide when to stop. Count finished
    // workers instead and keep draining links until all of them are done.
    for pending := len(pages); pending > 0; {
        select {
        case link := <-ch:
            fmt.Printf("Link=%q\n", link)
        case <-done:
            pending--
        }
    }
    fmt.Printf("Total time: %v\n", time.Since(start).Seconds())
}

// getPage fetches list page number index, prints the title and absolute URL of
// every anchor matched by ".co_content8 .ulink", and appends that URL to the
// package-level pages slice.
//
// The process exits via log.Fatal if the list page cannot be fetched.
func getPage(index int) {
    doc, err := goquery.NewDocument(url + strconv.Itoa(index) + ".html")
    if err != nil {
        log.Fatal(err) // TODO: schedule a retry instead of exiting
    }

    doc.Find(".co_content8 .ulink").Each(func(i int, s *goquery.Selection) {
        href, ok := s.Attr("href")
        if !ok || href == "" {
            // Without a target the entry would be queued as the bare host URL.
            return
        }
        href = host + href

        // The page is GBK encoded, so the anchor text must be converted
        // before it is printed.
        rawTitle := s.Text()
        title, err := GbkToUtf8([]byte(rawTitle))
        if err != nil {
            log.Printf("decoding title for %q: %v", href, err)
            title = []byte(rawTitle) // fall back to the undecoded text
        }

        fmt.Printf("Title=%q\nLink=%q\n\n", title, href)
        pages = append(pages, href)
    })
}

// getBTLink fetches the page at url and sends the href of every anchor matched
// by "#Zoom td a" on ch, converted from GBK to UTF-8.
//
// It sends zero or more values per call (one per matching anchor that has an
// href) and never closes ch, so receivers must not assume exactly one value
// per call. The process exits via log.Fatal if the page cannot be fetched.
func getBTLink(url string, ch chan <- string)  {
    doc, err := goquery.NewDocument(url)
    if err != nil {
        log.Fatal(err) // TODO: schedule a retry instead of exiting
    }

    doc.Find("#Zoom td a").Each(func(i int, selection *goquery.Selection) {
        link, ok := selection.Attr("href")
        if !ok {
            // An anchor without an href carries no download link.
            return
        }

        decoded, err := GbkToUtf8([]byte(link))
        if err != nil {
            // Keep the link rather than sending an empty string.
            log.Printf("decoding link %q: %v", link, err)
            ch <- link
            return
        }
        ch <- string(decoded)
    })
}

// 页面是 gbk 编码
func GbkToUtf8(s []byte) ([]byte, error) {
    reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
    d, e := ioutil.ReadAll(reader)
    if e != nil {
        return nil, e
    }
    return d, nil
}

// You may also be interested in: (2017-12-29 go dygod)