Golang 协程实战:抓取豆瓣电影 Top250 数据


package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "os"
    "regexp"
    "runtime"
    "strconv"
    "time"
)
// Spider bundles what get_html_header needs for one HTTP request:
// the target URL and the request headers to send with it.
type Spider struct {
    // url is the address passed to http.NewRequest.
    url    string
    // header maps header names to values; every entry is added to the request.
    header map[string]string
}


// get_html_header performs an HTTP GET on keyword.url, adding every entry of
// keyword.header as a request header, and sends the response body as a string
// on the package-level Channel.
//
// The method has no return value; its result is delivered through Channel.
// If building the request, performing it, or reading the body fails, the error
// is reported on stderr and an empty string is sent instead, so every call
// still produces exactly one message and a receiver counting messages does
// not block.
// NOTE(review): the consumer of Channel is not visible from here - confirm it
// tolerates an empty page.
func (keyword Spider) get_html_header() {
    // fail reports the error and still delivers one (empty) message.
    fail := func(action string, err error) {
        fmt.Fprintf(os.Stderr, "%s %s: %v\n", action, keyword.url, err)
        Channel <- ""
    }

    // Bound the whole request so a stalled connection cannot hang this
    // goroutine forever.
    client := &http.Client{Timeout: 30 * time.Second}

    req, err := http.NewRequest("GET", keyword.url, nil)
    if err != nil {
        // req is nil on error; using it below would panic.
        fail("building request for", err)
        return
    }
    for key, value := range keyword.header {
        req.Header.Add(key, value)
    }

    resp, err := client.Do(req)
    if err != nil {
        // resp is nil on error; deferring resp.Body.Close() would panic.
        fail("requesting", err)
        return
    }
    // Close the body on every exit path once we know resp is valid.
    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        fail("reading response body of", err)
        return
    }

    Channel <- string(body)
}

// parse launches one goroutine per result page of the Douban Top 250 list.
// Each goroutine runs Spider.get_html_header for that page's URL with a shared
// set of browser-like request headers.
//
// parse does not wait for the goroutines it starts: the closing message is
// printed as soon as all of them have been launched.
func parse() {
    const (
        pageCount = 10 // number of pages requested
        pageSize  = 25 // increment of the "start" query parameter per page
    )

    // Headers sent with every page request; parse never modifies the map
    // after building it.
    requestHeader := map[string]string{
        "Host":                      "movie.douban.com",
        "Connection":                "keep-alive",
        "Cache-Control":             "max-age=0",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent":                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36",
        "Accept":                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Referer":                   "https://movie.douban.com/top250",
    }

    for page := 0; page < pageCount; page++ {
        fmt.Println("正在抓取第" + strconv.Itoa(page) + "页......")

        offset := strconv.Itoa(page * pageSize)
        target := Spider{
            url:    "https://movie.douban.com/top250?start=" + offset + "&filter=",
            header: requestHeader,
        }
        // Fetch this page concurrently; the result arrives via the method itself.
        go target.get_html_header()
    }

    fmt.Print("抓取完事了")
}

func process(html string){


    //html := spider.get_html_header()
    //评价人数
    pattern2:=`(.*?)评价`
    rp2 := regexp.MustCompile(pattern2)
    find_txt2 := rp2.FindAllStringSubmatch(html,-1)

    //评分
    pattern3:=`property="v:average">(.*?)`
    rp3 := regexp.MustCompile(pattern3)
    find_txt3 := rp3.FindAllStringSubmatch(html,-1)

    //电影名称
    pattern4:=`alt="(.*?)" src="`
    rp4 := regexp.MustCompile(pattern4)
    find_txt4 := rp4.FindAllStringSubmatch(html,-1)

    // 写入UTF-8 BOM 防止乱码
    File.WriteString("\xEF\xBB\xBF")
    //  打印全部数据和写入excel文件
    for i:=0;i

 

你可能感兴趣的:(golang,协程)