// Go spider example: a minimal crawler built on the go_spider framework.

package main

import (

"github.com/PuerkitoBio/goquery"

"github.com/hu17889/go_spider/core/common/page"

"github.com/hu17889/go_spider/core/pipeline"

"github.com/hu17889/go_spider/core/spider"

)

// MyPageProcesser is the page processor used by this example. It holds no
// state; it exists only to carry the Process and Finish methods.
type MyPageProcesser struct{}

// NewMyPageProcesser returns a pointer to a freshly allocated, zero-valued
// MyPageProcesser.
func NewMyPageProcesser() *MyPageProcesser {
	return new(MyPageProcesser)
}

// Process parses the HTML document of a downloaded page and prints the text of
// every element matching the selector "td div[class='flex-middle']".
//
// If the page reports that it was not fetched successfully, its error message
// is printed and the page is skipped instead of being parsed. Nothing is
// stored on the page here; matches are only printed.
//
// Package goquery (http://godoc.org/github.com/PuerkitoBio/goquery) is used to
// parse the HTML.
func (*MyPageProcesser) Process(p *page.Page) {
	// Do not touch the HTML parser of a page whose download failed.
	// NOTE(review): whether GetHtmlParser returns nil in that case is not
	// visible from this file — the guard is defensive; confirm against the
	// go_spider page package.
	if !p.IsSucc() {
		println(p.Errormsg())
		return
	}

	doc := p.GetHtmlParser()
	doc.Find("td div[class='flex-middle']").Each(func(_ int, sel *goquery.Selection) {
		println(sel.Text())
	})
}

// Finish is a no-op: this example has nothing to clean up or flush.
// NOTE(review): presumably called by the spider once crawling is done —
// confirm against the go_spider page processor interface.
func (*MyPageProcesser) Finish() {}

func main() {

spider.NewSpider(NewMyPageProcesser(), "TaskName").

AddUrl("http://101.200.54.63/", "html").    // start url, html is the responce type ("html" or "json")

AddPipeline(pipeline.NewPipelineConsole()). // print result on screen

SetThreadnum(3).                            // crawl request by three Coroutines

Run()

}

// You may also be interested in: go spider example