golang 使用线程池进行高并发爬虫

golang 使用线程池进行高并发爬虫

gospider 介绍

gospider 是一个 golang 爬虫神器,拥有从 python 爬虫过渡到 golang 爬虫所必需的全部库,帮助 python 爬虫从业者快速且无坑地过渡到 golang。

安装

go get -u gitee.com/baixudong/gospider

gitee地址

https://gitee.com/baixudong/gospider

github地址

https://github.com/baixudong007/gospider

代码示例

package main

import (
	"context"
	"log"
	"net/url"

	"gitee.com/baixudong/gospider/requests"
	"gitee.com/baixudong/gospider/thread"
)

// reqCli is the package-level request client. It is assigned in init and
// used by test to send every request.
var reqCli *requests.Client

// init constructs the package-level request client before main runs.
// If the client cannot be created, the error is logged and the program panics.
func init() {
	// Proxy callback handed to the client via ClientOption.GetProxy. This
	// example always returns an empty string and a nil error.
	// NOTE(review): an empty string presumably means "no proxy" — confirm
	// against the gospider documentation.
	proxyFor := func(_ context.Context, _ *url.URL) (string, error) {
		return "", nil
	}

	var err error
	if reqCli, err = requests.NewClient(nil, requests.ClientOption{GetProxy: proxyFor}); err != nil {
		log.Panic(err)
	}
}
// test is the task body submitted to the pool in main. It sends a GET request
// to http://myip.top through the shared reqCli client and logs the start, the
// outcome (status code on success, error text on failure) and the end of
// request number num.
func test(ctx context.Context, num int) {
	log.Printf("第%d个请求开始", num)

	if resp, reqErr := reqCli.Request(ctx, "get", "http://myip.top"); reqErr == nil {
		log.Printf("第%d个请求成功%d", num, resp.StatusCode())
	} else {
		log.Printf("第%d个请求失败%s", num, reqErr.Error())
	}

	// Logged on both the success and the failure path.
	log.Printf("第%d个请求结束", num)
}
// main creates a task pool limited to three concurrent tasks, submits ten
// test tasks numbered 0 through 9, and then calls Join on the pool.
func main() {
	const (
		maxConcurrency = 3  // concurrency limit passed to the pool
		totalTasks     = 10 // number of tasks submitted
	)

	pool := thread.NewClient(nil, maxConcurrency)
	for n := 0; n != totalTasks; n++ {
		// Submit one task; the loop index is passed as the argument for test.
		job := &thread.Task{
			Func: test,
			Args: []any{n},
		}
		pool.Write(job)
	}
	// NOTE(review): Join presumably blocks until all submitted tasks finish —
	// confirm against the gospider thread package.
	pool.Join()
}

你可能感兴趣的:(golang,爬虫,python)