Golang Musings: BFS Maze Solving and a Crawler Application

Breadth-first search (BFS) practice: solving a maze


package main

import (
	"fmt"
	"strings"
)

// MAZE: the first two numbers are the row and column counts,
// followed by row*col cell values (0 = open, 1 = wall).
const MAZE = `6 5 0 1 0 0 0 0 0 0 1 0 0 1 0 1 0 1 1 1 0 0 0 1 0 0 1 0 1 0 0 0`

// point is a cell coordinate in the maze: row i, column j.
type point struct {
	i, j int
}

// getArr parses the maze string: first the row and column counts,
// then row*col cell values.
func getArr(maze string) [][]int {
	var row, col int
	read := strings.NewReader(maze)
	fmt.Fscanf(read, "%d %d", &row, &col)
	fmt.Println(row, col)
	arr := make([][]int, row)
	for i := range arr {
		arr[i] = make([]int, col)
		for j := range arr[i] {
			fmt.Fscanf(read, "%d", &arr[i][j])
		}
	}
	return arr
}

// direction vectors, counterclockwise: up, left, down, right
var po = []point{
	{-1, 0}, {0, -1}, {1, 0}, {0, 1},
}

// add returns p shifted by the offset r.
func (p point) add(r point) point {
	return point{p.i + r.i, p.j + r.j}
}

// at returns the value at p and reports whether p lies inside grid.
func (p point) at(grid [][]int) (int, bool) {
	if p.i < 0 || p.i > len(grid)-1 {
		return 0, false
	}
	if p.j < 0 || p.j > len(grid[p.i])-1 {
		return 0, false
	}
	return grid[p.i][p.j], true
}
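
Because at returns (value, ok), a caller can bounds-check and read a cell in one expression, Go's comma-ok idiom. A tiny usage sketch, assuming a parsed grid named arr:

	if val, ok := (point{2, 3}).at(arr); ok && val == 0 {
		// (2, 3) is inside the maze and walkable
	}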

// bfs fills a matrix with the step count from start to every reachable
// cell; 0 marks walls, the start itself, and unreachable cells.
func bfs(a [][]int, start, end point) [][]int {
	// initialize the steps matrix and the queue
	steps := make([][]int, len(a))
	for i := range steps {
		steps[i] = make([]int, len(a[0]))
	}
	Q := []point{start}
	// keep searching while the queue is non-empty
	for len(Q) > 0 {
		curr := Q[0]
		Q = Q[1:]
		// stop once the end point is dequeued
		if curr == end {
			break
		}
		for _, p := range po {
			next := curr.add(p)
			val, ok := next.at(a)
			if !ok || val == 1 {
				continue
			}
			val, ok = next.at(steps)
			if !ok || val != 0 {
				continue
			}
			if next == start {
				continue
			}
			// mark next as one step farther than curr
			currNum, _ := curr.at(steps)
			steps[next.i][next.j] = currNum + 1
			Q = append(Q, next)
		}
	}
	return steps
}
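
bfs records distances but not the route. Since each visited cell's step count is exactly one more than its predecessor's, one shortest path can be recovered by walking backwards from end. The walkBack helper below is a sketch of that idea, added for illustration; it reuses point, po, and at from above:

// walkBack (illustrative helper, not part of the original program) recovers
// one shortest path by stepping from end back toward start, each time moving
// to a neighbor whose step count is exactly one less. Returns nil when end
// was never reached.
func walkBack(steps [][]int, start, end point) []point {
	if v, ok := end.at(steps); !ok || (v == 0 && end != start) {
		return nil
	}
	path := []point{end}
	for curr := end; curr != start; {
		currVal, _ := curr.at(steps)
		moved := false
		for _, p := range po {
			next := curr.add(p)
			val, ok := next.at(steps)
			if !ok {
				continue
			}
			// the predecessor is start itself (distance 0) or a cell
			// whose recorded distance is currVal-1
			if (next == start && currVal == 1) || (val > 0 && val == currVal-1) {
				path = append(path, next)
				curr = next
				moved = true
				break
			}
		}
		if !moved {
			return nil
		}
	}
	// reverse in place so the path runs start -> end
	for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 {
		path[i], path[j] = path[j], path[i]
	}
	return path
}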

func main() {
	arr := getArr(MAZE)
	for _, row := range arr {
		for _, val := range row {
			fmt.Printf("%d ", val)
		}
		fmt.Println()
	}
	fmt.Println()
	steps := bfs(arr, point{0, 0}, point{len(arr) - 1, len(arr[0]) - 1})
	for i := range steps {
		for j := range steps[i] {
			fmt.Printf("%d ", steps[i][j])
		}
		fmt.Println()
	}
}
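
With the end point corrected to {5, 4}, the program prints the parsed dimensions, the maze, and then the step matrix (0 marks walls, the start, and unreachable cells):

6 5
0 1 0 0 0
0 0 0 1 0
0 1 0 1 0
1 1 1 0 0
0 1 0 0 1
0 1 0 0 0

0 0 4 5 6
1 2 3 0 7
2 0 4 0 8
0 0 0 10 9
0 0 12 11 0
0 0 13 12 13

Reading the end cell: steps[5][4] = 13, so the shortest route takes 13 steps. Note how the pocket at the bottom-left stays 0: it is walled off and never reached.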

Crawling nested pages with BFS

The example crawls a novel site; it is for learning and exchange only.

Abstracting the request structure


package type_

// Request is one URL to fetch; ParseUrl turns the fetched body into
// the next layer of requests.
type Request struct {
	Url      string
	Cookie   string
	ParseUrl func(b []byte, req *Request) *ParseRes
}

// ParseRes carries the follow-up requests discovered on a page.
type ParseRes struct {
	Requests []*Request
}
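
The ParseUrl callback is what gives the crawl its layered, BFS-like shape: each page's parser decides which follow-up requests are spawned, and a request whose parser spawns nothing ends its branch. As a hypothetical illustration (leafParser is not part of the original types):

// leafParser is a hypothetical no-op parser: pages using it spawn no
// further requests and become leaves of the crawl.
func leafParser(b []byte, req *Request) *ParseRes {
	return &ParseRes{}
}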

Abstracting the fetcher interface


package fetch_

import (
	"bufio"
	"io/ioutil"
	"net/http"

	"golang.org/x/net/html/charset"
	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/unicode"
	"golang.org/x/text/transform"

	"test/xiangqin/type_"
)

type Fetcher interface {
	Get(req *type_.Request) ([]byte, error)
}

type BrowserFetch struct{}

// Get fetches req.Url with a browser-like User-Agent (plus the cookie, if
// set) and returns the body transcoded to UTF-8.
func (b *BrowserFetch) Get(req *type_.Request) ([]byte, error) {
	client := &http.Client{}
	request, err := http.NewRequest("GET", req.Url, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36")
	if len(req.Cookie) > 0 {
		request.Header.Set("Cookie", req.Cookie)
	}
	res, err := client.Do(request)
	if err != nil {
		return nil, err
	}
	// close only after the error check: res is nil when Do fails
	defer res.Body.Close()
	body := bufio.NewReader(res.Body)
	e := DeterminEncode(body)
	// decode from the detected charset into UTF-8 (NewDecoder, not NewEncoder)
	utf8Read := transform.NewReader(body, e.NewDecoder())
	return ioutil.ReadAll(utf8Read)
}

// DeterminEncode sniffs the page encoding from the first bytes of the
// stream. Peek does not consume the reader, so the caller can still read
// the full body afterwards; when peeking fails (e.g. a very short body),
// it falls back to UTF-8.
func DeterminEncode(read *bufio.Reader) encoding.Encoding {
	peek, err := read.Peek(1024)
	if err != nil {
		return unicode.UTF8
	}
	e, _, _ := charset.DetermineEncoding(peek, "")
	return e
}
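
A quick usage sketch for the fetcher, as a fragment that assumes the two packages above (the URL is just the example site's front page):

	var f fetch_.Fetcher = &fetch_.BrowserFetch{}
	body, err := f.Get(&type_.Request{Url: "http://www.shuhai.com"})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(len(body), "bytes, transcoded to UTF-8")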

Implementing the algorithm

package main

import (
	"fmt"
	"log"
	"regexp"
	"test/xiangqin/fetch_"
	"test/xiangqin/type_"
	"time"
)

const (
	Regurl = `(http://www.shuhai.com/book/.*?)"[^>]*>([^<]+)`
	cookie = "your cookie value here"
)

func main() {
	var workList []*type_.Request
	for i := 1; i <= 10; i++ {
		url := fmt.Sprintf("http://www.shuhai.com/shuku/0_0_0_0_0_0_0_%d.html", i)
		//fmt.Println(url)
		workList = append(workList, &type_.Request{
			Url:      url,
			Cookie:   cookie,
			ParseUrl: ParseFunc,
		})
	}
	// start the breadth-first crawl: each pass over workList is one layer
	var f fetch_.Fetcher = &fetch_.BrowserFetch{}
	for len(workList) > 0 {
		items := workList
		workList = nil
		for _, item := range items {
			time.Sleep(time.Second) // be polite: one request per second
			fmt.Println(item.Url)
			body, err := f.Get(item)
			if err != nil {
				log.Println(err)
				continue
			}
			// requests without a parser are leaves: fetched, nothing to expand
			if item.ParseUrl == nil {
				continue
			}
			list := item.ParseUrl(body, item)
			workList = append(workList, list.Requests...)
		}
	}
}
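
Note that nothing here remembers which URLs have already been fetched, so if two list pages link to the same book, that book page is requested twice. A minimal dedup sketch (the visited map is an addition, not in the original code): declare visited := make(map[string]bool) before the outer loop, then filter instead of appending wholesale:

	for _, r := range list.Requests {
		if visited[r.Url] { // already queued or fetched: skip
			continue
		}
		visited[r.Url] = true
		workList = append(workList, r)
	}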
func ParseFunc(body []byte, request *type_.Request) *type_.ParseRes {
	re := regexp.MustCompile(Regurl)
	//fmt.Println(string(body))
	bytes := re.FindAllSubmatch(body, -1)
	var req = &type_.ParseRes{}
	for _, u := range bytes {
		fmt.Println(string(u[1])) // submatch 0 is the full match; 1 and 2 are the capture groups (URL, title)
		req.Requests = append(req.Requests, &type_.Request{
			Url:    string(u[1]),
			Cookie: request.Cookie,
		})
	}
	return req
}
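
The second capture group holds the link text, i.e. the book title. The example only prints the URL, but the title is right there if you want to log both:

	fmt.Printf("%s -> %s\n", u[1], u[2])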
