初学 Go 语言:开发第一个爬虫项目

package main
 
import (
	"fmt"
	"io"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"strings"
)
 
// HttpGet sends an HTTP GET request to url and returns the complete response
// body as a string.
//
// A non-nil err is returned when the request cannot be sent or when reading
// the body fails (for example, the connection is cut before the whole body
// arrives); in that case result is empty. The HTTP status code is not
// inspected, so the body of a non-2xx response is returned unchanged.
func HttpGet(url string) (result string, err error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// io.ReadAll reads until EOF and reports any other read error, so a
	// truncated body is surfaced to the caller instead of being returned as
	// if it were complete.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading response body from %q: %w", url, err)
	}
	return string(body), nil
}
func SpiderOneJoy(url string) (title, content string, err error) {
	result, err1 := HttpGet(url)
	if err1 != nil {
		err = err1
		return
	}
    //取标题:

标题

re := regexp.MustCompile(`

(?s:(.*?))

`) if re == nil { err = fmt.Errorf("%s", "regexp.MustCompile err") return } tmpTitle := re.FindAllStringSubmatch(result, 1)//1过滤一个 for _, data := range tmpTitle { title = data[1] title = strings.Replace(title, "\t", "", -1) break // 防止有多次直接break } //取内容: re = regexp.MustCompile(`

 

你可能感兴趣的:(go)