Golang crawler login

www.changtu.com

The goal is to fetch the login page and log in with an HTTP request. The code is pasted directly below:

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"regexp"

)

type MySpider struct {
	indexUrl string
}

// readUrlBody issues a GET request to indexUrl and returns the response body.
func (this MySpider) readUrlBody() (string, error) {
	resp, err := http.Get(this.indexUrl)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}

// catchCategoryUrl extracts the category/login URLs from the index page.
// Note: the HTML fragments inside the original pattern were stripped when the
// post was rendered, so only the surviving part of the expression is kept here.
func (this MySpider) catchCategoryUrl() []string {
	body, _ := this.readUrlBody()
	rcg := regexp.MustCompile(`class="catalog_sec_btn clearfix mgt15">(?sU:.*)(?sU:.*)(.*?)(?sU:.*)`)
	result := rcg.FindAllStringSubmatch(body, -1)
	urls := make([]string, 0, len(result))
	for i := range result {
		line := result[i]
		fmt.Println("<<======>>", line[1])
		urls = append(urls, line[1])
	}
	return urls
}

// getData sends the login request and parses the returned page. The login
// request itself and the original pattern were lost when the post was
// rendered; the surviving statements are kept and the gaps are marked.
func (this MySpider) getData() string {
	body1, err := this.readUrlBody() // stands in for the lost login request
	if err == nil {                  // stands in for the lost "login succeeded" check
		rcg1 := regexp.MustCompile(`(?sU:.*)(.*?)(?sU:.*)`) // placeholder pattern, original lost
		result1 := rcg1.FindAllStringSubmatch(body1, -1)
		for i := range result1 {
			line1 := result1[i]
			fmt.Println("<<======>>", line1[1])
		}
		// fmt.Println(result1)
		return ""
	}
	// fmt.Println(string(body))
	fmt.Println("<<======>>login failed")
	return ""
}

// catchLoginInfo checks whether we are currently in a logged-out state.
func (this MySpider) catchLoginInfo() string {
	body, _ := this.readUrlBody()
	// Only this much of the pattern survived; the HTML between the groups was stripped.
	rcg := regexp.MustCompile(`(?sU:.*)(.*?)(?sU:.*)`)
	result := rcg.FindAllStringSubmatch(body, -1)
	// fmt.Println("body=", body)
	for i := range result {
		line := result[i]
		fmt.Println("<<======>>", line[1])
	}
	return ""
}

func (this MySpider) run() string {
	cateUrls := this.catchCategoryUrl()
	for _, u := range cateUrls {
		this.indexUrl = u // value receiver: the updated copy is used for the calls below
		this.catchLoginInfo()
		this.getData()
		break // only the first URL is processed
	}
	return ""
}

func main() {
	ms := new(MySpider)
	ms.indexUrl = "https://www.changtu.com"
	ms.run()
}
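
The login request itself (the HTTP POST the title refers to) did not survive the formatting above, and the plain http.Get calls in MySpider start a fresh session every time. Below is a minimal, standalone sketch of how that missing step could look with net/http and a cookie jar; the login URL and the form field names are assumptions for illustration, not the actual changtu.com endpoint or parameters.

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/cookiejar"
	"net/url"
)

// login posts the credentials and returns a client whose cookie jar holds the
// session cookie, so later requests made with the same client stay logged in.
func login(loginURL, user, pass string) (*http.Client, error) {
	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, err
	}
	client := &http.Client{Jar: jar}

	form := url.Values{}
	form.Set("username", user) // assumed field name
	form.Set("password", pass) // assumed field name

	resp, err := client.PostForm(loginURL, form)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	// A real crawler would inspect body or resp.StatusCode here to decide
	// whether the login succeeded, much like getData's "login failed" branch.
	fmt.Println("login response length:", len(body))
	return client, nil
}

func main() {
	// Hypothetical endpoint and credentials, for illustration only.
	client, err := login("https://www.changtu.com/login", "user", "pass")
	if err != nil {
		fmt.Println("login failed:", err)
		return
	}
	resp, err := client.Get("https://www.changtu.com")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status after login:", resp.Status)
}

Reusing one http.Client with a jar is the important part: if every request goes through http.Get directly, the session cookie returned by the login response is thrown away, and a check like catchLoginInfo will always see the logged-out page.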

 
