Go语言实现获取有道网页结果

在抓取百度、有道这类翻译网页的结果时,网站本身可能带有反爬取机制,请求会一直返回 550/998/997 这样的错误。此时需要动态生成请求参数:每次请求都用当前时间戳和随机数重新计算 salt 和 sign,让服务器把这次访问识别为人为访问而不是机器访问,反爬取机制也就随之失效。所谓破解反爬取机制,就是图中(以及下面代码里)对字符串进行拼接、加密等操作来生成 sign。具体代码参考如下:

package main

import (
	"bytes"
	"crypto/md5"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"math/rand"
	"net/http"
	"net/url"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"
)

// DictRequest describes a JSON request payload with three string fields,
// encoded as "trans_type", "source" and "user_id".
//
// NOTE(review): nothing in the visible source constructs or marshals this
// type; the request sent by query_youdao is a URL-encoded form instead.
// Confirm whether it is still needed before relying on it.
type DictRequest struct {
	TransType string `json:"trans_type"`
	Source    string `json:"source"`
	UserID    string `json:"user_id"`
}
// DictResponse is the type query_youdao decodes the HTTP response body into.
//
// JSON layout, as given by the field tags:
//   - "translateResult": an array of arrays of objects, each carrying the
//     string fields "tgt" and "src" (presumably target and source text;
//     confirm against the service).
//   - "errorCode": an integer status code reported by the service.
//   - "type": a string.
//   - "smartResult": an object holding "entries" (a list of strings, which
//     query_youdao prints one per line) and an integer "type".
type DictResponse struct {
	TranslateResult [][]struct {
		Tgt string `json:"tgt"`
		Src string `json:"src"`
	} `json:"translateResult"`
	ErrorCode   int    `json:"errorCode"`
	Type        string `json:"type"`
	SmartResult struct {
		Entries []string `json:"entries"`
		Type    int      `json:"type"`
	} `json:"smartResult"`
}

// encrypt returns the MD5 digest of str as a lowercase hexadecimal string.
// Despite the name this is a one-way hash, not reversible encryption.
func encrypt(str string) string {
	digest := md5.Sum([]byte(str))
	return hex.EncodeToString(digest[:])
}

// query_youdao looks up word through the Youdao web translation endpoint and
// prints every entry of the response's smartResult to stdout, one per line.
//
// Each call derives fresh request parameters:
//   - lts: the current Unix time in milliseconds;
//   - salt: lts followed by one random digit;
//   - sign: the hex MD5 of "fanyideskweb" + word + salt + a fixed secret.
//
// Any failure (building or sending the request, reading the body, a non-200
// status, undecodable JSON, or a non-zero errorCode in the reply) ends the
// process through log.Fatal.
func query_youdao(word string) {
	now := time.Now()
	lts := strconv.FormatInt(now.UnixMilli(), 10)
	// A local generator avoids reseeding the process-wide math/rand source on
	// every call.
	rng := rand.New(rand.NewSource(now.UnixNano()))
	salt := lts + strconv.Itoa(rng.Intn(9))
	// The signature is computed over the raw word; only the copy placed in the
	// form body is URL-encoded.
	sign := encrypt("fanyideskweb" + word + salt + "Ygy_4c=r#e#4EX^NUGUc5")

	// word must be escaped, otherwise characters such as '&', '=', '+', spaces
	// or non-ASCII text corrupt the form. The timestamp field is "&lts=".
	form := "i=" + url.QueryEscape(word) +
		"&from=AUTO&to=AUTO&smartresult=dict&client=fanyideskweb" +
		"&salt=" + salt +
		"&sign=" + sign +
		"&lts=" + lts +
		"&bv=d60b9bede0ddd264422f25a5e061c49a&doctype=json&version=2.1&keyfrom=fanyi.web&action=FY_BY_REALTlME"

	req, err := http.NewRequest("POST", "https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule", strings.NewReader(form))
	if err != nil {
		log.Fatal(err)
	}

	// Headers copied from a browser session so the request looks like one
	// issued by the site's own page.
	// NOTE(review): the "[email protected]" segment of the Cookie value looks like
	// a scrubbed placeholder rather than a real cookie pair; replace it with
	// the value from a live session if the service rejects the request.
	headers := [][2]string{
		{"Connection", "keep-alive"},
		{"Pragma", "no-cache"},
		{"Cache-Control", "no-cache"},
		{"sec-ch-ua", `" Not;A Brand";v="99", "Google Chrome";v="97", "Chromium";v="97"`},
		{"Accept", "application/json, text/javascript, */*; q=0.01"},
		{"Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"},
		{"X-Requested-With", "XMLHttpRequest"},
		{"sec-ch-ua-mobile", "?0"},
		{"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"},
		{"sec-ch-ua-platform", `"Windows"`},
		{"Origin", "https://fanyi.youdao.com"},
		{"Sec-Fetch-Site", "same-origin"},
		{"Sec-Fetch-Mode", "cors"},
		{"Sec-Fetch-Dest", "empty"},
		{"Referer", "https://fanyi.youdao.com/"},
		{"Accept-Language", "zh-CN,zh;q=0.9"},
		{"Cookie", "OUTFOX_SEARCH_USER_ID_NCOO=571199853.2191676; _ntes_nnid=345417059c531595fb4fe238fef920d8,1629898865976; [email protected]; JSESSIONID=aaaAUKGsqk_QBgByiQJcy; fanyi-ad-id=305838; fanyi-ad-closed=1; ___rl__test__cookies=1652014252491"},
	}
	for _, kv := range headers {
		req.Header.Set(kv[0], kv[1])
	}

	// Bound the whole exchange so a stalled server cannot hang the program.
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	bodyText, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("bad StatusCode: %d, body: %s", resp.StatusCode, bodyText)
	}

	var dictResponse DictResponse
	if err := json.Unmarshal(bodyText, &dictResponse); err != nil {
		log.Fatal(err)
	}
	// Surface a rejection from the service instead of silently printing
	// nothing.
	if dictResponse.ErrorCode != 0 {
		log.Fatalf("youdao returned errorCode %d, body: %s", dictResponse.ErrorCode, bodyText)
	}

	for _, entry := range dictResponse.SmartResult.Entries {
		fmt.Println(entry)
	}
}

// main expects exactly one command-line argument, the word to look up, and
// passes it to query_youdao. With any other argument count it writes a usage
// message to stderr and exits with status 1.
func main() {
	args := os.Args
	if len(args) == 2 {
		query_youdao(args[1])
		return
	}

	const usage = "usage: simpleDict WORD\nexample: simpleDict hello\n\t\t"
	fmt.Fprint(os.Stderr, usage)
	os.Exit(1)
}

你可能感兴趣的:(GoLang,golang,开发语言,爬虫)