# 反反爬虫数据提取(有道,人人网)

import requests
import hashlib
import time



class Youdao(object):
    """Small client that posts a signed translation request to Youdao.

    Usage: ``Youdao().run(word)`` builds the signed form data, sends it to
    ``self.url`` and prints the response body.
    """

    # Fixed request identifiers; they are part of both the form body and the
    # string that gets signed.
    CLIENT = 'fanyideskweb'
    PRODUCT = 'webfanyi'

    # Signing key used as the ``key`` component of the MD5 input.
    # NOTE(review): this value is copied from the site's front-end script as
    # commonly published; it is not derivable from this file and may change
    # server-side -- confirm before relying on it.
    SIGN_KEY = 'fsdsogkndfokasodnaso'

    # Seconds to wait for the server before giving up, so a stalled
    # connection cannot hang the program forever.
    TIMEOUT = 10

    def __init__(self):
        """Set the endpoint URL, the request headers and an empty form."""
        self.url = 'https://dict.youdao.com/webtranslate'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.76',
            # Header values are written without the stray spaces that a
            # copy from the browser's developer tools had introduced.
            'Cookie': 'OUTFOX_SEARCH_USER_ID_NCOO=1517347445.9602115; OUTFOX_SEARCH_USER_ID=1953018217@36.143.41.135; YOUDAO_MOBILE_ACCESS_TYPE=0',
            'Referer': 'https://fanyi.youdao.com/',
        }
        # Filled in by generate_formdata(); None until then.
        self.formdata = None

    def _sign(self, mystic_time):
        """Return the hex MD5 signature for the given millisecond timestamp.

        Mirrors the front-end template
        ``client=${d}&mysticTime=${e}&product=${u}&key=${t}``: the timestamp
        goes into ``mysticTime`` and the fixed signing key into ``key``.
        """
        raw = (
            f'client={self.CLIENT}&mysticTime={mystic_time}'
            f'&product={self.PRODUCT}&key={self.SIGN_KEY}'
        )
        return hashlib.md5(raw.encode()).hexdigest()

    def generate_formdata(self, word):
        """Build the signed POST body for ``word`` and store it on the instance.

        Args:
            word: The text to translate.

        The result is stored in ``self.formdata`` (and printed as a debugging
        aid); nothing is returned.
        """
        # Millisecond Unix timestamp; the same value must be used for the
        # signature and for the ``mysticTime`` form field.
        mystic_time = str(int(time.time() * 1000))

        self.formdata = {
            "i": word,
            "from": "auto",
            "to": "",
            # NOTE(review): field name corrected from "domaind"; confirm
            # against the live request.
            "domain": "0",
            "dictResult": "true",
            "keyid": "webfanyi",
            "sign": self._sign(mystic_time),
            "client": self.CLIENT,
            "product": self.PRODUCT,
            "appVersion": "1.0.0",
            "vendor": "web",
            "pointParam": "client,mysticTime,product",
            "mysticTime": mystic_time,
            "keyfrom": "fanyi.web",
        }
        print(self.formdata)

    def get_data(self):
        """POST the prepared form data and return the raw response body.

        Returns:
            The response content as ``bytes``.

        Raises:
            RuntimeError: If generate_formdata() has not been called yet.
        """
        if self.formdata is None:
            raise RuntimeError('call generate_formdata() before get_data()')
        response = requests.post(
            self.url,
            data=self.formdata,
            headers=self.headers,
            timeout=self.TIMEOUT,
        )
        return response.content

    def run(self, word):
        """Translate ``word``: build the form, send it, print the reply."""
        # Build the signed form data.
        self.generate_formdata(word)
        # Send the request.
        data = self.get_data()
        # Show the response body as text.
        # NOTE(review): the body is printed as received; if the endpoint
        # returns an encoded/encrypted payload it still needs decoding.
        print(data.decode())

if __name__ == '__main__':
    # Script entry point: translate one sample phrase and print the reply.
    translator = Youdao()
    translator.run('人生苦短')
import hashlib


# Text whose MD5 digest is demonstrated below.
data = 'python37'

# Create the hash object, seeding it directly with the encoded text
# (equivalent to creating it empty and calling update() once).
md5 = hashlib.md5(data.encode())

# Read the digest back as a hexadecimal string and display it.
result = md5.hexdigest()
print(result)

# 你可能感兴趣的:(爬虫,selenium,python)