# 百度指数代码 (Baidu Index scraper)

import calendar
import json
import random
import time
from urllib import parse

import requests
from fake_useragent import UserAgent

class IndexSpider(object):
    """Fetch the daily average Baidu Index value of a keyword, day by day.

    For every calendar day from May through October 2019 the spider requests
    the search-index API for that single day, prints the reported average
    and appends it as one line to ``index_dianli.txt``.
    """

    # Year baked into the URL template below; used to look up month lengths.
    _YEAR = 2019
    # Seconds to wait on the proxy/server before abandoning a request.
    _TIMEOUT = 10

    def __init__(self):
        # Placeholders, in order: keyword, start month, start day,
        # end month, end day.
        self.url = 'http://index.baidu.com/api/SearchApi/index?area=913&word={}&startDate=2019-{}-{}&endDate=2019-{}-{}'
        # Number of days successfully fetched and written so far.
        self.i = 0

    def get_headers(self):
        """Return request headers: the session cookie plus a random User-Agent."""
        ua = UserAgent()
        headers = {
            # NOTE(review): this is a hard-coded login session cookie. It
            # will expire and should not live in source control; consider
            # loading it from configuration instead.
            'Cookie': 'BIDUPSID=D344F0C8E43042FD7A4F1163A6CE12FE; PSTM=1573874676; BAIDUID=D344F0C8E43042FD08776EA5228928C6:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDUSS=pnaGpUUmg4N2NHQ3hDblRkWE5aejFFbk1DRnlXeUFvR05KN0VKdnB5M1Uwd0plSVFBQUFBJCQAAAAAAAAAAAEAAAAmaKZqAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAANRG213URttdNm; CHKFORREG=d741aa86ceb3a17ef4d801868e250147; bdindexid=5id2ebs6rqk5ecq89blfh3nk71; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=2; BDSFRCVID=db-sJeCCxG3JggRwdPf5XAy9t8FOeQZRddMu3J; H_BDCLCKID_SF=tR30WJbHMTrDHJTg5DTjhPrMbMKLbMT-027OKKOF5b3CfnbMb6oiLJc-5a7lW-QIyHrb0p6athF0HPonHj_WejbP; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1574404993,1574651565,1574651689,1574653563; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1574658537; H_PS_PSSID=1448_21096_18559_20882_29567_29221',
            'User-Agent': ua.random,
        }
        return headers

    def get_json(self, url):
        """GET ``url`` through the configured proxy and return the decoded JSON.

        Raises:
            requests.RequestException: on connection failure, timeout or a
                non-2xx HTTP status.
            json.JSONDecodeError: if the response body is not valid JSON.
        """
        response = requests.get(
            url=url,
            headers=self.get_headers(),
            proxies={
                'http': 'http://157.245.205.81:8080',
                'https': 'https://157.245.205.81:8080',
            },
            # Without a timeout a dead proxy would block the run forever.
            timeout=self._TIMEOUT,
        )
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        return json.loads(response.text)

    def parse_html(self, url):
        """Fetch one day's payload, print its average and append it to the file.

        The value is read from ``data.generalRatio[0].all.avg`` of the JSON
        response; a payload without that path raises ``KeyError`` or
        ``IndexError``.
        """
        one_html = self.get_json(url)
        index = one_html['data']['generalRatio'][0]['all']['avg']
        print(index)
        self.i += 1
        with open('index_dianli.txt', 'a') as f:
            f.write(str(index) + '\n')

    def _iter_dates(self):
        """Yield ``(month, day)`` for every real calendar day, May to October."""
        for month in range(5, 11):
            # monthrange gives the true month length, so 31-day months are
            # covered in full and 30-day months never get an invalid date.
            last_day = calendar.monthrange(self._YEAR, month)[1]
            for day in range(1, last_day + 1):
                yield month, day

    def run(self):
        """Prompt for a keyword and scrape one value per day of the period."""
        word = input('请输入要搜素的关键词:')
        word = parse.quote(word)
        for month, day in self._iter_dates():
            url = self.url.format(word, month, day, month, day)
            self.parse_html(url)
            # Throttle between requests so the crawl is not fired in a burst.
            time.sleep(random.randint(5, 10))

        print(self.i)

if __name__ == '__main__':
    # Start the scraper only when this file is executed as a script,
    # not when it is imported as a module.
    spider = IndexSpider()
    spider.run()

# 你可能感兴趣的:(百度指数代码)