Python 模拟百度搜索

一:必须要懒

import urllib
import urllib.parse

import requests
from bs4 import BeautifulSoup


# Interactive Baidu search: asks for an optional HTTPS proxy and a query, then
# repeatedly asks for a result page number, fetches that page and prints the
# pager entries followed by the title and link of every result.

# The prompt tells users to type 127.0.0.1 when they do not need a proxy, so
# that value (or an empty answer) means "connect directly". Passing it on to
# requests would route the request through a proxy at that address instead.
ip = input("请输入代理ip(无需要请输入127.0.0.1):").strip()
proxies = {'https': ip} if ip not in ('', '127.0.0.1') else None

# Browser-like headers sent with every search request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Connection': 'keep-alive',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'}

content = input('请输入搜索内容:')
# Percent-encode the query so non-ASCII (e.g. Chinese) text is URL-safe.
content_code = urllib.parse.quote(content)

while True:
    print('输入 1 即退出')
    try:
        # Pages are 1-based for the user and 0-based internally.
        page = int(input('请输入页数:')) - 1
    except ValueError:
        # Non-numeric input: ask again instead of crashing.
        print('请输入有效的页数')
        continue

    # 'pn' is the result offset, 10 per page as used by this script; clamp it
    # so an input of 0 or below never produces a negative offset.
    pn = max(page, 0) * 10
    url = 'https://www.baidu.com/s?wd=' + content_code + '&pn=' + str(pn)
    # A timeout keeps an unresponsive server or proxy from hanging the script.
    r = requests.get(url, headers=headers, proxies=proxies, timeout=10)

    soup = BeautifulSoup(r.text, features="html.parser")

    # Page numbers; needs improvement. The pager element may be absent, in
    # which case find() returns None and there is nothing to print.
    target = soup.find(id="page")
    if target is not None:
        for y in target.find_all('a'):
            print(y.find_all(class_='pc'))

    # Title and link of every result on the page.
    for i in soup.select('div.result > h3 > a'):
        print(i.getText())
        print(i.attrs.get("href"))

    # Entering 1 (or anything lower) shows the first page and then exits.
    if page <= 0:
        break

print("Good bye!")

小编QQ:944420535

Github

页码显示部分仍需改进(目前只是原样打印分页链接中 class 为 pc 的元素)

你可能感兴趣的:(python模拟百度搜索)