python-爬虫

爬虫数据
代码

# -*-coding:utf-8 -*-
# BY WANGCC

import requests
from bs4 import BeautifulSoup

# Request headers sent with every page fetch; only a desktop Firefox
# User-Agent string is set.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:48.0) Gecko/20100101 Firefox/48.0',
}


def get_html(url, timeout=10):
    """Fetch *url* and return the raw response body as bytes.

    The request is sent with the module-level ``headers``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The undecoded response body (``bytes``).

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or an HTTP 4xx/5xx status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to
    # the parser as if it were real content.
    response.raise_for_status()
    return response.content


def get_juzi(html):
    """Print the text of every ``<a class="xlistju">`` element in *html*.

    Each element's text is printed, followed by a second ``print`` of a
    newline character that visually separates the entries.

    Args:
        html: Page markup to parse with the ``lxml`` parser.
    """
    document = BeautifulSoup(html, "lxml")
    for anchor in document.find_all('a', class_="xlistju"):
        sentence = anchor.get_text()
        print(sentence)
        print("\n")


def get_title(html):
    """Print the page title with the ``'_句子迷'`` suffix removed.

    Nothing is printed when the document has no ``<title>`` element.

    Args:
        html: Page markup to parse with the ``lxml`` parser.
    """
    soup = BeautifulSoup(html, "lxml")
    title_tag = soup.title
    # ``soup.title`` is None when the markup has no <title>; calling
    # ``get_text`` on it would raise AttributeError.
    if title_tag is None:
        return
    print(title_tag.get_text().replace('_句子迷', ''))


if __name__ == '__main__':
    # URL pattern, e.g. http://www.juzimi.com/article/316132?page=0
    page_count = 8  # the number of pages is set by hand
    url_template = 'https://www.juzimi.com/article/20657?page=%s'
    for page in range(page_count):
        page_html = get_html(url_template % page)
        # Print the title once, from the first page only.
        if page == 0:
            get_title(page_html)
        get_juzi(page_html)

参考了网上的代码，但目前还有问题，需要明天再调整。

你可能感兴趣的:(python-爬虫)