用python爬取一部小说的前面51章节


#!/usr/bin/python
#coding=utf-8

import requests
from bs4 import BeautifulSoup

def handleUrl(n):
    """Build the page URL for the chapter at offset ``n``.

    The page id is a fixed base id plus ``n``; it is appended to the book's
    base path with a ``.html`` suffix. The resulting URL is printed to
    stdout and then returned.
    """
    # Original author's note: this address is a placeholder, not the real site.
    book_root = "http://www.baidu.com/books/74/74873/"
    first_page_id = 13099890

    page_id = first_page_id + n
    url = "".join((book_root, str(page_id), ".html"))

    print("url--%s" % (url))
    return url

def handleHtml(n, timeout=10):
    """Download the chapter page at offset ``n`` and return its HTML text.

    Args:
        n: Chapter offset; passed to ``handleUrl`` to build the page URL.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests.get`` can block indefinitely on a stalled
            connection and freeze the whole crawl.

    Returns:
        The decoded page text when the server answers with status 200,
        otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection errors or when ``timeout``
            expires.
    """
    print(n)
    url = handleUrl(n)
    response = requests.get(url, timeout=timeout)
    # Decode using the encoding detected from the body instead of the one
    # requests picked by default.
    response.encoding = response.apparent_encoding
    print(response)

    if response.status_code != 200:
        print("获取网页失败")
        return None
    return response.text

def parserText(n):
    """Fetch chapter ``n`` and return the text of its ``id="contents"`` element.

    Args:
        n: Chapter offset; passed through to ``handleHtml``.

    Returns:
        The text of the element whose id is ``contents``, or ``None`` when
        the page could not be downloaded, came back empty, or contains no
        such element.
    """
    html = handleHtml(n)
    # handleHtml returns None on a non-200 response; calling len() on it
    # would raise TypeError, so test truthiness instead (also covers "").
    if not html:
        return None

    soup = BeautifulSoup(html, 'html.parser')
    content_node = soup.find(id="contents")
    # find() returns None when nothing matches; guard before get_text().
    if content_node is None:
        return None
    return content_node.get_text()

if __name__ == "__main__":
    # Append the text of the first 51 chapters (offsets 0-50) to one file.
    fileName = "/Users/hbddz/Desktop/小说.text"
    # Open once for the whole run, and force UTF-8 so writing Chinese text
    # does not depend on the platform's default encoding.
    with open(fileName, 'a', encoding='utf-8') as f:
        for x in range(0, 51):
            text = parserText(x)
            if text is None:
                # parserText signals failure with None; f.write(None) would
                # raise TypeError and abort the remaining chapters.
                print("skipping chapter offset %d: no text" % x)
                continue
            f.write(text)

你可能感兴趣的:(用python爬取一部小说的前面51章节)