Python crawler: basic lxml code for saving scraped data to a file

Basic crawler code that saves the scraped results to a file.

import requests  # import the requests package
from lxml import html

def write_line(filename, data):
    # append one line of text to the file, creating it if it does not exist
    with open(filename, 'a+', encoding='utf-8') as file:
        file.write(data)
        file.write("\n")

def getData(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/95.0.4638.54 Safari/537.36 Edg/95.0.1020.30'}
    strhtml = requests.get(url, headers=headers)  # fetch the page with a GET request
    data = html.fromstring(strhtml.content)       # parse the HTML into an lxml tree
    tables = data.xpath("//div[@class='indent']/table")  # one <table> per book entry
    for table in tables:
        row = []
        bookName = table.xpath(".//div[@class='pl2']/a/text()")
        bookName = ",".join(bookName).replace(" ", "").replace("\n", "")
        personName = table.xpath(".//p[@class='pl']/text()")    # author / publisher / price line
        personName = ",".join(personName).replace(" ", "").replace("\n", "")
        ratingNum = table.xpath(".//span[@class='pl']/text()")  # number of ratings
        ratingNum = ",".join(ratingNum).replace(" ", "").replace("\n", "")
        content = table.xpath(".//span[@class='inq']/text()")   # one-line quote
        content = ",".join(content).replace(" ", "").replace("\n", "")
        row.append(bookName)
        row.append(personName)
        row.append(ratingNum)
        row.append(content)
        print(row)
        write_line('demo.txt', ",".join(row))

if __name__ == '__main__':
    # write the header row once, then crawl the 10 pages of the top 250 list
    write_line('demo.txt', ",".join(['bookName', 'personName', 'sum', 'content']))
    for i in range(10):
        url = 'https://book.douban.com/top250?start=' + str(i * 25)
        getData(url)
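
Because book titles and quotes can themselves contain commas, joining the fields with "," produces rows that are hard to parse back reliably. A minimal alternative sketch (the helper name write_row and the file demo.csv are just illustrative, assuming the same fields as above) uses Python's built-in csv module, which quotes such fields automatically:

import csv

def write_row(filename, fields):
    # append one properly quoted CSV row; csv.writer escapes commas inside fields
    with open(filename, 'a+', encoding='utf-8', newline='') as file:
        csv.writer(file).writerow(fields)

# usage inside the loop above, replacing write_line:
# write_row('demo.csv', [bookName, personName, ratingNum, content])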
