宝可梦的数据爬取

漫漫五一假期,我又拾起了儿时的回忆。口袋妖怪陪伴了我很多年,但是关于口袋妖怪的种族值,除了能记住个大概,其他的真的很难全部记住!今天,我终于忍无可忍,决定趁着五一把它彻底解决:利用爬虫帮我生成一份宝可梦种族值一览表!
代码如下:

import re
import xlwt
import requests
from bs4 import BeautifulSoup


def main():
    """Scrape the Generation V base-stats wiki table and save it to a workbook.

    Rows are fetched from the hard-coded wiki URL with ``homepage`` and then
    handed to ``saveData`` together with the output path.
    """
    url = "http://www.pokemon.name/wiki/宝可梦种族值表%EF%BC%88第五世代%EF%BC%89"
    data = homepage(url)
    # NOTE(review): the backslash separator is Windows-specific; on POSIX
    # systems it is an ordinary filename character — confirm target platform.
    savepath = ".\\图鉴.xls"
    saveData(data, savepath)


def homepage(url):
    """Download the page at *url* and extract one record per regex match.

    Args:
        url: Address of the wiki page to fetch.

    Returns:
        A list of 9-element lists of strings: the three-digit number, the
        name (capture groups 2-4 concatenated), and the seven numeric
        columns captured after it.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # Compiled once up front instead of on every loop iteration.
    # NOTE(review): this pattern contains no HTML tag literals even though it
    # is applied to raw markup; the tags may have been stripped when the
    # article was published — verify against the live page source.
    row_pattern = re.compile(r'\n{0,1}(\d{3})\n{0,1}\n{0,1}(.*?)\n{0,1}(.{2,10})(.*?)\n{0,1}\n{0,1}(\d{2,3})\n{0,1}(\d{2,3})\n{0,1}(\d{2,3})\n{0,1}(\d{2,3})\n{0,1}(\d{2,3})\n{0,1}(\d{2,3})\n{0,1}(\d{2,3})\n{0,1}')

    # A timeout keeps a stalled server from hanging the script indefinitely,
    # and raise_for_status() prevents an error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    pkm = []
    for block in soup.find_all('div', class_="mw-content-ltr"):
        for fields in row_pattern.findall(str(block)):
            # Groups 2-4 together form the name; groups 5-11 are the numbers.
            record = [fields[0], fields[1] + fields[2] + fields[3], *fields[4:11]]
            print(record)
            pkm.append(record)
    return pkm


def saveData(data, savepath):
    """Write the scraped rows to an ``.xls`` workbook at *savepath*.

    The first sheet row holds the column headers; each entry of *data* is
    written to the following rows in order.

    Args:
        data: Sequence of rows. Each row is indexed by header position, so a
            row with fewer than nine items raises ``IndexError``.
        savepath: Destination path passed to ``Workbook.save``.
    """
    # The work is done unconditionally: gating it on ``__name__`` inside the
    # function would make it a silent no-op whenever the module is imported.
    headers = ("编号", "中文", "HP", "攻击", "防御", "特攻", "特防", "速度", "总计")
    book = xlwt.Workbook(encoding="utf-8")
    sheet = book.add_sheet('口袋妖怪数据表', cell_overwrite_ok=True)

    for col_idx, title in enumerate(headers):
        sheet.write(0, col_idx, title)

    # Data rows start at sheet row 1, directly below the header row.
    for row_idx, row in enumerate(data, start=1):
        for col_idx in range(len(headers)):
            sheet.write(row_idx, col_idx, row[col_idx])

    book.save(savepath)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

你可能感兴趣的:(爬虫,口袋妖怪)