《Python网络爬虫——大学排名》

代码如下:

import requests, bs4
import tkinter
from bs4 import BeautifulSoup


def getHtmlText(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Before decoding, the response encoding is replaced by the one that
    ``requests`` detects from the content (``apparent_encoding``).

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded page text, or ``None`` when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as err:
        # Report what went wrong instead of silently printing a blank line.
        print('failed to fetch {}: {}'.format(url, err))
        return None
    response.encoding = response.apparent_encoding
    return response.text


def parserHtml(html):
    """Parse ``html`` with the ``html.parser`` backend and return the BeautifulSoup tree."""
    return BeautifulSoup(html, "html.parser")

# The rank column (tds[0]) is not scraped; fields are taken from tds[1] onwards.
def makeList(list, soup):
    """Append one ``[tds[1], tds[2], tds[3]]`` text triple per table row to ``list``.

    Args:
        list: Output list, extended in place. The name shadows the builtin
            but is kept so existing callers are unaffected.
        soup: Parsed document in which the first ``<tbody>`` element is
            searched for rows.

    Returns:
        The same ``list`` object. It is returned unchanged when the document
        contains no ``<tbody>``.
    """
    tbody = soup.find('tbody')
    if tbody is None:
        # No table body on the page: nothing to collect.
        return list
    for tr in tbody.children:
        # Children also include plain text nodes; only real tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 4:
            # Row is too short to hold the three wanted cells; skip it
            # rather than abort the whole scrape with an IndexError.
            continue
        list.append([tds[1].string, tds[2].string, tds[3].string])
    return list

# Ranks are generated here, counting from 1, rather than read from the data.
def printform(list, num):
    """Write a header row followed by at most ``num`` entries into ``univList``.

    Args:
        list: Sequence of three-item rows; items 0, 1 and 2 are shown under
            the school, location and total-score headers respectively.
        num: Maximum number of rows to show. Values larger than
            ``len(list)`` show every row; values below 1 show only the header.
    """
    tplt = '{0:^8}\t{1:{4}^16}\t{2:{4}^16}\t{3:^8}'
    # Ideographic space (U+3000), used as the fill character of the two
    # middle columns.
    fill = chr(12288)
    univList.insert(0, tplt.format('排名', '学校', '所在地', '总分', fill))
    # Slicing clamps to the rows that actually exist, so asking for more
    # rows than were scraped no longer raises IndexError part-way through.
    for rank, u in enumerate(list[:max(num, 0)], start=1):
        univList.insert('end', tplt.format(rank, u[0], u[1], u[2], fill))


def inquire():
    """Handle the query button: fetch the requested year's ranking and show it.

    Reads the number of universities from ``varA`` and the year from
    ``varB``, downloads the ranking page for that year, and lists the first
    ``num`` rows in ``univList``. Any problem (bad input, download failure,
    page without usable rows) is reported as a single line in ``univList``
    instead of being silently ignored.
    """
    # Always start from an empty list box so a repeated query (without a
    # reset in between) never stacks results or leftover header rows.
    univList.delete(0, 'end')

    try:
        num = int(varA.get())
    except ValueError:
        num = 0
    if num <= 0:
        univList.insert('end', '请输入有效的大学数量(正整数)')
        return

    # NOTE: per the hint label in the UI, only the 2016 and 2017 rankings
    # are available on the site.
    year = varB.get().strip()
    if not year.isdigit():
        univList.insert('end', '请输入有效的查询年份(例如 2016)')
        return

    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming' + year + '.html'
    html = getHtmlText(url)
    if html is None:
        univList.insert('end', '获取 ' + year + ' 年的排名页面失败')
        return

    try:
        ulist = makeList([], parserHtml(html))
    except (AttributeError, IndexError, TypeError):
        # The page does not have the expected table layout.
        ulist = []
    if not ulist:
        univList.insert('end', '未能从页面中解析出排名数据')
        return

    # Never ask for more rows than were actually scraped.
    printform(ulist, min(num, len(ulist)))


def reset():
    """Empty the result list box and blank both input fields."""
    univList.delete(0, 'end')
    for field in (varA, varB):
        field.set('')


# ---- Main window -----------------------------------------------------------
root = tkinter.Tk()
root.title('历年大学排名查询')
varA = tkinter.StringVar()  # number of universities to list
varB = tkinter.StringVar()  # year to query

# Hint label. grid() returns None, so the widget is created first and laid
# out in a separate call; otherwise My_label would be bound to None.
My_label = tkinter.Label(root, text='目前只能查询16和17年的排名,16年有310所,17年有500所!',
                         bitmap='info', compound='left', fg='red')
My_label.grid(row=0)

# ---- Input area ------------------------------------------------------------
FrameA = tkinter.LabelFrame(root, text='请输入大学数量和查询年份', fg='red4')
FrameA.grid(row=1, column=0)
tkinter.Label(FrameA, text='大学数量').grid(row=0, column=0, pady=5)
tkinter.Entry(FrameA, textvariable=varA).grid(row=0, column=1, pady=5)
tkinter.Label(FrameA, text='查询年份').grid(row=1, column=0, pady=5)
tkinter.Entry(FrameA, textvariable=varB).grid(row=1, column=1, pady=5)

# ---- Action buttons --------------------------------------------------------
FrameB = tkinter.LabelFrame(root, text='操作区', fg='blue')
FrameB.grid(row=1, column=1)
tkinter.Button(FrameB, text='查询', bg='green', command=inquire).pack()
tkinter.Button(FrameB, text='重置', bg='yellow', command=reset).pack()
# Destroy the root window to leave mainloop(); the quit() builtin is injected
# by the site module and is not guaranteed to exist in every environment.
tkinter.Button(FrameB, text='退出', bg='red', command=root.destroy).pack()

# ---- Result list with vertical scrollbar -----------------------------------
univList = tkinter.Listbox(root, width=80, height=30)
univList.grid(row=2, columnspan=2)

scroll = tkinter.Scrollbar(root, command=univList.yview)
scroll.grid(row=2, column=3, sticky='ns')
univList.configure(yscrollcommand=scroll.set)
root.mainloop()


你可能感兴趣的:(python,爬虫)