爬虫:研招网考研信息查询工具

作者接下来大三,可能准备考研了。想先做一下考前准备,看看哪些学校可以报名,分别考哪些科目,招多少人,学校在什么地方等等这些信息。

我们程序的目标网站是研招网:
直接在研招网上查询,非常麻烦,查询一下有时会跳出几千个结果,就是几千个网页,一个一个点开这些网页查看,还是非常麻烦的。我们做一个整合程序,将所有信息全部整合到一个excel中,便于我们筛选。

做这个工具的原因:

所以作者做了这个工具,可以将所有信息整合到excel当中,方便我进行数据筛选。给大家看看示意图:

首先我们来进行抓包:
由这个包我们可以看出它的目标地址、请求类型(POST),以及该POST请求所需传递的参数。

代码编写:

首先导入所需的库
import tkinter
import re
from tkinter import ttk
import requests
from lxml import etree
import time
import threading
from queue import Queue
import xlwt
界面制作
# Build the main window and the caption for the region dropdown.
win = tkinter.Tk()  # create the top-level window
win.geometry('450x350')  # set the window size to 450x350
l=tkinter.Label(win,text="地区")  # caption label for the region ("地区") dropdown
l.pack()  # place the label in the window
下拉框0 所在城市选择
# Dropdown 0: region selection. Every entry except "不限" starts with a numeric
# code in parentheses; infos() extracts those digits for the query.
comvalue0 = tkinter.StringVar()  # variable bound to the combobox's current text
comboxlist0 = ttk.Combobox(win, textvariable=comvalue0)  # create the combobox
comboxlist0["values"] = ("不限","(11)北京市", "(12)天津市","(13)河北省","(14)山西省","(15)内蒙古自治区","(21)辽宁省","(22)吉林省","(23)黑龙江省","(31)上海市",
                         "(32)江苏省","(33)浙江省","(34)安徽省","(35)福建省","(36)江西省","(37)山东省","(41)河南省","(42)湖北省","(43)湖南省","(44)广东省",
                         "(45)广西壮族自治区","(46)海南省","(50)重庆市","(51)四川省","(52)贵州省","(53)云南省","(54)西藏自治区","(61)陕西省","(62)甘肃省","(63)青海省",
                         "(64)宁夏回族自治区","(65)新疆维吾尔自治区")
comboxlist0.pack()
l1=tkinter.Label(win,text="门类类别")  # caption label for the category ("门类类别") dropdown
l1.pack()  # place the label in the window
下拉框1 专硕学硕专业门类选择
# Dropdown 1: degree category ("专硕" or one of the numbered academic categories).
comvalue1 = tkinter.StringVar()  # variable bound to the combobox's current text
comboxlist1 = ttk.Combobox(win, textvariable=comvalue1)
comboxlist1["values"] = (
    "专硕",
    "(01)哲学", "(02)经济学", "(03)法学", "(04)教育学", "(05)文学",
    "(06)历史学", "(07)理学", "(08)工学", "(09)农学", "(10)医学",
    "(11)军事学", "(12)管理学", "(13)艺术学",
)
# Repopulate the "专业领域" dropdown whenever a category is picked. The virtual
# event name must be spelled out in full: an empty pattern ("<>") is rejected
# by Tk, so the handler would never be attached.
comboxlist1.bind("<<ComboboxSelected>>", zyly)
comboxlist1.pack()

l2 = tkinter.Label(win, text="专业领域")  # caption label for the discipline dropdown
l2.pack()
下拉框2 专业领域下拉框显示

这里只显示下拉框,下拉框的内容显示要靠下拉框1(学硕专硕专业门类)的选择而改变

# Dropdown 2: discipline ("专业领域"). It is created without any values here;
# zyly() assigns its "values" according to the selection in comboxlist1.
comvalue2 = tkinter.StringVar()  # variable bound to the combobox's current text
comboxlist2 = ttk.Combobox(win, textvariable=comvalue2)  # create the combobox

comboxlist2.pack()
l3=tkinter.Label(win,text="学习方式")  # caption label for the study-mode ("学习方式") dropdown
l3.pack()  # place the label in the window
下拉框3 全日制和非全日制选择
# Dropdown 3: study mode (no restriction / full-time / part-time).
comvalue3 = tkinter.StringVar()  # variable bound to the combobox's current text
comboxlist3 = ttk.Combobox(win, textvariable=comvalue3)  # create the combobox
comboxlist3["values"] = ("不限","全日制", "非全日制")
comboxlist3.pack()
l4=tkinter.Label(win,width=20,height=1)  # label without text; presumably a vertical spacer
l4.pack()  # place the label in the window
# Status text shown in the red label below; infos() and get_infos_spider() update it.
var2=tkinter.StringVar()
var2.set("查询结果显示")
l5=tkinter.Label(win,textvariable=var2,bg="red",font=('Arial',12),width=25,height=2)  # status label bound to var2
l5.pack()  # place the label in the window
# Button that calls infos() when clicked.
b=tkinter.Button(win,text="查询到本地",width=8,height=1,command=infos)
b.pack()
win.mainloop()  # enter the Tk event loop

根据不同专硕学硕专业门类的选项,专业领域的下拉框内容会做出对应的变化
# Discipline ("专业领域") options offered for each category in comboxlist1.
_ZYLY_OPTIONS = {
    "专硕": (
        "(0251)金融", "(0252)应用统计", "(0253)税务", "(0254)国际商务", "(0255)保险", "(0256)资产评估",
        "(0257)审计", "(0351)法律", "(0352)社会工作", "(0353)警务", "(0451)教育", "(0452)体育", "(0453)汉语国际教育",
        "(0454)应用心理", "(0551)翻译", "(0552)新闻与传播", "(0553)出版", "(0651)文物与博物馆", "(0851)建筑学", "(0853)城市规划",
        "(0854)电子信息", "(0855)机械", "(0856)材料与化工", "(0857)资源与环境", "(0858)能源动力", "(0859)土木水利", "(0860)生物与医药",
        "(0861)交通运输", "(0951)农业", "(0952)兽医", "(0953)风景园林", "(0954)林业", "(1051)临床医学", "(1052)口腔医学",
        "(1053)公共卫生", "(1054)护理", "(1055)药学", "(1056)中药学", "(1057)中医", "(1151)军事", "(1251)工商管理", "(1252)公共管理",
        "(1253)会计", "(1254)旅游管理", "(1255)图书情报", "(1256)工程管理", "(1351)艺术",
    ),
    # One-element tuple: the trailing comma matters, otherwise this is a plain string.
    "(01)哲学": ("(0101)哲学",),
    "(02)经济学": ("(0201)理论经济学", "(0202)应用经济学", "(0270)统计学"),
    "(03)法学": ("(0301)法学", "(0302)政治学", "(0303)社会学", "(0304)民族学", "(0305)马克思主义理论", "(0306)公安学"),
    "(04)教育学": ("(0401)教育学", "(0402)心理学", "(0403)体育学", "(0471)"),
    "(05)文学": ("(0501)中国语言文学", "(0502)外国语言文学", "(0503)新闻传播学"),
    "(06)历史学": ("(0601)考古学", "(0602)中国史", "(0603)世界史"),
    "(07)理学": (
        "(0701)数学", "(0702)物理学", "(0703)化学", "(0704)天文学", "(0705)地理学", "(0706)大气科学",
        "(0707)海洋科学", "(0708)地球物理学", "(0709)地质学", "(0710)生物学", "(0711)系统科学", "(0712)科学技术史",
        "(0713)生态学", "(0714)统计学", "(0771)心理学", "(0772)力学", "(0773)材料科学与工程", "(0774)电子科学与技术",
        "(0775)计算机科学与技术", "(0776)环境科学与工程", "(0777)生物医学工程", "(0778)基础医学",
        "(0779)公共卫生与预防医学", "(0780)药学", "(0781)中药学", "(0782)医学技术", "(0783)护理学", "(0784)",
        "(0785)", "(0786)",
    ),
    "(08)工学": (
        "(0801)力学", "(0802)机械工程", "(0803)光学工程", "(0804)仪器科学与技术", "(0805)材料科学与工程", "(0806)冶金工程",
        "(0807)动力工程及工程热物理", "(0808)电气工程", "(0809)电子科学与技术", "(0810)信息与通信工程", "(0811)控制科学与工程", "(0812)计算机科学与技术",
        "(0813)建筑学", "(0814)土木工程", "(0815)水利工程", "(0816)测绘科学与技术", "(0817)化学工程与技术", "(0818)地质资源与地质工程", "(0819)矿业工程",
        "(0820)石油与天然气工程", "(0821)纺织科学与工程", "(0822)轻工技术与工程", "(0823)交通运输工程", "(0824)船舶与海洋工程", "(0825)航空宇航科学与技术",
        "(0826)兵器科学与技术", "(0827)核科学与技术", "(0828)农业工程", "(0829)林业工程", "(0830)环境科学与工程", "(0831)生物医学工程", "(0832)食品科学与工程",
        "(0833)城乡规划学", "(0834)风景园林学", "(0835)软件工程", "(0836)生物工程", "(0837)安全科学与工程", "(0838)公安技术", "(0839)网络空间安全", "(0870)科学技术史",
        "(0871)管理科学与工程", "(0872)设计学",
    ),
    "(09)农学": (
        "(0901)作物学", "(0902)园艺学", "(0903)农业资源与环境", "(0904)植物保护", "(0905)畜牧学", "(0906)兽医学", "(0907)林学",
        "(0908)水产", "(0909)草学", "(0970)科学技术史", "(0971)环境科学与工程", "(0972)食品科学与工程", "(0973)风景园林学",
    ),
    "(10)医学": (
        "(1001)基础医学", "(1002)临床医学", "(1003)口腔医学", "(1004)公共卫生与预防医学", "(1005)中医学", "(1006)中西医结合", "(1007)药学",
        "(1008)中药学", "(1009)特种医学", "(1010)医学技术", "(1011)护理学", "(1071)科学技术史", "(1072)生物医学工程", "(1073)", "(1074)",
    ),
    "(11)军事学": (
        "(1101)军事思想及军事历史", "(1102)战略学", "(1103)战役学", "(1104)战术学", "(1105)军队指挥学",
        "(1106)军事管理学", "(1107)军队政治工作学", "(1108)军事后勤学", "(1109)军事装备学", "(1110)军事训练学",
    ),
    "(12)管理学": ("(1201)管理科学与工程", "(1202)工商管理", "(1203)农林经济管理", "(1204)公共管理", "(1205)图书情报与档案管理"),
    "(13)艺术学": ("(1301)艺术学理论", "(1302)音乐与舞蹈学", "(1303)戏剧与影视学", "(1304)美术学", "(1305)设计学"),
}


def zyly(*args):
    """Fill the discipline dropdown to match the selected category.

    Reads the current text of ``comboxlist1`` and assigns the matching option
    tuple to ``comboxlist2["values"]``. If the text is not a known category
    the dropdown is left untouched.

    Args:
        *args: Ignored; accepted so the function can be used as an event
            handler, which is called with an event argument.
    """
    options = _ZYLY_OPTIONS.get(comboxlist1.get())
    if options is not None:
        comboxlist2["values"] = options
信息获取相关代码
def _join_subject_options(tbodys, cell_xpath):
    """Return the first *cell_xpath* text of each tbody, joined with " or "."""
    return " or ".join(tbody.xpath(cell_xpath)[0].strip() for tbody in tbodys)


def downing_infos(info_links):
    """Download every detail page and save the collected rows to an .xls file.

    For each entry the detail page is fetched, the programme fields and the
    exam subjects are extracted, and one spreadsheet row is written. The
    workbook is saved in the current directory under a name built from the
    current text of ``comboxlist2`` followed by ``time.time()``.

    Args:
        info_links: Sequence of ``[location, detail_page_url]`` pairs.

    Raises:
        IndexError: If a page lacks one of the expected table cells.
    """
    print(info_links)

    # Column titles, in sheet order; they double as the keys of each row dict.
    columns = ('招生单位', '院系所', '专业', '考试方式', '研究方向', '拟招人数',
               '学习方式', '政治', '外语', '业务课一', '业务课二', '所在地')

    f = xlwt.Workbook(encoding="utf8")
    sheet01 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)
    for col, title in enumerate(columns):
        sheet01.write(0, col, title)  # write(row, column, value)

    # NOTE(review): the session cookie is hard-coded and will go stale.
    header={
        "Cookie": "JSESSIONID=00915157320298F6A4E463EF80F4934F; _ga=GA1.3.1896669375.1599321681; _gid=GA1.3.1337310662.1599321681; zg_did=%7B%22did%22%3A%20%221745f002bbb14a-02130d69b55226-3323766-144000-1745f002bbc633%22%7D; aliyungf_tc=AQAAAOozbxkz6g0AIyMDcAwjIVeG9IbH; CHSICC_CLIENTFLAGZSML=fdc0c72e078135f955e18f6745458ca4; zg_adfb574f9c54457db21741353c3b0aa7=%7B%22sid%22%3A%201599353606922%2C%22updated%22%3A%201599356575353%2C%22info%22%3A%201599321680839%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22%22%2C%22landHref%22%3A%20%22https%3A%2F%2Fyz.chsi.com.cn%2Fzsml%2FqueryAction.do%22%7D",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
    }

    # Number the rows with enumerate: looking the entry up with
    # info_links.index() is quadratic and sends duplicate entries to the same
    # row, silently overwriting earlier data.
    for row, info_link in enumerate(info_links, start=1):
        resp = requests.get(url=info_link[1], headers=header)
        HTML = etree.HTML(resp.text)

        # Each 'zsml-res-items' tbody is one alternative set of exam subjects;
        # the alternatives of a column are combined as "A or B".
        tbodys = HTML.xpath("//tbody[@class='zsml-res-items']")

        data = {
            "招生单位": HTML.xpath("//tbody/tr[1]/td[2]")[0].text,
            "院系所": HTML.xpath("//tbody/tr[2]/td[2]")[0].text,
            "专业": HTML.xpath("//tbody/tr[3]/td[2]")[0].text,
            "考试方式": HTML.xpath("//tbody/tr[1]/td[4]")[0].text,
            "研究方向": HTML.xpath("//tbody/tr[4]/td[2]")[0].text,
            "拟招人数": HTML.xpath("//table/tbody/tr[5]/td[2]")[0].text,
            "学习方式": HTML.xpath("//table/tbody/tr[3]/td[4]")[0].text,
            "政治": _join_subject_options(tbodys, ".//td/text()"),
            "外语": _join_subject_options(tbodys, ".//td[2]/text()"),
            "业务课一": _join_subject_options(tbodys, ".//td[3]/text()"),
            "业务课二": _join_subject_options(tbodys, ".//td[4]/text()"),
            "所在地": info_link[0],
        }
        print(data, row - 1)

        for col, title in enumerate(columns):
            sheet01.write(row, col, data[title])

    f.save(r"{}{}.xls".format(comboxlist2.get(),time.time()))

控制按钮程序部分:

def get_infos_spider(data):  # driver behind the query button
    """Run the whole query: list schools, collect detail links, export them.

    Posts the query once to read the pagination bar, then calls
    ``get_shcool_link`` for every result page, ``get_infos_link`` for every
    school found, updates the status text ``var2`` with the number of detail
    links and finally hands them to ``downing_infos``.

    Args:
        data: Form fields for the query POST. The key ``'pageno'`` is set on
            this dict (in place) for each page requested.
    """
    url="https://yz.chsi.com.cn/zsml/queryAction.do"
    # NOTE(review): the session cookie is hard-coded and will go stale.
    header={
        "Cookie": "JSESSIONID=00915157320298F6A4E463EF80F4934F; _ga=GA1.3.1896669375.1599321681; _gid=GA1.3.1337310662.1599321681; zg_did=%7B%22did%22%3A%20%221745f002bbb14a-02130d69b55226-3323766-144000-1745f002bbc633%22%7D; aliyungf_tc=AQAAAOozbxkz6g0AIyMDcAwjIVeG9IbH; CHSICC_CLIENTFLAGZSML=fdc0c72e078135f955e18f6745458ca4; zg_adfb574f9c54457db21741353c3b0aa7=%7B%22sid%22%3A%201599353606922%2C%22updated%22%3A%201599356575353%2C%22info%22%3A%201599321680839%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22%22%2C%22landHref%22%3A%20%22https%3A%2F%2Fyz.chsi.com.cn%2Fzsml%2FqueryAction.do%22%7D",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
    }
    resp=requests.post(url,headers=header,data=data)
    HTML=etree.HTML(resp.text)

    # Collect the page numbers shown in the pagination bar. Items whose first
    # text node holds no digits are ignored.
    num_pages=[]
    for li in HTML.xpath("//ul[@class='ch-page']/li"):
        texts=li.xpath(".//text()")
        found=re.search(r"\d+",texts[0]) if texts else None
        if found:
            num_pages.append(int(found.group()))

    # With no pagination bar at all, still request the first page instead of
    # failing on an empty list.
    last_page=max(num_pages,default=1)
    print(num_pages)
    print(last_page)

    school_links=[]
    for num_page in range(1,last_page+1):
        data['pageno']=num_page
        print(data)
        get_shcool_link(data,school_links)
    print(school_links)

    info_links=[]
    for school_link in school_links:
        print(school_link[0],school_link[1])
        get_infos_link(info_links,school_link)
    print(info_links)

    var2.set("老板,一共搜到{}条信息".format(len(info_links)))
    downing_infos(info_links)

考研信息链接

def get_infos_link(info_links,school_link):
    """Append the detail-page links found on one school's page.

    Args:
        info_links: List extended in place with ``[location, detail_url]``
            entries.
        school_link: ``[location, school_page_url]`` pair; the location is
            copied into every entry appended.
    """
    from urllib.parse import urljoin

    url = school_link[1]
    print(url)

    # NOTE(review): the session cookie is hard-coded and will go stale.
    headers = {
        "Cookie": "JSESSIONID=00915157320298F6A4E463EF80F4934F; _ga=GA1.3.1896669375.1599321681; _gid=GA1.3.1337310662.1599321681; zg_did=%7B%22did%22%3A%20%221745f002bbb14a-02130d69b55226-3323766-144000-1745f002bbc633%22%7D; aliyungf_tc=AQAAAOozbxkz6g0AIyMDcAwjIVeG9IbH; CHSICC_CLIENTFLAGZSML=fdc0c72e078135f955e18f6745458ca4; zg_adfb574f9c54457db21741353c3b0aa7=%7B%22sid%22%3A%201599353606922%2C%22updated%22%3A%201599356575353%2C%22info%22%3A%201599321680839%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22%22%2C%22landHref%22%3A%20%22https%3A%2F%2Fyz.chsi.com.cn%2Fzsml%2FqueryAction.do%22%7D",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
    }
    resp=requests.get(url=url,headers=headers)
    HTML=etree.HTML(resp.text)
    for tr in HTML.xpath("//tbody/tr"):
        hrefs=tr.xpath("./td/a[@target='_blank']/@href")
        if not hrefs:
            # Row without a detail link: skip it rather than abort the run.
            continue
        # urljoin avoids the doubled slash that plain concatenation with
        # "https://yz.chsi.com.cn/" produces when the href starts with "/".
        info_links.append([school_link[0],urljoin("https://yz.chsi.com.cn/",hrefs[0])])

学校界面信息链接

def get_shcool_link(data,school_links):
    """Append the schools listed on one page of query results.

    Args:
        data: Form fields for the query POST (sent unchanged).
        school_links: List extended in place with
            ``[second_cell_text, school_page_url]`` entries; callers treat the
            second table cell as the school's location.
    """
    url = "https://yz.chsi.com.cn/zsml/queryAction.do"
    # NOTE(review): the session cookie is hard-coded and will go stale.
    headers={
        "Cookie": "JSESSIONID=00915157320298F6A4E463EF80F4934F; _ga=GA1.3.1896669375.1599321681; _gid=GA1.3.1337310662.1599321681; zg_did=%7B%22did%22%3A%20%221745f002bbb14a-02130d69b55226-3323766-144000-1745f002bbc633%22%7D; aliyungf_tc=AQAAAOozbxkz6g0AIyMDcAwjIVeG9IbH; CHSICC_CLIENTFLAGZSML=fdc0c72e078135f955e18f6745458ca4; zg_adfb574f9c54457db21741353c3b0aa7=%7B%22sid%22%3A%201599353606922%2C%22updated%22%3A%201599356575353%2C%22info%22%3A%201599321680839%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22%22%2C%22landHref%22%3A%20%22https%3A%2F%2Fyz.chsi.com.cn%2Fzsml%2FqueryAction.do%22%7D",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
    }
    resp=requests.post(url,headers=headers,data=data)
    HTML=etree.HTML(resp.text)
    for tr in HTML.xpath("//tbody/tr"):
        hrefs=tr.xpath('.//a/@href')
        cities=tr.xpath('./td[2]/text()')
        if not hrefs or not cities:
            # Row without a link or a second cell (for example a "no results"
            # row): skip it rather than abort the run.
            continue
        school_links.append([cities[0],"https://yz.chsi.com.cn"+hrefs[0]])

取最大值函数

def max2(list):
    """Return the largest element of *list*.

    The parameter keeps its original name (which shadows the builtin) so that
    existing keyword callers continue to work.

    Args:
        list: Non-empty sequence of mutually comparable items.

    Returns:
        The maximum item.

    Raises:
        IndexError: If *list* is empty.
    """
    if not list:
        raise IndexError("max2() called with an empty sequence")
    # The previous loop compared each item with list[0] rather than with the
    # running maximum, so [1, 5, 3] produced 3.
    return max(list)

按钮触发执行的函数

def infos():
    """Read the four dropdowns, build the query form and start the search.

    Called when the query button is pressed. The numeric codes are taken
    from the parenthesised prefix of each selection. A missing region or
    study mode means "no restriction"; a missing category or discipline is
    reported in the status label ``var2`` and the search is not started.
    """
    print(comvalue0.get(),comvalue1.get(),comvalue2.get(),comvalue3.get())

    # Region code: "不限" (and an empty selection) carry no digits.
    region_codes=re.findall(r'\d+',comvalue0.get())
    ssdm=region_codes[0] if region_codes else ""

    # Category code: "专硕" maps to the fixed code "zyxw".
    category=comvalue1.get()
    if category=="专硕":
        mldm="zyxw"
    else:
        category_codes=re.findall(r'\d+',category)
        if not category_codes:
            var2.set("请先选择门类类别")
            return
        mldm=category_codes[0]

    # Discipline code.
    field_codes=re.findall(r'\d+',comvalue2.get())
    if not field_codes:
        var2.set("请先选择专业领域")
        return
    yjxkdm=field_codes[0]

    # Study mode: 1 = full-time, 2 = part-time, "" = no restriction.
    xxfs={"全日制":1,"非全日制":2}.get(comvalue3.get(),"")

    print(ssdm, mldm, yjxkdm, xxfs)
    var2.set(str(ssdm)+"  "+str(mldm)+"  "+str(yjxkdm)+"  "+str(xxfs))
    data={
        "ssdm":ssdm,
        "dwmc":"",
        "mldm":mldm,
        "mlmc":"",
        "yjxkdm":yjxkdm,
        "zymc":"",
        "xxfs":xxfs,
    }
    get_infos_spider(data)

先给大家看看成品效果示意图:

界面示意图:

结果示意图:

好了,完成!点个赞吧!!!

你可能感兴趣的:(爬虫:研招网考研信息查询工具)