Wxpython GUI - 爬取猫眼电影并写入Mysql,并进行查询

本次实战的目的是更好地学习 MySQL 和 MongoDB 的操作:需要从猫眼电影上爬取电影信息,再加上简单的 Python GUI 界面操作,效果图如下:


picture.png

废话不多说,首先导入本次项目需要导入的模块,以及主程序入口代码如下:

import requests
from bs4 import BeautifulSoup
import re
import pymysql
from pymongo import MongoClient
import time
import wx

# Before every crawl the table is rebuilt, discarding the previous one.
def create_table_mysql():
    """Drop the ``movie`` table if it exists and create it again, empty.

    Connects to the local ``Movies_mao`` MySQL database. The connection and
    cursor are always released, even when one of the statements fails; any
    pymysql error propagates to the caller.
    """
    sql = """create table movie(
              id int UNSIGNED not null auto_increment,
              name char(50) not null,
              actor varchar(400) not null,
              time  DATE  not null,
              score  FLOAT,
              PRIMARY  KEY (id))ENGINE = InnoDB Default charset=UTF8MB4;
      """
    # Open the database connection.
    db = pymysql.connect(host="localhost", user='root', password='Lizzie94', port=3306, db='Movies_mao')
    try:
        with db.cursor() as cursor:
            cursor.execute("drop table if EXISTS movie ")
            cursor.execute(sql)
    finally:
        # Close even if DROP/CREATE raised, so the connection is not leaked.
        db.close()


"""下载网页"""
def crawurl(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely, which would
            freeze the GUI handler that calls this function.

    Returns:
        The page text when the server answers with status 200, otherwise
        ``None`` (the reason is printed).
    """
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure: connection error, timeout, bad URL, ...
        print(e)
        return None
    if r.status_code == 200:
        return r.text
    print('request failed, status is {}'.format(r.status_code))
    return None


"""解析网页"""
def parse(html):
    """Yield one dict per movie found in a board page.

    Args:
        html: Page source as a string. ``None`` or an empty string (what a
            failed download produces) yields nothing instead of raising.

    Yields:
        Dicts with the keys ``name``, ``actor``, ``time`` and ``score``;
        every value is a string taken from the page.
    """
    if not html:
        # A failed download hands us None; BeautifulSoup(None) would raise.
        return
    soup = BeautifulSoup(html, 'html.parser')
    for item in soup.find_all('div', "board-item-content"):
        movie_name = item.find("p", class_="name").a.text
        # Remove all whitespace, then the leading label.
        movie_star = re.sub(r'\s+', '', item.find("p", class_="star").text).replace("主演:", "")
        # Remove the label and any parenthesised suffix after the date.
        movie_release_time = re.sub(r'\((.*)\)', '', item.find("p", class_="releasetime").text.replace("上映时间:", ""))
        # The score is split across two <i> tags: integer part and fraction part.
        movie_score = ''.join([item.find('i', class_="integer").text, item.find('i', class_="fraction").text])
        yield {
            'name': movie_name,
            'actor': movie_star,
            'time': movie_release_time,
            'score': movie_score,
        }


"""写入Mysql数据库"""
def load_to_mysql(data):
    """Insert one movie record into the ``movie`` table.

    Args:
        data: Mapping of column name to value. The keys are interpolated
            into the statement as column names, so they must come from
            trusted code; the values are passed as query parameters.
            An empty mapping is ignored.

    Returns:
        None. Success or failure is reported with ``print``; a failed insert
        is rolled back.
    """
    if not data:
        # Nothing to insert; avoid building "insert into movie() values ()".
        return
    values = ",".join(["%s"] * len(data))
    keys = ",".join(data.keys())
    movie_item = tuple(data.values())
    sql_insert = "insert into movie({keys}) values ({values})".format(keys=keys, values=values)
    # Open the database connection.
    db = pymysql.connect(host="localhost", user='root', password='Lizzie94', port=3306, db='Movies_mao')
    try:
        with db.cursor() as cursor:
            # execute() returns the affected row count; commit only if a row was written.
            if cursor.execute(sql_insert, movie_item):
                print('insert successfully')
                db.commit()
    except Exception as e:
        print("failed", e.args)
        db.rollback()
    finally:
        # Always release the connection, even if rollback itself raised.
        db.close()


"""写入MangoDB"""
def load_to_mangoDB(data):
    """Insert one movie record into the ``Movie`` collection of ``Mao_Movie``.

    Args:
        data: Mapping describing the movie. A copy is inserted, so the
            caller's dict does not gain an ``_id`` key.

    Returns:
        None. Success or failure is reported with ``print``.
    """
    client = MongoClient('localhost')   # connect to the local server
    try:
        db = client.Mao_Movie           # database is created on first write if absent
        post = db.Movie                 # collection is created on first write if absent
        try:
            # insert_one replaces Collection.insert, which was deprecated in
            # PyMongo 3 and removed in PyMongo 4.
            post.insert_one(dict(data))
            print('insert MongoDB successfully')
        except Exception as e:
            print('insert MongoDB failed', e.args)
    finally:
        # Each MongoClient owns its own connections; close it so repeated
        # calls do not accumulate open clients.
        client.close()

"""点击 crawl_button 按钮触发主调用函数开始爬取电影"""
def main(event):
    """Crawl the ten board pages and store every movie in MySQL and MongoDB.

    Bound to the crawl button. The ``movie`` table is recreated first, then
    each page is downloaded, parsed and written to both databases. Pages that
    cannot be downloaded are skipped and counted instead of aborting the run.

    Args:
        event: The wx button event (unused).
    """
    base_url = 'https://maoyan.com/board/4?offset='
    page = 11
    # Create the table before crawling.
    create_table_mysql()
    failed_pages = 0
    for i in range(1, page):
        # Pages are addressed by offset: 0, 10, ..., 90.
        url = base_url + str((i - 1) * 10)
        html = crawurl(url)
        if html is None:
            # crawurl already printed the reason; move on to the next page.
            failed_pages += 1
            continue
        for item in parse(html):
            print(item)
            load_to_mysql(item)
            load_to_mangoDB(item)
    if failed_pages:
        wx.MessageBox("crawl finished, {} page(s) failed to download".format(failed_pages),
                      "Message", wx.OK | wx.ICON_WARNING)
    else:
        wx.MessageBox("crawl movies successfully", "Message", wx.OK | wx.ICON_INFORMATION)

以下代码是整个项目的主入口程序,需要用 wx 建立一个 GUI 界面,并将按钮 crawl_button 绑定到 main() 事件,点击该按钮后即触发爬取电影的事件。

if __name__ == '__main__':
    t1 = time.time()
    # --- GUI construction ---
    app = wx.App()
    frame = wx.Frame(None, title="Spide movie", pos=(1000, 200), size=(500, 400))
    panel = wx.Panel(frame)

    lb_box = wx.BoxSizer(wx.HORIZONTAL)
    # Static labels placed in a horizontal sizer at a 1:1 ratio.
    lb_srch_cont = wx.StaticText(panel, -1, '搜索内容:')
    lb_srch_type = wx.StaticText(panel, -1, '搜索类别:')
    lb_box.Add(lb_srch_cont, proportion=1, flag=wx.EXPAND | wx.ALL, border=3)
    lb_box.Add(lb_srch_type, proportion=1, flag=wx.EXPAND | wx.ALL, border=3)

    text_box = wx.BoxSizer(wx.HORIZONTAL)
    # Text box for the search term (read by hit_me as a module-level name).
    content_text = wx.TextCtrl(panel, -1)
    list_type = ['演员名字', '电影名字']
    # Drop-down list selecting which field to search (read by hit_me).
    type_combox = wx.ComboBox(panel, -1, choices=list_type)
    # Search button.
    srch_button = wx.Button(panel, label="搜索")
    # Bind the search handler.
    # NOTE: hit_me must already be defined when this line runs; in this
    # listing its definition appears after this block.
    srch_button.Bind(wx.EVT_BUTTON, hit_me)

    text_box.Add(content_text, proportion=3, flag=wx.EXPAND | wx.ALL, border=3)
    text_box.Add(type_combox, proportion=2, flag=wx.EXPAND | wx.ALL, border=3)
    text_box.Add(srch_button, proportion=1, flag=wx.EXPAND | wx.ALL, border=3)
    # Multi-line text box that shows the search results (written by hit_me).
    srch_content = wx.TextCtrl(panel, style=wx.TE_MULTILINE | wx.HSCROLL)
    # Button that starts the crawl.
    crawl_button = wx.Button(panel, label="开始爬取猫眼电影TOP100")
    # Bind the crawl handler; a handler takes exactly one argument, the event.
    crawl_button.Bind(wx.EVT_BUTTON, main)
    v_box = wx.BoxSizer(wx.VERTICAL)
    v_box.Add(lb_box, proportion=1, flag=wx.EXPAND | wx.ALL, border=3)
    v_box.Add(text_box, proportion=1, flag=wx.EXPAND | wx.ALL, border=3)
    v_box.Add(srch_content, proportion=10, flag=wx.EXPAND | wx.ALL, border=3)
    v_box.Add(crawl_button, proportion=1, flag=wx.ALIGN_CENTER_HORIZONTAL | wx.ALL, border=3)

    panel.SetSizer(v_box)
    frame.Show()
    app.MainLoop()
    # Elapsed time is "now minus start"; the reverse order printed a negative value.
    print('Total time:', time.time() - t1)

点击搜索button, 获取输入文本框和下拉菜单值,进行数据库查询操作

# Handler for the search button.
def hit_me(event):
    """Search the ``movie`` table and show the matching rows in the GUI.

    Reads the search term from ``content_text`` and the search category from
    ``type_combox`` (both module-level widgets), runs a ``LIKE '%term%'``
    query on the actor or name column, and writes one line per matching row
    into ``srch_content``.

    Args:
        event: The wx button event (unused).
    """
    content = content_text.GetValue()
    search_type = type_combox.GetValue()
    if not (content and search_type):  # either input is empty: show a message and stop
        wx.MessageBox("please input some values", "Message", wx.OK | wx.ICON_INFORMATION)
        return
    # The column name comes from this fixed pair only, never from user text.
    field = 'actor' if search_type == "演员名字" else 'name'
    value = '%' + content + '%'
    # Open the database connection.
    db = pymysql.connect(host="localhost", user='root', password='Lizzie94', port=3306, db='Movies_mao')
    try:
        with db.cursor() as cursor:
            # The user-supplied pattern is passed as a query parameter so the
            # driver escapes it; formatting it into the SQL text allowed injection.
            cursor.execute("Select * from movie where {field} like %s ".format(field=field), (value,))
            results = cursor.fetchall()
        all_row = ''.join(' '.join(str(i) for i in each) + '\n' for each in results)
        srch_content.SetValue(all_row)
    except Exception as e:
        wx.MessageBox("selection from database error", "Message", wx.OK | wx.ICON_INFORMATION)
        print(e)
    finally:
        # Release the connection on both the success and the error path.
        db.close()

最后的效果图如下:


picture2.png

picture3.png

picture4.png

未完待续,下次需要用异步方法来爬虫,本人基础不是很扎实,只能用同步方法来爬取数据。
最后附上 wxPython 的中文学习资料: https://www.ctolib.com/docs/sfile/wxpy-in-action/12.html
英文学习地址:http://zetcode.com/wxpython/
https://www.wxpython.org/

你可能感兴趣的:(Wxpython GUI - 爬取猫眼电影并写入Mysql,并进行查询)