Python_BeautifulSoup_豆瓣周榜_爬取

封装类对象(保存为 DB.py,供下面的爬虫脚本通过 from DB import * 导入)

class DB:
    """Plain record holding a date, a link, a rank and a movie title."""

    def __init__(self, date,mc_href, pm, m):
        # Values are stored exactly as received; no validation or conversion.
        self.date = date
        self.mc_href = mc_href
        self.pm = pm
        self.m = m

    def __str__(self):
        """Return a one-line, human-readable summary of the record."""
        fields = (self.date, self.mc_href, self.pm, self.m)
        return "日期:{},地址:{},排名:{},影名:{}".format(*fields)

爬取豆瓣电影「一周口碑榜」并写入 MySQL(主程序)

import requests
from bs4 import BeautifulSoup
import bs4
import time  #时间
import pymysql #数据库
from DB import  *   # 导入封装类对象

#链接数据库
def mysql_date(date,mc_href,pm,m):
    """Insert one row into the ``db`` table of the local ``test`` database.

    A new connection is opened for every call and is always closed before
    returning, even when the insert fails.

    Args:
        date: Value for the ``date`` column.
        mc_href: Value for the ``mc_href`` column.
        pm: Value for the ``pm`` column.
        m: Value for the ``m`` column.

    Returns:
        None. Errors raised while executing or committing the statement are
        not propagated: the transaction is rolled back and the error is
        printed.
    """
    sql = "INSERT INTO db(date,mc_href,pm,m) VALUES (%s, %s, %s, %s);"

    # Open the connection outside the try block: if connecting fails there
    # is nothing to clean up yet.
    conn = pymysql.connect(host='localhost', user ='root', password ='root', database ='test', charset ='utf8')
    try:
        # The cursor is closed automatically when the with-block exits.
        with conn.cursor() as cursor:
            try:
                # Parameterized execution: the driver escapes the values.
                cursor.execute(sql, [date, mc_href, pm, m])
                conn.commit()
            except Exception as e:
                # Best-effort insert: undo the partial transaction and report
                # the reason instead of silently discarding it.
                conn.rollback()
                print('执行失败: {0}'.format(e))
    finally:
        # Always release the connection, including when cursor creation or
        # the rollback itself raises.
        conn.close()





if __name__ == '__main__':
    # Script entry point: download the Douban movie home page, read every row
    # of the "billboard-bd" table, print each row and store it via mysql_date.

    # Page to fetch
    url = 'https://movie.douban.com/'
    # Browser-like User-Agent sent with the request
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'}
    # Fetch the page
    response = requests.get(url=url, headers=headers, timeout=30)
    print('登录状态返回值:{0}'.format(response))
    # The detected encoding is only reported; it is not applied to the response.
    encoding = response.apparent_encoding
    print('编码格式:{0}'.format(encoding))

    # Parse the HTML
    soup = BeautifulSoup(response.text, 'html.parser')

    # Locate the container of the weekly chart. find() returns None when the
    # element is absent, so fail with a clear message rather than an
    # AttributeError on the next line.
    billboard = soup.find('div', class_='billboard-bd')
    if billboard is None:
        raise SystemExit('未找到一周口碑榜(div.billboard-bd),页面结构可能已变化或请求被拦截')

    for row in billboard.find_all('tr'):
        order_cell = row.find('td', class_='order')
        title_cell = row.find('td', class_='title')
        link = title_cell.find('a') if title_cell is not None else None
        # Skip rows that lack the rank cell, the title cell or the link.
        if order_cell is None or link is None:
            continue

        # Rank text
        pm = order_cell.get_text()
        # Title text and the link target
        m = title_cell.get_text()
        mc_href = link.get('href')
        # Timestamp of when this row was processed
        date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # Wrap the values in a record object and show it
        persons = DB(date,mc_href,pm,m)
        print(persons)

        # Store the row in the database
        mysql_date(date,mc_href,pm,m)

    print('执行成功')

你可能感兴趣的:(python_爬虫基础)