# DB.py: wrapper class for one ranking record (封装类对象)
class DB:
    """Plain record holding one scraped ranking entry.

    The four values are stored exactly as passed in; no validation or
    conversion is performed.

    Attributes:
        date: value shown under the "日期" (date) label.
        mc_href: value shown under the "地址" (address/link) label.
        pm: value shown under the "排名" (rank) label.
        m: value shown under the "影名" (film title) label.
    """

    def __init__(self, date, mc_href, pm, m) -> None:
        self.mc_href = mc_href
        self.pm = pm
        self.m = m
        self.date = date

    def __str__(self) -> str:
        """Return the human-readable one-line summary of the record."""
        return "日期:{},地址:{},排名:{},影名:{}".format(
            self.date, self.mc_href, self.pm, self.m
        )

    def __repr__(self) -> str:
        """Return an unambiguous, constructor-like representation for debugging."""
        return "{}(date={!r}, mc_href={!r}, pm={!r}, m={!r})".format(
            type(self).__name__, self.date, self.mc_href, self.pm, self.m
        )
# Douban Movies: weekly ranking scraper (豆瓣电影,一周排名)
import requests
from bs4 import BeautifulSoup
import bs4
import time #时间
import pymysql #数据库
from DB import * # 导入封装类对象
# Database access
def mysql_date(date, mc_href, pm, m):
    """Insert one ranking row into the ``db`` table.

    A new connection to the local MySQL server (database ``test``) is opened
    on every call, a parameterized INSERT is executed and committed, and the
    cursor and connection are always closed afterwards.

    Args:
        date: value for the ``date`` column.
        mc_href: value for the ``mc_href`` column.
        pm: value for the ``pm`` column.
        m: value for the ``m`` column.

    Returns:
        True if the row was committed; False if executing or committing the
        statement failed and the transaction was rolled back.

    Raises:
        Whatever ``pymysql.connect`` raises when the connection cannot be
        established (connecting happens outside the guarded section).
    """
    # NOTE(review): connection settings and credentials are hard-coded here.
    conn = pymysql.connect(host='localhost', user='root', password='root',
                           database='test', charset='utf8')
    try:
        # Cursor object used to execute the SQL statement.
        cursor = conn.cursor()
        try:
            # Placeholders keep the values out of the SQL text itself.
            sql = "INSERT INTO db(date,mc_href,pm,m) VALUES (%s, %s, %s, %s);"
            cursor.execute(sql, [date, mc_href, pm, m])
            conn.commit()
        except Exception as e:
            # Best effort: undo the failed transaction and report the cause
            # instead of propagating, so the caller can keep processing rows.
            conn.rollback()
            print('执行失败: {0}'.format(e))
            return False
        finally:
            cursor.close()
    finally:
        # Runs even if rollback() itself raises, so the connection never leaks.
        conn.close()
    return True
def main():
    """Fetch the Douban movie home page and store its weekly billboard rows.

    For every table row inside ``div.billboard-bd`` the rank, title and link
    are extracted, wrapped in a ``DB`` object (printed for inspection) and
    handed to ``mysql_date`` for insertion.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        SystemExit: if the billboard container is not present in the page.
    """
    # Target page.
    url = 'https://movie.douban.com/'
    # Browser-like User-Agent header sent with the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'}

    response = requests.get(url=url, headers=headers, timeout=30)
    print('登录状态返回值:{0}'.format(response))
    # Stop on 4xx/5xx here rather than failing later on a missing element.
    response.raise_for_status()
    encoding = response.apparent_encoding
    print('编码格式:{0}'.format(encoding))

    # Parse the page.
    soup = BeautifulSoup(response.text, 'html.parser')

    # Container of the weekly billboard.
    billboard = soup.find('div', class_='billboard-bd')
    if billboard is None:
        raise SystemExit('未找到一周口碑榜 (div.billboard-bd)')

    for row in billboard.find_all('tr'):
        order_cell = row.find('td', class_='order')
        title_cell = row.find('td', class_='title')
        link = title_cell.find('a') if title_cell is not None else None
        if order_cell is None or link is None:
            # Row does not have the expected rank/title/link cells; skip it.
            continue

        # Rank, title and link; surrounding whitespace is trimmed.
        pm = order_cell.get_text().strip()
        m = title_cell.get_text().strip()
        mc_href = link.get('href')

        # Current local time, formatted.
        date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        # Wrap the values in an object and show it.
        persons = DB(date, mc_href, pm, m)
        print(persons)

        # Store the row. Success is reported unless mysql_date explicitly
        # returns False; a None return (no status) counts as success.
        if mysql_date(date, mc_href, pm, m) is not False:
            print('执行成功')


if __name__ == '__main__':
    main()