爬取豆瓣电影 Top 250 并储存到数据库中

from pyquery import PyQuery as pq
import pymysql


def get_all(pages=10, page_size=25):
    """Yield one dict per film scraped from the Douban Top 250 listing.

    Each listing page is fetched over HTTP with PyQuery, and every ``<li>``
    found inside the page's ``<ol>`` is turned into a dict with the keys
    ``title``, ``directors``, ``comment`` and ``score``. Every value is the
    text of the matching element(s) with runs of whitespace collapsed to
    single spaces.

    Args:
        pages: Number of listing pages to request. The default of 10 covers
            the ``start`` offsets 0 through 225, which is the whole list
            when it holds 250 entries at 25 per page; requesting an 11th
            page (offset 250) would only fetch past the end of the list.
        page_size: Step between the ``start`` offsets of successive pages.

    Yields:
        dict: The scraped fields of a single film.
    """
    for num in range(pages):
        doc = pq(url=f'https://movie.douban.com/top250?start={num * page_size}&filter=')
        entries = doc.find('ol').find('li').items()
        for entry in entries:
            # " ".join(text.split()) collapses newlines/indentation that come
            # from the page markup into single spaces.
            yield {
                'title': " ".join(entry(".title").text().split()),
                'directors': " ".join(entry.find('p:first-child').text().split()),
                'comment': " ".join(entry(".inq").text().split()),
                'score': " ".join(entry(".rating_num").text().split()),
            }

# Store the data.
# NOTE: these statements sit at module top level (outside any
# ``if __name__ == "__main__"`` guard), so they run whenever the module is
# executed or imported.
db = pymysql.connect(host='localhost', user='root', password='xxxx', port=3306, db='spiders', charset="utf8")
cursor = db.cursor()
# Start from an empty table on every run.
cursor.execute('DROP TABLE IF EXISTS movies')
sql = """CREATE TABLE movies(
                            title VARCHAR(80) NOT NULL,
                            directors VARCHAR(150) NOT NULL,
                            comment VARCHAR(100) NOT NULL,
                            score VARCHAR(5) NOT NULL
                            )"""
cursor.execute(sql)
table = 'movies'
for i in get_all():
    # Column names come from the dict keys; the values are passed separately
    # as query parameters so the driver escapes them.
    keys = ','.join(i.keys())
    values = ','.join(['%s'] * len(i))
    sql = f"INSERT INTO {table}({keys}) VALUES ({values})"
    try:
        cursor.execute(sql, tuple(i.values()))
        db.commit()
    except Exception as exc:
        # Best effort: report the failing row and keep going with the next
        # one. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt / SystemExit stop the script.
        print('Failed', exc)
        db.rollback()
    else:
        # Only report success once the commit has actually gone through.
        print('Success')

# Inspect the stored data: print the row count and every row of ``movies``.
if __name__ == "__main__":
    db = pymysql.connect(host='localhost', user='root', password='xxxx', port=3306, db='spiders', charset="utf8")
    try:
        # The cursor context manager closes the cursor when the block exits.
        with db.cursor() as cursor:
            sql = 'SELECT *FROM movies'
            try:
                cursor.execute(sql)
                print('count:', cursor.rowcount)
                results = cursor.fetchall()
                for row in results:
                    print(row)
            except Exception as exc:
                # Keep the script from crashing on a query failure, but show
                # what went wrong instead of hiding it.
                print('Error', exc)
    finally:
        # Always release the connection, even if the query failed.
        db.close()

 

你可能感兴趣的:(爬取豆瓣电影top250储存在数据库中)