# Store the latest movie titles and viewing links from Dytt (电影天堂) in a database (all pages).

import requests
import json
import re
import time

import pymysql

# Database connection helper
class mysql_conn():
    """Hold one PyMySQL connection plus a cursor for write statements.

    The connection is opened when the object is created and closed when the
    object is garbage-collected.
    """

    def __init__(self):
        # Keyword arguments: PyMySQL 1.0+ rejects positional connect() args.
        # utf8mb4 so non-ASCII (e.g. Chinese) text can be sent; older PyMySQL
        # releases default to latin1, which cannot encode it.
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='****',
            database='wang',
            charset='utf8mb4',
        )
        self.cursor = self.db.cursor()

    def execute_modify_mysql(self, sql, args=None):
        """Execute one modifying statement and commit it.

        Args:
            sql: The SQL text. May contain ``%s`` placeholders when ``args``
                is given.
            args: Optional parameters bound to the placeholders by the
                driver. Omit it to run ``sql`` exactly as written.

        Raises:
            Whatever the driver raises; the transaction is rolled back first.
        """
        try:
            if args is None:
                self.cursor.execute(sql)
            else:
                self.cursor.execute(sql, args)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction on the connection.
            self.db.rollback()
            raise

    def __del__(self):
        # `db` is missing when connect() raised inside __init__; it is reset
        # to None here so the connection is never closed twice.
        db = getattr(self, 'db', None)
        if db is not None:
            self.db = None
            db.close()

# NOTE(review): in the published script the two page-scraping patterns were
# empty strings (''), apparently because their HTML content was stripped when
# the post was rendered. With an empty pattern there is no capture group, so
# `.group(1)` always raised IndexError. The two patterns below are a
# reconstruction based on the site's usual markup -- TODO confirm against the
# live pages before relying on them.
DETAIL_LINK_RE = re.compile(r'<a href="(.*?)" class="ulink">')
DOWNLOAD_LINK_RE = re.compile(
    r'<td style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="(.*?)">'
)
# Movie name: the text between the "<digit>/" that precedes the file name and
# the ".m.." extension of the download link (unchanged from the original).
MOVIE_NAME_RE = re.compile(r'\d/(.*).m')

# Seconds to wait for a response, so one stalled request cannot hang the crawl.
REQUEST_TIMEOUT = 10

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

sq = mysql_conn()
for k in range(1, 178):
    # List page
    url = 'http://www.ygdy8.net/html/gndy/dyzz/list_23_{}.html'.format(k)
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print('list page request failed, skipped: {} ({})'.format(url, err))
        continue
    html = response.text

    for i in DETAIL_LINK_RE.findall(html):
        # Detail page
        url2 = 'http://www.ygdy8.net' + i
        try:
            response2 = requests.get(
                url2, headers=headers, timeout=REQUEST_TIMEOUT
            )
        except requests.RequestException as err:
            print('detail page request failed, skipped: {} ({})'.format(url2, err))
            time.sleep(2)
            continue
        # Ignore stray bytes so one malformed character does not abort the run.
        html2 = response2.content.decode('gbk', errors='ignore')

        # Download link, then the movie name embedded in that link.
        res3 = DOWNLOAD_LINK_RE.search(html2)
        move = MOVIE_NAME_RE.search(res3.group(1)) if res3 else None
        if move is None:
            print('no download link parsed, skipped: {}'.format(url2))
        else:
            move_url = res3.group(1)
            move_name = move.group(1)
            # Let the driver quote the values: a title containing a quote
            # would otherwise break (or inject into) the statement.
            sql = sq.cursor.mogrify(
                'insert into dianying(name,url) values(%s,%s)',
                (move_name, move_url),
            )
            sq.execute_modify_mysql(sql)
        # Pause between detail pages to avoid hammering the site.
        time.sleep(2)
    print('第{}页存储完毕'.format(k))

# You may also be interested in: (basic crawlers)