import os
import threading
import traceback
from multiprocessing.dummy import Pool

import pymysql
import requests
# 习惯函数名开头大写,变量名开头小写,还没适应Python写代码规范,见谅
# Database access class
class KsMySql:
    """Thin wrapper around one pymysql connection to the ``ksvideoinfo`` table."""

    def __init__(self):
        """Open a plain (non-pooled) connection and a cursor on it."""
        self.conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root', password='1234567', db='pythonspider', charset='utf8mb4')
        self.cursor = self.conn.cursor()

    def IsSaveVideoByName(self, filename):
        """Report whether a video with this file name is already stored.

        Args:
            filename: Video name to look up in ``ksvideoinfo.filename``.

        Returns:
            True if a matching row exists, False if none does. True is also
            returned when the query itself fails (the error is printed).
        """
        try:
            # Pass the value as a query parameter so the driver escapes it;
            # titles containing quotes would otherwise break the statement.
            self.cursor.execute('select * from ksvideoinfo where filename = %s', (filename,))
            result = self.cursor.fetchone()
            return result is not None
        except Exception:
            print('IsSaveVideoByName 查询错误')
            traceback.print_exc()
            return True

    def SaveVideoInfo(self, filename):
        """Insert one row holding ``filename``; roll back and print on failure.

        Args:
            filename: Video name to store in ``ksvideoinfo.filename``.
        """
        try:
            # Parameterized for the same reason as in IsSaveVideoByName.
            self.cursor.execute('insert into ksvideoinfo(filename) values(%s)', (filename,))
            self.conn.commit()
            print('SaveVideoInfo 插入数据成功')
        except Exception as e:
            self.conn.rollback()
            print('SaveVideoInfo 插入数据错误')
            print(e)
            traceback.print_exc()

    def __del__(self):
        """Close the cursor and the connection when the instance is collected."""
        # __init__ may have failed before these attributes were assigned, so
        # look them up defensively instead of raising inside __del__.
        for resource in (getattr(self, 'cursor', None), getattr(self, 'conn', None)):
            if resource is not None:
                resource.close()
# Global variables
ksmysql = KsMySql()  # Database helper instance used by Select() and SaveInfo()
infolist = []  # Download jobs: dicts with the keys 'name' and 'videoUrl'
dirName = 'E:/AllWorkSpace1/Pytharm/pythonProjectPaWeb/Testdemo'  # Directory the videos are saved to
# makedirs(exist_ok=True) also creates missing parent directories and does not
# fail if the directory appears between an existence check and its creation.
os.makedirs(dirName, exist_ok=True)
def Select():
    """Queue one hard-coded video for download.

    The database is queried for the title (requirement 3: no duplicate names
    in the table, simulated here), but the answer is ignored: for simplicity
    the video is treated as not stored yet and is always appended to
    ``infolist``.
    """
    title = '杨洋迪丽热巴《烟火星辰》,用歌声致敬中国航天'
    video_url = 'https://video.pearvideo.com/mp4/short/20220206/cont-1751191-15823342-hd.mp4'
    # The lookup is still executed so the shared connection is exercised.
    ksmysql.IsSaveVideoByName(title)
    job = {'name': title, 'videoUrl': video_url}
    infolist.append(job)
def SaveInfo(dic):
    """Download one video into ``dirName`` and record its name in the database.

    Args:
        dic: Mapping with the keys ``'name'`` (video title, also used as the
            file name) and ``'videoUrl'`` (URL of the mp4 file).

    Download, file and database errors are printed and swallowed so that one
    failing item does not abort the rest of the batch.
    """
    name = dic['name']
    url = dic['videoUrl']
    pathName = f'{dirName}/{name}.mp4'
    try:
        # Without a timeout a stalled server blocks this pool worker forever.
        response = requests.get(url=url, timeout=30)
        # Do not store an HTTP error page on disk (and in the table) as a video.
        response.raise_for_status()
        with open(pathName, 'wb') as f:  # Requirement 1: save the video locally
            f.write(response.content)
        print(name, "下载完成")
        # Requirement 2: store the video name in the database table
        ksmysql.SaveVideoInfo(name)
    except Exception as e:
        print(name, '下载失败失败或者保存数据库失败')
        print(e)
        traceback.print_exc()
def Main():
    """Run 100 rounds of: rebuild ``infolist`` via Select(), then download it.

    Each round hands the queued jobs to a 20-thread pool and waits for them
    to finish before the next round starts.
    """
    pool1 = Pool(20)  # Thread pool (multiprocessing.dummy)
    try:
        for _ in range(100):
            infolist.clear()
            Select()
            pool1.map(SaveInfo, infolist)  # Download with multiple threads
    finally:
        # Shut the workers down even if a round raised.
        pool1.close()
        pool1.join()
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    Main()
如:
...同上
import threading
class KsMySql:
    """Wrapper around one pymysql connection whose cursor is guarded by a lock."""

    def __init__(self):
        """Open a plain (non-pooled) connection, a cursor and the guarding lock."""
        self.conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root', password='tiger', db='pythonspider', charset='utf8mb4')
        self.cursor = self.conn.cursor()
        self.lock = threading.Lock()  # Serialises use of the shared cursor

    def IsSaveVideoByName(self, filename):
        """Report whether a video with this file name is already stored.

        Args:
            filename: Video name to look up in ``ksvideoinfo.filename``.

        Returns:
            True if a matching row exists, False if none does. True is also
            returned when the query itself fails (the error is printed).
        """
        try:
            # `with` releases the lock even when execute() raises; a manual
            # acquire()/release() pair would leave it held forever in that case.
            with self.lock:
                # Parameterized so the driver escapes quotes in the title.
                self.cursor.execute('select * from ksvideoinfo where filename = %s', (filename,))
                result = self.cursor.fetchone()
            return result is not None
        except Exception:
            print('IsSaveVideoByName 查询错误')
            traceback.print_exc()
            return True
...同上
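但经过我个人测试发现,这样做没有用,还是会报新的错误。这个方法理论上是没问题的,但是在multiprocessing.dummy多线程的情况下却不行。一个可能的原因是:这里只给IsSaveVideoByName加了锁,而各线程实际调用的SaveVideoInfo(省略部分与上文相同)仍然在不加锁的情况下共用同一个链接;另外,如果execute抛出异常,release()不会被执行,锁会一直被占用。以上仅代表我个人的想法,也许是自己能力不足,哪里写错了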
...
def SaveInfo(dic):
    """Download one video into ``dirName`` and record it via a private connection.

    Args:
        dic: Mapping with the keys ``'name'`` (video title, also used as the
            file name) and ``'videoUrl'`` (URL of the mp4 file).

    Download, file and database errors are printed and swallowed so that one
    failing item does not abort the rest of the batch.
    """
    ksmysql = KsMySql()  # Database helper instance created for this call only
    name = dic['name']
    url = dic['videoUrl']
    pathName = f'{dirName}/{name}.mp4'
    try:
        # Without a timeout a stalled server blocks this pool worker forever.
        response = requests.get(url=url, timeout=30)
        # Do not store an HTTP error page on disk (and in the table) as a video.
        response.raise_for_status()
        with open(pathName, 'wb') as f:  # Requirement 1: save the video locally
            f.write(response.content)
        print(name, "下载完成")
        # Requirement 2: store the video name in the database table
        ksmysql.SaveVideoInfo(name)
    except Exception as e:
        print(name, '下载失败失败或者保存数据库失败')
        print(e)
        traceback.print_exc()
...
可以完美解决,因为这样每个线程都有自己的数据库链接对象。
优点:简单、方便
缺点:每调用一次SaveInfo函数就会新建一个数据库链接,并在函数结束时关闭该链接,性能可能有损失
...
from dbutils.pooled_db import PooledDB
class KsMySql:
    """Database helper whose connection is borrowed from a shared DBUtils pool."""

    pool = None  # Class-wide PooledDB, created lazily on first use
    _pool_lock = threading.Lock()  # Serialises the one-time creation of `pool`

    def __init__(self):
        """Take a connection from the pool (instead of opening a new one) and open a cursor."""
        self.conn = KsMySql.Getmysqlconn()
        self.cursor = self.conn.cursor()

    @staticmethod
    def Getmysqlconn():
        """Return a connection from the class-wide pool, building the pool on first use."""
        if KsMySql.pool is None:
            # Worker threads may get here at the same time; re-check under the
            # lock so that only one of them builds the pool.
            with KsMySql._pool_lock:
                if KsMySql.pool is None:
                    mysqlInfo = {
                        "host": '127.0.0.1',
                        "user": 'root',
                        "passwd": 'tiger',
                        "db": 'pythonspider',
                        "port": 3306,
                        "charset": 'utf8mb4'
                    }
                    # NOTE(review): per the DBUtils docs `blocking` only matters
                    # once `maxconnections` is set, which it is not here - confirm.
                    KsMySql.pool = PooledDB(creator=pymysql, mincached=1, maxcached=20, host=mysqlInfo['host'],
                                            user=mysqlInfo['user'], passwd=mysqlInfo['passwd'], db=mysqlInfo['db'],
                                            port=mysqlInfo['port'], charset=mysqlInfo['charset'], blocking=True)
                    print(KsMySql.pool)
        return KsMySql.pool.connection()

    ...  # Remaining methods are elided in this snippet

    def __del__(self):
        """Release the cursor and hand the connection back to the pool."""
        # close() on a pooled connection returns it to the pool rather than
        # really closing it. __init__ may have failed before the attributes
        # were assigned, so look them up defensively.
        for resource in (getattr(self, 'cursor', None), getattr(self, 'conn', None)):
            if resource is not None:
                resource.close()
# Worker function for the pooled variant; the rest of its body is elided in this snippet.
def SaveInfo(dic):
ksmysql = KsMySql()# A KsMySql instance must be created here as well
...
...
注意
# Same PooledDB construction as in KsMySql.Getmysqlconn, repeated here to point out the `blocking` argument.
KsMySql.pool = PooledDB(creator=pymysql, mincached=1, maxcached=20, host=mysqlInfo['host'],
user=mysqlInfo['user'], passwd=mysqlInfo['passwd'], db=mysqlInfo['db'],
port=mysqlInfo['port'], charset=mysqlInfo['charset'], blocking=True)
'''
blocking参数,代表当链接数已经达到上限(maxconnections)时,是否等待空闲链接
True :阻塞等待,直到有链接被释放,可能影响程序速度
False:不等待,按DBUtils文档的说明会直接抛出异常(而不是共用已被占用的链接对象),反正会报错,最好写成True。注意:只有设置了maxconnections时这个参数才会生效,上面的代码并没有设置
'''
可以完美解决,因为这样每个线程也都有自己的数据库链接对象。
优点:从链接池中获取自己的链接,可以优化一点性能吧
缺点:代码稍微复杂,坑多。。。