# 安装 fitz 包(fitz 模块由 PyMuPDF 提供,安装命令如下)
pip install pymupdf
脚本如下所示:
import fitz
import re
import os
import time
import sys
# Command-line arguments of this script; arguments[1] is read further down as
# the directory that contains the PDF files.
arguments = sys.argv
# Echo every argument so the actual invocation is visible in the console.
for arg in arguments:
    print(arg)
def file_name_list(base_dir):
    """Return the names, without ``.pdf``, of the PDF files directly in a directory.

    Only the top level of ``base_dir`` is inspected; sub-directories are not
    searched. Entries whose name does not end in ``.pdf`` are ignored, and only
    the trailing extension is removed (a ``.pdf`` in the middle of a name is
    kept), so appending ``.pdf`` to a returned name gives back the file name.

    Args:
        base_dir: Directory to scan.

    Returns:
        A sorted list of file names with the trailing ``.pdf`` removed. The
        list is empty when the directory does not exist or holds no PDF files.
    """
    suffix = '.pdf'
    # os.walk yields nothing for a missing directory, so supply an empty
    # listing instead of leaving the result undefined.
    _, _, files = next(os.walk(base_dir), (base_dir, [], []))
    return sorted(f[:-len(suffix)] for f in files if f.endswith(suffix))
def pdfExtractPic(filePath, pic_path):
    """Extract the images embedded in a PDF and save them as PNG files.

    Files are written into ``pic_path`` as ``1.png``, ``2.png``, ... numbered
    in the order the image references are met while walking the pages.
    Errors are printed, not raised: a failure on one image does not stop the
    remaining images, and a failure to open the document ends the extraction.

    Args:
        filePath: Path of the PDF file to read.
        pic_path: Directory that receives the PNG files.

    Returns:
        An empty dict. Nothing is ever stored in it; it is returned only so
        the return type stays what callers already receive.
    """
    ret = {}
    try:
        with fitz.open(filePath) as doc:
            iNum = 0
            for page in range(len(doc)):
                for image in doc.get_page_images(page):
                    xref = image[0]
                    iNum += 1
                    # os.path.join copes with a pic_path that does or does
                    # not end in a separator and is not tied to one OS.
                    fileName = os.path.join(pic_path, "%d.png" % iNum)
                    try:
                        pix = fitz.Pixmap(doc, xref)
                        # Four or more colour components (alpha excluded)
                        # means CMYK, which PNG cannot store: convert to RGB.
                        if pix.n - pix.alpha >= 4:
                            pix = fitz.Pixmap(fitz.csRGB, pix)
                        pix.save(fileName)
                    except Exception as e:
                        # Best effort: report the image and keep going.
                        print(e)
    except Exception as e:
        print(e)
    return ret
def pdf2image(path, pic_path, image_name, zoom=2, max_side=2000):
    """Render every page of a PDF to a PNG file.

    Each page is saved in ``pic_path`` as ``<image_name>_img_<page>.png``,
    with page numbers starting at 1. Errors are printed, not raised.

    Args:
        path: Path of the PDF file to render.
        pic_path: Directory that receives the PNG files.
        image_name: Prefix used for the generated file names.
        zoom: Scale factor applied to both axes, so the aspect ratio is kept.
        max_side: If an enlarged page is wider or taller than this many
            pixels, the page is rendered again at its original size.

    Returns:
        None.
    """
    try:
        with fitz.open(path) as pdf:
            for pg in range(pdf.page_count):
                page = pdf[pg]
                # Same factor on x and y: proportional scaling, no rotation.
                mat = fitz.Matrix(zoom, zoom)
                pm = page.get_pixmap(matrix=mat, alpha=False)
                # Do not enlarge pages that would exceed max_side pixels.
                if zoom > 1 and (pm.width > max_side or pm.height > max_side):
                    pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
                page_num = pg + 1  # page numbers start at 1
                new_name = '%s_img_%d.png' % (image_name, page_num)
                pm.save(os.path.join(pic_path, new_name))
    except Exception as error:
        print(error)
if __name__ == '__main__':
    # The only required argument is the directory that holds the PDF files.
    if len(arguments) < 2:
        print("请输入pdf所在目录的路径.")
        sys.exit(1)
    base_dir = arguments[1]
    # Treat "nothing found" (None or empty) as an empty work list.
    name_list = file_name_list(base_dir) or []
    start = time.time()
    for name in name_list:
        # Build paths with os.path.join so a trailing separator on base_dir
        # or a non-Windows system does not produce a wrong path.
        file_path = os.path.join(base_dir, name + '.pdf')
        if not os.path.isfile(file_path):
            print('skip, not a file: %s' % file_path)
            continue
        # Page renderings go to <base_dir>/pic/<name>, embedded images to
        # the "sub" folder below it.
        pic_path = os.path.join(base_dir, 'pic', name)
        sub_path = os.path.join(pic_path, 'sub')
        try:
            # Creates pic_path as well; an existing folder is not an error.
            os.makedirs(sub_path, exist_ok=True)
        except OSError as error:
            print(error)
            continue
        pdf2image(file_path, pic_path, name)
        pdfExtractPic(file_path, sub_path)
    end = time.time()
    print('task is over: %.2f' % (end - start))
如果执行的时候报错:
RuntimeError: Directory 'static' does not exist
说明 fitz 安装错了:装成了 PyPI 上同名的 fitz 包,而不是 PyMuPDF。可先执行 pip uninstall fitz,再执行 pip install pymupdf。参考 CSDN 博客:《PyMuPDF: AttributeError: 模块 'fitz' 没有属性 'open'(AttributeError: module 'fitz' has no attribute 'open')》