ImageMagick 依赖 Ghostscript 来处理和转换 PDF 相关的图像,因此需要先安装 ImageMagick 和 Ghostscript。
安装完毕后,需要自行将二者的可执行文件所在目录添加到系统环境变量 PATH 中。
使用示例:
python .\get_thumbnail.py --filepath .\paper.pdf --page [4-8,12-17,20,24,27-] --output-dir test-dir
输出:
支持灵活传入页面参数
`[4-8,12-17,20,24,27-]`
import subprocess
import argparse
from pathlib import Path
from math import sqrt
# Parse the command-line arguments.
# NOTE: parsing happens at module level because get_shape() reads the
# module-level ``args`` object.
parser = argparse.ArgumentParser()
# --filepath is mandatory: without it ``args.filepath`` would be None and every
# later ImageMagick call would be handed the literal path "None".
parser.add_argument("--filepath", type=str, required=True, help="pdf文件路径")
parser.add_argument(
    "--page",
    type=str,
    default="all",
    help="指定要生成缩略图的pdf文件页面 e.[1-3,8,10,27-] 默认为全部页面",
)
# "cube" is the sentinel meaning "infer the grid automatically"; any other
# value is forwarded as the montage tile geometry (e.g. "8x3").
parser.add_argument("--shape", type=str, default="cube", help="指定缩略图的形状 e.[8x3] 默认为正方形")
parser.add_argument("--output-dir", type=str, default="./images", help="指定输出文件夹")
args = parser.parse_args()
# Query the total number of pages of a PDF file through ImageMagick.
def parse_page_count(output: str) -> int:
    """Decode the page count from ``magick identify -format %n`` output.

    The decoding assumes ImageMagick prints the page count once per page, so
    an N-page document yields the decimal text of N repeated N times (e.g.
    ``"121212..."``, 24 characters, for 12 pages).  The count is therefore the
    prefix whose numeric value times its width equals the total length.

    Args:
        output: Raw text captured from the ``identify`` command.  Surrounding
            whitespace is ignored.

    Returns:
        The number of pages.

    Raises:
        ValueError: If the text is empty, contains non-digits, or is not a
            page count repeated once per page.
    """
    digits = output.strip()
    if not digits or any(ch not in "0123456789" for ch in digits):
        raise ValueError(f"unexpected output from 'magick identify': {output!r}")

    # Try every possible width instead of a fixed 1-3 digits so documents with
    # 1000 or more pages are decoded as well.
    for width in range(1, len(digits) + 1):
        prefix = digits[:width]
        count = int(prefix)
        if width * count == len(digits) and digits == prefix * count:
            return count
        if width * count > len(digits):
            # Longer prefixes only grow the product further; stop early.
            break
    raise ValueError(f"cannot decode a page count from {output!r}")


def get_file_page_num(filepath: str):
    """Return the total number of pages in the PDF at *filepath*.

    Runs ``magick identify -format %n`` and decodes its standard output.

    Raises:
        ValueError: If the command output cannot be decoded (for example when
            the command failed and printed nothing).
    """
    result = subprocess.run(
        ["magick", "identify", "-format", "%n", filepath], stdout=subprocess.PIPE
    )
    return parse_page_count(result.stdout.decode("utf-8"))
# Preprocess the page argument: work out how many pages are selected and
# which page selectors they correspond to.
def get_dst_page(filepath: str, page_str: str):
    """Resolve a page specification into a page total and page selectors.

    Page numbers are forwarded unchanged to ImageMagick's ``file[index]``
    syntax, which is zero-based, so the last valid index of an N-page file is
    ``N - 1``.

    Args:
        filepath: Path of the PDF; only read when the page count is needed
            (``"all"`` or an open-ended ``"N-"`` item).
        page_str: Either ``"all"`` or a comma-separated list such as
            ``[4-8,12-17,20,24,27-]``.  Spaces are ignored and the enclosing
            brackets are optional.  ``A-B`` is an inclusive range, ``N`` a
            single page and ``N-`` means "from N to the last page".

    Returns:
        A ``(total_num, detail_page)`` tuple.  ``detail_page`` lists, in input
        order, an ``int`` per single page and an inclusive ``(start, end)``
        tuple per range; ``total_num`` is the number of pages they cover.

    Raises:
        ValueError: If an item is not numeric or no page is selected.
    """
    if page_str == "all":
        page_count = get_file_page_num(filepath)
        # Zero-based indices: the whole document is 0 .. page_count - 1,
        # consistent with how the open-ended "N-" form is resolved below.
        return page_count, [(0, page_count - 1)]

    spec = page_str.replace(" ", "")
    # Strip the brackets only when they are really there, so "4-8,20" works
    # the same as "[4-8,20]" instead of losing its first and last character.
    if spec.startswith("["):
        spec = spec[1:]
    if spec.endswith("]"):
        spec = spec[:-1]

    total_num = 0
    detail_page = []
    for item in spec.split(","):
        if not item:
            # Tolerate stray commas such as "[1,2,]".
            continue
        if "-" not in item:
            total_num += 1
            detail_page.append(int(item))
        elif item.endswith("-"):
            start = int(item[:-1])
            last = get_file_page_num(filepath) - 1
            total_num += last - start + 1
            detail_page.append((start, last))
        else:
            start, end = (int(part) for part in item.split("-"))
            total_num += end - start + 1
            detail_page.append((start, end))

    if not detail_page:
        raise ValueError(f"no pages selected by {page_str!r}")
    return total_num, detail_page
def get_per_page_idx(detail_page_scope: list):
    """Flatten page selectors into a list of individual page numbers.

    Each entry of *detail_page_scope* is either a single page number or a
    tuple whose first two elements are the inclusive start and end of a
    range.  The input order is preserved.
    """
    flattened = []
    for entry in detail_page_scope:
        if not isinstance(entry, tuple):
            flattened.append(entry)
            continue
        first, last = entry[0], entry[1]
        # The range is inclusive on both ends.
        flattened.extend(range(first, last + 1))
    return flattened
# Automatically infer a reasonably proportioned grid.
def infer_shape(num):
    """Return a ``(columns, rows)`` grid able to hold *num* thumbnails.

    The row count is ``sqrt(num / sqrt(2))`` rounded down, which makes the
    grid roughly sqrt(2) times wider than it is tall; the column count is the
    smallest one that still fits every thumbnail.

    Args:
        num: Number of thumbnails to lay out.

    Returns:
        ``(columns, rows)``, both at least 1, with ``columns * rows >= num``.
    """
    # Clamp to one row: for num == 1 the square root truncates to 0, which
    # used to cause a division by zero.
    rows = max(1, int(sqrt(max(num, 0) / sqrt(2))))
    # Exact integer ceiling of num / rows, so no spare column is added when
    # num divides evenly.
    cols = max(1, (num + rows - 1) // rows)
    return cols, rows
def get_shape(num):
    """Return the montage tile geometry as a ``"<columns>x<rows>"`` string.

    A user-supplied ``--shape`` is returned with its spaces removed; the
    default value ``"cube"`` asks infer_shape() to pick a grid for *num*
    thumbnails.
    """
    if args.shape != "cube":
        return args.shape.replace(" ", "")
    cols, rows = infer_shape(num)
    return f"{cols}x{rows}"
if __name__ == "__main__":
    # Create the output directory, including missing parents; an already
    # existing directory is fine.
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    total_num, detail_page = get_dst_page(args.filepath, args.page)

    # Call ImageMagick to turn every requested page into a thumbnail.
    # "-alpha remove" is required, otherwise the generated images always get
    # a black background.
    #
    # The commands are run without shell=True, like get_file_page_num() does:
    # with an argument list, shell=True on POSIX executes only "magick" and
    # drops every other argument, and on Windows it sends the file names
    # through cmd.exe unquoted.
    for item in detail_page:
        if isinstance(item, tuple) and item[0] != item[1]:
            selector = f"{item[0]}-{item[1]}"
            # NOTE(review): relies on ImageMagick writing a multi-image result
            # as output-<page>.png -- the montage step below looks the files
            # up under exactly those names.
            target = output_dir / "output.png"
        else:
            # A single page, or a range covering exactly one page: a
            # one-image result would be written as plain "output.png", so the
            # page number has to be put into the file name explicitly.
            page = item[0] if isinstance(item, tuple) else item
            selector = f"{page}"
            target = output_dir / f"output-{page}.png"
        subprocess.run(
            [
                "magick",
                "convert",
                "-thumbnail",
                "x800",
                "-alpha",
                "remove",
                f"{args.filepath}[{selector}]",
                str(target),
            ]
        )

    pages = get_per_page_idx(detail_page)
    print("参与生成缩略图的页面为:", pages)
    image_list = [str(output_dir / f"output-{page}.png") for page in pages]

    # Call ImageMagick to merge the thumbnails into a single contact sheet.
    subprocess.run(
        [
            "magick",
            "montage",
            *image_list,
            "-geometry",
            "+0+0",
            "-border",
            "3",
            "-bordercolor",
            "black",
            "-tile",
            f"{get_shape(total_num)}",
            str(output_dir / "result.jpg"),
        ]
    )