PDF转成PNG,之后PNG转成PDF

PDF2IMG
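需要先安装 Python 的 img2pdf 和 pdf2image 两个包(pip install img2pdf pdf2image)。pdf2image 依赖 poppler:Windows 下从 https://github.com/oschwartz10612/poppler-windows/releases/ 下载并解压,然后把解压目录中包含 pdftoppm.exe 的 bin 目录(例如 Library/bin)的完整路径添加到环境变量 PATH。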

import multiprocessing
import sys
from datetime import time
import time
from pdf2image import convert_from_path
import os
from tqdm import tqdm
def getFiles(path):
    """Return the paths of all files found under ``path``, recursively.

    Each entry is the walked directory joined with the file name, with
    every backslash replaced by a forward slash. A path that cannot be
    walked yields an empty list.
    """
    return [
        os.path.join(folder, name).replace('\\', '/')
        for folder, _subdirs, names in os.walk(path)
        for name in names
    ]

def report(outputpath, file):
    """Record that ``file`` was converted.

    Appends one line to ``convertreport.txt`` inside ``outputpath`` and
    echoes the same message (without the trailing space) to stdout.
    """
    log_path = "/".join((outputpath, "convertreport.txt"))
    with open(log_path, "a") as log:
        log.write(file + " has been converted. \n")
    print(file + " has been converted.")

def convert2(outputpath, pages, file):
    """Save each page image as a PNG inside ``outputpath``.

    Pages are numbered from 1 in iteration order; page ``k`` is written to
    ``<outputpath>/<file>_<k>.png`` by calling its ``save`` method.
    """
    for number, page in enumerate(pages, start=1):
        png_name = file + "_%d.png" % number
        page.save(outputpath + "/" + png_name, 'PNG')

def convert(path, outputpath, dpi=500, size=(1300, 1500)):
    """Render every page of a single PDF file to PNG images.

    The pages are written by ``convert2`` as
    ``<outputpath>/<pdf file name>_<page number>.png``; the PDF's own
    extension stays in the image name (``a.pdf`` -> ``a.pdf_1.png``).

    Args:
        path: Path of the PDF file to render.
        outputpath: Directory that receives the images; created (with any
            missing parents) when it does not exist yet.
        dpi: Rendering resolution handed to ``convert_from_path``.
        size: Image size handed to ``convert_from_path``.

    Returns:
        ``outputpath``, unchanged.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same directory.
    os.makedirs(outputpath, exist_ok=True)

    pages = convert_from_path(str(path), dpi, size=size)
    # Only the base name is kept so the images land directly in outputpath.
    _, filename = os.path.split(path)
    convert2(outputpath, pages, filename)
    return outputpath

def main(path=r'C:\Users\Administrator\Desktop\1/'):
    """Convert every PDF found under ``path`` to PNG images and time the run.

    The images are written to a ``pics`` directory inside ``path``. The
    elapsed wall-clock time in seconds is printed at the end.

    Args:
        path: Root directory that is searched recursively for PDF files.
    """
    # The output directory is the same for every file, so build it once.
    output_dir = os.path.join(path, 'pics')
    time_start = time.time()

    # Match the extension case-insensitively so "X.PDF" is not skipped, and
    # filter up front so the progress bar counts only files that get converted.
    pdf_files = [f for f in getFiles(path) if f.lower().endswith('.pdf')]
    if pdf_files:
        os.makedirs(output_dir, exist_ok=True)

    for pdf_file in tqdm(pdf_files):
        convert(pdf_file, output_dir)

    time_end = time.time()
    print('use time', time_end - time_start)

# Run the batch conversion only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
	main()



IMG2PDF

import os
import img2pdf
from PIL import Image
def doImg2Pdf(fileName, num=14):
    """Bundle the images in a folder into PDF files of ``num`` pages each.

    The directory entries are ordered with a natural sort (digit runs are
    compared as numbers, so ``x_2.png`` precedes ``x_10.png``), split into
    consecutive groups of ``num``, and each group is written to
    ``sample1_<n>.pdf`` in the current working directory, ``n`` starting
    at 1. The last PDF holds the remainder when the count is not a
    multiple of ``num``.

    Args:
        fileName: Directory containing the images; a trailing path
            separator is accepted but not required.
        num: Number of images (pages) per output PDF.

    Raises:
        ValueError: If ``num`` is smaller than 1.
        SystemExit: If the directory holds fewer than ``num`` entries.
    """
    import re  # local import: the file's import block does not provide re

    if num < 1:
        raise ValueError("num must be a positive integer")

    def natural_key(name):
        # re.split with a capture group alternates text / digit-run tokens,
        # so odd positions are always digit runs and can be compared as ints.
        return [
            int(token) if index % 2 else token.lower()
            for index, token in enumerate(re.split(r'(\d+)', name))
        ]

    # os.listdir returns entries in arbitrary order; sort so pages keep
    # their numeric order inside each PDF.
    fileList = sorted(os.listdir(fileName), key=natural_key)
    total = len(fileList)

    if num > total:
        print('num长度需小于:', total)
        # Same exception exit() raised, without relying on the site module.
        raise SystemExit

    num_file = (total + num - 1) // num  # ceiling division
    print(num_file)

    for n in range(1, num_file + 1):
        batch = fileList[num * (n - 1):num * n]
        pngList = [os.path.join(fileName, pngName) for pngName in batch]
        # Convert before opening the output so a failed conversion does not
        # leave an empty PDF behind.
        pdf_bytes = img2pdf.convert(pngList)
        with open("sample1_%s.pdf" % n, "wb") as f:
            f.write(pdf_bytes)
        print("转换完成")

# Bundle the PNGs from the pics folder into PDFs. The raw string ends in a
# doubled backslash because a raw string literal cannot end in a single one.
doImg2Pdf(r'C:\Users\Administrator\Desktop\1\pics\\')

你可能感兴趣的:(pdf,python)