用 Python 导出 MHT 文件中的 JPG 图片

import os

import sys

import re

import time

import base64



def _decode_lenient_base64(text):
	"""Decode base64 *text*, tolerating line breaks and missing padding.

	Everything from the first ``=`` onwards is discarded, characters outside
	the base64 alphabet are removed, and correct ``=`` padding is re-applied,
	so a truncated payload still decodes without extra bytes being invented.

	Args:
		text: Base64 text, possibly wrapped over several lines.

	Returns:
		The decoded bytes.
	"""
	# "=" marks the end of the payload; whatever follows it is not data.
	symbols = re.sub(r"[^A-Za-z0-9+/]", "", text.partition("=")[0])
	if len(symbols) % 4 == 1:
		# A lone trailing symbol carries only six bits and cannot form a byte.
		symbols = symbols[:-1]
	return base64.b64decode(symbols + "=" * (-len(symbols) % 4))


def extract_data(filename, dir):
	"""Write every base64-encoded JPEG found in an MHT file to *dir*.

	The file is scanned for runs of text that start with ``/9j`` (the base64
	form of the JPEG marker bytes ``FF D8 FF``) and extend to the next blank
	line. Each run is decoded and saved as ``<dir>/<n>.jpg``, where ``n``
	counts from 1 in order of appearance. The path of each written file is
	printed.

	Args:
		filename: Path of the MHT file to read.
		dir: Existing directory that receives the numbered ``.jpg`` files.
			The name shadows the builtin but is kept so keyword callers
			continue to work.

	Raises:
		OSError: If the input cannot be read or an output cannot be written.
	"""
	# latin-1 maps every byte to a character, so parts written in any charset
	# are read without decode errors; the base64 payload itself is pure ASCII.
	with open(filename, "r", encoding="latin-1") as source:
		content = source.read()

	jpeg_runs = re.finditer(r"/9j.*?(?=\n\n)", content, re.DOTALL)
	for index, match in enumerate(jpeg_runs, start=1):
		target = "%s/%s.jpg" % (dir, index)
		print(target)
		with open(target, "wb") as image:
			image.write(_decode_lenient_base64(match.group()))

	

def export_file(file_path):
	"""Extract the JPEG images of one MHT file into a sibling directory.

	The output directory is created next to *file_path* and is named after
	the file name up to its first dot (``docs/page.v2.mht`` -> ``docs/page``).
	When that would leave no usable name (the file name has no dot, or starts
	with one) the directory is the full file name with ``_dir`` appended. If
	the directory already exists a notice is printed and it is reused.

	Args:
		file_path: Path of the MHT file; both ``/`` and ``\\`` are accepted
			as separators on every platform.

	Raises:
		OSError: If the directory cannot be created or the file cannot be
			processed.
	"""
	# Split on the LAST separator with plain string operations: file names may
	# contain regex metacharacters or repeat a parent directory's name.
	split_at = max(file_path.rfind("/"), file_path.rfind("\\")) + 1
	parent = file_path[:split_at]
	file_name_ext = file_path[split_at:]

	stem, dot, _ = file_name_ext.partition(".")
	if not dot or not stem:
		# Without a stem the directory would collide with the file or its parent.
		stem = file_name_ext + "_dir"

	full_path = parent + stem
	try:
		os.mkdir(full_path)
	except FileExistsError:
		print("The Directory \"%s\" Exists!\n" % full_path)
	extract_data(file_path, full_path)

	

def main():
	"""Print ``os.name`` and export the file named on the command line.

	Only the first command-line argument is used; any further arguments are
	ignored. When the script is started without arguments, a fixed default
	path is exported instead.
	"""
	print(os.name)

	argument_count = len(sys.argv)
	if argument_count >= 2:
		export_file(sys.argv[1])
	elif argument_count == 1:
		# No argument supplied: fall back to the built-in sample location.
		default_path = "E:\\Scripts\\test.mht"
		export_file(default_path)

		

# Script entry point: the body runs only when this file is executed directly.
if __name__ == "__main__":

	main()

	# Announce the pause, then sleep for two seconds before the process exits.
	print("just wait for 2 seconds!\n")

	time.sleep(2)

	

你可能感兴趣的:(python)