【JAVA】mht文件转html

从网上搜到一段将 mht 文件转换为 html 的代码,稍作修改后即可使用,在此做个笔记。代码依赖 JavaMail(javax.mail)。

/**
 * Converts an MHT (MHTML web archive) file into a plain HTML file plus a
 * sibling "&lt;html file&gt;.files" directory that holds the embedded resources
 * (images, scripts, style sheets, ...).
 *
 * <p>The archive is parsed as a MIME message with JavaMail. The first body
 * part is treated as the HTML document; every following part that carries a
 * {@code Content-Location} header is written to disk and its URL is replaced
 * in the HTML text by the absolute path of the saved file.
 */
public class Mht2HtmlUtil {

	/** Charset used when the HTML part declares none, or one this JVM cannot handle. */
	private static final String DEFAULT_ENCODING = "UTF-8";

	public static void main(String[] args) {
		Mht2HtmlUtil.mht2html("d:\\51job_111.mht", "d:\\test.htm");
	}

	/**
	 * Converts an MHT file into an HTML file.
	 *
	 * <p>Nothing is written when the archive is not a MIME multipart message
	 * or when the HTML part cannot be read. Failures are reported on
	 * {@code System.err}; no exception is propagated to the caller.
	 *
	 * @param s_SrcMht
	 *            path of the MHT file to read
	 * @param s_DescHtml
	 *            path of the HTML file to write; resources are stored in the
	 *            directory {@code s_DescHtml + ".files"}
	 */
	public static void mht2html(String s_SrcMht, String s_DescHtml) {
		InputStream fis = null;
		try {
			fis = new FileInputStream(s_SrcMht);
			Session mailSession = Session.getDefaultInstance(
					System.getProperties(), null);
			MimeMessage msg = new MimeMessage(mailSession, fis);
			Object content = msg.getContent();
			// Test for the type we actually cast to, so an unexpected
			// Multipart implementation cannot raise a ClassCastException.
			if (!(content instanceof MimeMultipart)) {
				return;
			}
			MimeMultipart mp = (MimeMultipart) content;
			int partCount = mp.getCount();
			if (partCount == 0) {
				return;
			}
			MimeBodyPart bp1 = (MimeBodyPart) mp.getBodyPart(0);

			// Charset of the HTML part; fall back to a default so that a
			// missing or unknown charset does not abort the conversion.
			String strEncodng = usableEncoding(getEncoding(bp1));

			// Decoded HTML text of the first part.
			String strText = getHtmlText(bp1, strEncodng);
			if (strText == null) {
				return;
			}

			// Directory named after the HTML file; it holds the resources.
			File parent = null;
			if (partCount > 1) {
				parent = new File(new File(s_DescHtml).getAbsolutePath()
						+ ".files");
				parent.mkdirs();
				if (!parent.isDirectory()) { // could not create the directory
					System.err.println("Cannot create resource directory: "
							+ parent.getAbsolutePath());
					return;
				}
			}

			// Save every resource part and point the HTML at the local copy.
			for (int i = 1; i < partCount; ++i) {
				try {
					MimeBodyPart bp = (MimeBodyPart) mp.getBodyPart(i);
					// Original URL of the resource, e.g. http://xxx.com/abc.jpg
					String strUrl = getResourcesUrl(bp);
					if (strUrl == null || strUrl.length() == 0) {
						continue;
					}

					String fileName = getName(strUrl, i);
					if (fileName.length() == 0) {
						// The URL carries no file name (e.g. it ends with
						// '/'), so there is nothing sensible to save it as.
						continue;
					}
					File resources = new File(parent, fileName);

					if (SaveResourcesFile(resources, bp.getInputStream())) {
						// Replace the remote address with the local one
						// (images, JS, CSS, ...).
						strText = strText.replace(strUrl,
								resources.getAbsolutePath());
					}
				} catch (Exception e) {
					// One unreadable resource must not prevent the HTML
					// itself from being written.
					e.printStackTrace();
				}
			}

			// Finally write the HTML file.
			SaveHtml(strText, s_DescHtml, strEncodng);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeQuietly(fis);
		}
	}

	/**
	 * Derives a local file name from a resource location.
	 *
	 * <p>Line breaks (with the whitespace that follows them) are removed, the
	 * text after the last {@code '/'} or {@code '\'} is taken, and characters
	 * that are not allowed in file names on common file systems
	 * ({@code : * ? " < > |} and control characters) are replaced by
	 * {@code '_'}. A location without any separator is used as a whole.
	 *
	 * @param strName
	 *            resource location, typically a URL or a file path
	 * @param ID
	 *            index of the resource inside the archive; currently unused
	 *            and kept for source compatibility
	 * @return the file name, or an empty string when no usable name can be
	 *         derived ({@code null} input, a location ending with a
	 *         separator, {@code "."} or {@code ".."})
	 */
	public static String getName(String strName, int ID) {
		if (strName == null) {
			return "";
		}
		// Drop line breaks left over from folded header lines.
		String name = strName.replaceAll("[\\r\\n]+[ \\t]*", "").trim();

		// Keep only what follows the last separator of either kind.
		int cut = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
		if (cut >= 0) {
			name = name.substring(cut + 1);
		}

		// A query string such as "a.js?v=1" would otherwise yield a name
		// that cannot be created on Windows.
		name = name.replaceAll("[:*?\"<>|\\p{Cntrl}]", "_");

		if (name.equals(".") || name.equals("..")) {
			return "";
		}
		return name;
	}

	/**
	 * Writes the extracted HTML text to the given path, replacing any
	 * existing file.
	 *
	 * @param s_HtmlTxt
	 *            HTML text to write
	 * @param s_HtmlPath
	 *            path of the file to write
	 * @param s_Encode
	 *            charset name used to encode the text; {@code null} selects
	 *            UTF-8
	 * @return {@code true} when the file was written completely,
	 *         {@code false} on any failure (including an unsupported charset)
	 */
	public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath,
			String s_Encode) {
		String encoding = (s_Encode == null) ? DEFAULT_ENCODING : s_Encode;
		FileOutputStream fos = null;
		Writer out = null;
		try {
			fos = new FileOutputStream(s_HtmlPath, false);
			out = new OutputStreamWriter(fos, encoding);
			out.write(s_HtmlTxt);
			// Close here so that a failing final flush is reported as a
			// failure instead of being swallowed by the cleanup below.
			out.close();
			return true;
		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			closeQuietly(out);
			closeQuietly(fos);
		}
	}

	/**
	 * Saves one embedded resource (JS, image, CSS, ...) to a file. The input
	 * stream is always closed before this method returns.
	 *
	 * @param SrcFile
	 *            file to write the resource to
	 * @param inputStream
	 *            decoded content of the resource
	 * @return {@code true} when the resource was written completely
	 */
	private static boolean SaveResourcesFile(File SrcFile,
			InputStream inputStream) {
		if (SrcFile == null || inputStream == null) {
			closeQuietly(inputStream);
			return false;
		}

		BufferedOutputStream out = null;
		try {
			out = new BufferedOutputStream(new FileOutputStream(SrcFile));
			byte[] buffer = new byte[8192];
			int count;
			while ((count = inputStream.read(buffer)) != -1) {
				out.write(buffer, 0, count);
			}
			// Close here so that a failing final flush counts as a failure.
			out.close();
			return true;
		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			closeQuietly(out);
			closeQuietly(inputStream);
		}
	}

	/**
	 * Returns the original location of a resource part, taken from its
	 * {@code Content-Location} header.
	 *
	 * @param bp
	 *            resource body part
	 * @return the location with header folding removed, or {@code null} when
	 *         the part is {@code null}, has no such header, or cannot be read
	 */
	private static String getResourcesUrl(MimeBodyPart bp) {
		if (bp == null) {
			return null;
		}
		try {
			// Header names are case-insensitive; a null delimiter returns
			// only the first matching header.
			String location = bp.getHeader("Content-Location", null);
			if (location == null) {
				return null;
			}
			// A long URL may be folded over several header lines. A URL
			// contains no whitespace, so the line break and the folding
			// whitespace are removed to get back the text used in the HTML.
			return location.replaceAll("\\r?\\n[ \\t]*", "").trim();
		} catch (MessagingException e) {
			return null;
		}
	}

	/**
	 * Reads the decoded text of the HTML part. Lines are joined with CRLF.
	 *
	 * @param bp
	 *            body part that holds the HTML document
	 * @param strEncoding
	 *            charset of the part; UTF-8 is used when it is {@code null}
	 *            or not supported by this JVM
	 * @return the HTML text, or {@code null} when the part cannot be read
	 */
	private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
		if (bp == null) {
			return null;
		}
		InputStream textStream = null;
		BufferedReader br = null;
		try {
			textStream = bp.getInputStream();
			br = new BufferedReader(new InputStreamReader(textStream,
					usableEncoding(strEncoding)));
			StringBuilder strHtml = new StringBuilder();
			String strLine;
			while ((strLine = br.readLine()) != null) {
				strHtml.append(strLine).append("\r\n");
			}
			return strHtml.toString();
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		} finally {
			closeQuietly(br);
			closeQuietly(textStream);
		}
	}

	/**
	 * Returns the charset declared in the {@code Content-Type} header of the
	 * HTML part. {@code gb2312} is reported as {@code gbk}, its superset.
	 *
	 * @param bp
	 *            body part that holds the HTML document
	 * @return the charset name, or {@code null} when none is declared or the
	 *         header cannot be read
	 */
	private static String getEncoding(MimeBodyPart bp) {
		if (bp == null) {
			return null;
		}
		try {
			return parseCharset(bp.getHeader("Content-Type", null));
		} catch (MessagingException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Extracts the value of the {@code charset} parameter from a
	 * Content-Type header value such as {@code text/html; charset="utf-8"}.
	 * The parameter name is matched case-insensitively, the value ends at the
	 * next {@code ';'} and surrounding quotes are removed.
	 */
	private static String parseCharset(String contentType) {
		if (contentType == null) {
			return null;
		}
		final String key = "charset=";
		int pos = -1;
		for (int i = 0; i + key.length() <= contentType.length(); i++) {
			if (contentType.regionMatches(true, i, key, 0, key.length())) {
				pos = i;
				break;
			}
		}
		if (pos < 0) {
			return null;
		}

		String charset = contentType.substring(pos + key.length());
		int end = charset.indexOf(';'); // further parameters may follow
		if (end >= 0) {
			charset = charset.substring(0, end);
		}
		charset = charset.trim();
		if (charset.startsWith("\"") || charset.startsWith("'")) {
			charset = charset.substring(1);
		}
		if (charset.endsWith("\"") || charset.endsWith("'")) {
			charset = charset.substring(0, charset.length() - 1);
		}
		charset = charset.trim();
		if (charset.length() == 0) {
			return null;
		}
		if (charset.equalsIgnoreCase("gb2312")) {
			return "gbk";
		}
		return charset;
	}

	/**
	 * Returns {@code name} when this JVM supports that charset, otherwise
	 * the default encoding.
	 */
	private static String usableEncoding(String name) {
		if (name != null) {
			try {
				if (java.nio.charset.Charset.isSupported(name)) {
					return name;
				}
			} catch (IllegalArgumentException ignored) {
				// Not a legal charset name: use the default below.
			}
		}
		return DEFAULT_ENCODING;
	}

	/** Closes a resource during cleanup, ignoring {@code null} and close failures. */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (java.io.IOException ignored) {
			// Best-effort cleanup; the outcome was already decided.
		}
	}
}

 

你可能感兴趣的:(java,html)