Java mht转换成html

因项目需要用到mht在线预览,在网上找了一堆代码之后,相差无几。还不能正常编译通过。最后决定自己分析代码加以改正,修改其中一些BUG,最后正常编译通过,测试正常。与大家分享。
所需的Jar最下面即可下载。
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.Enumeration;
import javax.activation.DataHandler;
import javax.mail.MessagingException;  
import javax.mail.Multipart;  
import javax.mail.Session;  
import javax.mail.internet.MimeBodyPart;  
import javax.mail.internet.MimeMessage;  
import javax.mail.internet.MimeMultipart;  
import javax.mail.internet.MimePartDataSource; 

public class HtmlApplication{
public static void main(String[] args){
		HtmlApplication.mht2html("C:\\Documents and Settings\\Administrator\\桌面\\test2.mht", "C:\\test2\\test.html");
	}

/**
 * 将 mht文件转换成 html文件
 * @param s_SrcMht
 * @param s_DescHtml
 */
public static void mht2html(String s_SrcMht, String s_DescHtml) {
	try {  
		InputStream fis = new FileInputStream(s_SrcMht);
		Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
		MimeMessage msg = new MimeMessage(mailSession, fis);
		Object content = msg.getContent();
		if (content instanceof Multipart){  
			MimeMultipart mp = (MimeMultipart)content;  
			MimeBodyPart bp1 = (MimeBodyPart)mp.getBodyPart(0);
			
			//获取mht文件内容代码的编码
			String strEncodng = getEncoding(bp1);
			
			//获取mht文件的内容
			String strText = getHtmlText(bp1, strEncodng);  
			if (strText == null)  
				return;
			
			//创建以mht文件名称的文件夹,主要用来保存资源文件。
			File parent = null;
			if (mp.getCount() > 1) {
				parent = new File(new File(s_DescHtml).getAbsolutePath() + ".files");
				parent.mkdirs();
				if (!parent.exists()){   //创建文件夹失败的话则退出
					return;
				}
			}
			
			//FOR中代码 主要是保存资源文件及替换路径
			for (int i = 1; i < mp.getCount(); ++i) {  
				MimeBodyPart bp = (MimeBodyPart)mp.getBodyPart(i);
				//获取资源文件的路径 
				//例(获取: http://xxx.com/abc.jpg)
				String strUrl = getResourcesUrl(bp);
				if (strUrl==null || strUrl.length()==0)  
					continue;
				
				DataHandler dataHandler = bp.getDataHandler();  
				MimePartDataSource source = (MimePartDataSource)dataHandler.getDataSource();
				
				//获取资源文件的绝对路径
				String FilePath = parent.getAbsolutePath() + File.separator + getName(strUrl, i);
				File resources = new File(FilePath);
				
				//保存资源文件
				if (SaveResourcesFile(resources, bp.getInputStream())){
					//将远程地址替换为本地地址  如图片、JS、CSS样式等等
					strText = strText.replace(strUrl, resources.getAbsolutePath()); 
				}
			}
			
			//最后保存HTML文件
			SaveHtml(strText, s_DescHtml, strEncodng);
		}  
	} catch (Exception e) {  
	e.printStackTrace();  
	}  
}

/**
 * 获取mht文件内容中资源文件的名称
 * @param strName
 * @param ID
 * @return
 */
public static String getName(String strName, int ID) {  
	char separator1 = '/';
	char separator2 = '\\';
	//将换行替换
	strName = strName.replaceAll("\r\n", "");
	
	//获取文件名称
	if( strName.lastIndexOf(separator1) >= 0){
		return strName.substring(strName.lastIndexOf(separator1) + 1);
	}
	if( strName.lastIndexOf(separator2) >= 0){
		return strName.substring(strName.lastIndexOf(separator2) + 1);
	}
	return "";
}


/**
 * 将提取出来的html内容写入保存的路径中。
 * @param strText
 * @param strHtml
 * @param strEncodng
 */
public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath , String s_Encode) {  
	try{
        Writer out = null;
        out = new OutputStreamWriter(new FileOutputStream(s_HtmlPath, false), s_Encode);
        out.write(s_HtmlTxt);
        out.close();
    }catch(Exception e){
    	return false;
    }
    return true;
}  


/**
 * 保存网页中的JS、图片、CSS样式等资源文件
 * @param SrcFile     源文件
 * @param inputStream 输入流
 * @return
 */
private static boolean SaveResourcesFile(File SrcFile, InputStream inputStream) {  
	if (SrcFile == null || inputStream == null) {  
		return false;   
	}  
  
	BufferedInputStream in = null;  
	FileOutputStream fio = null;  
	BufferedOutputStream osw = null;  
	try {  
		in = new BufferedInputStream(inputStream);  
		fio = new FileOutputStream(SrcFile);  
		osw = new BufferedOutputStream(new DataOutputStream(fio));  
		int index = 0;  
		byte[] a = new byte[1024];  
		while ((index = in.read(a)) != -1) {  
			osw.write(a, 0, index);
		}
		osw.flush();  
		return true;  
	} catch (Exception e) {     
		e.printStackTrace();  
		return false;  
	} finally{  
		try {  
		if (osw != null)  
			osw.close();  
		if (fio != null)  
			fio.close();  
		if (in != null)  
			in.close();  
		if (inputStream != null)  
			inputStream.close();  
		} catch (Exception e) {  
			e.printStackTrace();
			return false;
		} 
	}  
}  


/**
 * 获取mht文件里资源文件的URL路径
 * @param bp
 * @return
 */
private static String getResourcesUrl(MimeBodyPart bp) { 
	if(bp==null){
		return null;
	}
	try {  
		Enumeration list = bp.getAllHeaders();  
		while (list.hasMoreElements()) {  
			javax.mail.Header head = (javax.mail.Header)list.nextElement();  
			if (head.getName().compareTo("Content-Location") == 0) {  
				return head.getValue();
			}  
		}
		return null;
	} catch (MessagingException e) {  
		return null;
	}  
} 


/**
 * 获取mht文件中的内容代码
 * @param bp
 * @param strEncoding 该mht文件的编码
 * @return
 */
private static String getHtmlText(MimeBodyPart bp, String strEncoding) {  
	InputStream textStream = null;  
	BufferedInputStream buff = null;  
	BufferedReader br = null;  
	Reader r = null;  
	try {  
		textStream = bp.getInputStream();  
		buff = new BufferedInputStream(textStream);  
		r = new InputStreamReader(buff, strEncoding); 
		br = new BufferedReader(r);  
		StringBuffer strHtml = new StringBuffer("");  
		String strLine = null;  
		while ((strLine = br.readLine()) != null) {  
			strHtml.append(strLine + "\r\n");  
		}  
		br.close();  
		r.close();  
		textStream.close();  
		return strHtml.toString();  
	} catch (Exception e) {  
	e.printStackTrace();  
	} finally{
		try{  
			if (br != null)  
			br.close();  
			if (buff != null)  
			buff.close();  
			if (textStream != null)  
			textStream.close();  
		}catch(Exception e){  
		}  
	}  
	return null;  
}

/**
 * 获取mht网页文件中内容代码的编码
 * @param bp
 * @return
 */
private static String getEncoding(MimeBodyPart bp) {
	if(bp==null){
		return null;
	}
	try {  
		Enumeration list = bp.getAllHeaders();  
		while (list.hasMoreElements()) {  
			javax.mail.Header head = (javax.mail.Header)list.nextElement();  
			if (head.getName().compareTo("Content-Type") == 0) {  
				String strType = head.getValue();  
				int pos = strType.indexOf("charset=");  
				if (pos>=0) {  
					String strEncoding = strType.substring(pos + 8, strType.length());  
					if(strEncoding.startsWith("\"") || strEncoding.startsWith("\'")){
						strEncoding = strEncoding.substring(1 , strEncoding.length());
					}
					if(strEncoding.endsWith("\"") || strEncoding.endsWith("\'")){
						strEncoding = strEncoding.substring(0 , strEncoding.length()-1);
					}
					if (strEncoding.toLowerCase().compareTo("gb2312") == 0) {  
						strEncoding = "gbk";  
					}  
					return strEncoding;  
				}  
			}
		}  
	} catch (MessagingException e) {  
		e.printStackTrace();  
	}
	return null; 
}
}

你可能感兴趣的:(java)