Java 处理 HTML 标签

package com.nfa;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based helpers for stripping tags, attributes and whitespace from HTML text.
 *
 * <p>These methods do plain pattern matching on the input string; they do not parse
 * HTML. Nested elements with the same name, a {@code >} inside an attribute value,
 * comments and CDATA sections are therefore not handled specially.
 *
 * <p>The {@code tag} and {@code attr} arguments are inserted into a regular expression
 * as-is (wrapped in a non-capturing group), so regex metacharacters in them keep their
 * regex meaning. Callers must not pass untrusted values for those arguments.
 */
public class UtilHtml {

	/** Matches any tag-like span: {@code <}, any run of characters other than {@code >}, then {@code >}. */
	private static final Pattern ANY_TAG = Pattern.compile("<[^>]*>");

	/** Matches a run of characters in the regex class {@code \s}. */
	private static final Pattern WHITESPACE = Pattern.compile("\\s+");

	/**
	 * Removes every tag-like span and returns the remaining text.
	 *
	 * @param html the text to process; may be {@code null}
	 * @return the text without tags, or the empty string when {@code html} is
	 *         {@code null}, empty or whitespace-only
	 */
	public static String deleteAll(String html){
		if(isBlank(html)){
			return "";
		}
		return ANY_TAG.matcher(html).replaceAll("");
	}

	/**
	 * Removes every opening/standalone occurrence of the given tag, e.g. {@code <br>},
	 * {@code <br/>}, {@code <br />} or {@code <img src="a.png">}. Closing tags and the
	 * content between tags are left in place.
	 *
	 * <p>Matching is case-insensitive and the tag name must be followed by whitespace,
	 * {@code /} or {@code >}, so tag {@code b} does not match {@code <br>}.
	 *
	 * @param tag  the tag name (interpreted as a regex fragment)
	 * @param html the text to process; may be {@code null}
	 * @return the processed text; {@code html} itself when {@code html} or {@code tag}
	 *         is {@code null}, empty or whitespace-only
	 */
	public static String deleteTagsOfOne(String tag,String html){
		if(isBlank(tag) || isBlank(html)){
			return html;
		}
		// The lookahead forces the tag name to end here, while "[^>]*" makes the
		// attribute part optional so that "<br>" and "<br/>" are matched as well.
		Pattern p = Pattern.compile(
				"<\\s*(?:" + tag + ")(?=[\\s/>])[^>]*>",
				Pattern.CASE_INSENSITIVE);
		return p.matcher(html).replaceAll("");
	}

	/**
	 * Removes every element of the given tag together with its content, i.e. everything
	 * from an opening {@code <tag ...>} up to the nearest following {@code </tag>}.
	 *
	 * <p>Matching is case-insensitive and the tag name in the opening tag must be
	 * followed by whitespace, {@code /} or {@code >}, so tag {@code b} does not start
	 * matching at {@code <body>}. Because the nearest closing tag is used, nested
	 * elements of the same name are not balanced.
	 *
	 * @param tag  the tag name (interpreted as a regex fragment)
	 * @param html the text to process; may be {@code null}
	 * @return the processed text; {@code html} itself when {@code html} or {@code tag}
	 *         is {@code null}, empty or whitespace-only
	 */
	public static String deleteTagsOfTwo(String tag,String html){
		if(isBlank(tag) || isBlank(html)){
			return html;
		}
		String name = "(?:" + tag + ")";
		// "[\\s\\S]*?" lazily spans any characters, including line breaks, up to the
		// first closing tag; the closing tag may carry whitespace before its '>'.
		Pattern p = Pattern.compile(
				"<" + name + "(?=[\\s/>])[^>]*>[\\s\\S]*?</" + name + "\\s*>",
				Pattern.CASE_INSENSITIVE);
		return p.matcher(html).replaceAll("");
	}

	/**
	 * Removes every {@code attr="value"} or {@code attr='value'} occurrence, including
	 * occurrences with an empty value. Whitespace around the removed attribute is kept.
	 *
	 * <p>Matching is case-insensitive. The attribute name must not be preceded by a word
	 * character, {@code -}, {@code :} or {@code .}, so attr {@code id} does not touch
	 * {@code data-id="..."}. Unquoted attribute values are not removed, and matching
	 * text outside of tags is removed as well because the input is not parsed.
	 *
	 * @param attr the attribute name (interpreted as a regex fragment)
	 * @param html the text to process; may be {@code null}
	 * @return the processed text; {@code html} itself when {@code html} or {@code attr}
	 *         is {@code null}, empty or whitespace-only
	 */
	public static String deleteAttr(String attr,String html){
		if(isBlank(attr) || isBlank(html)){
			return html;
		}
		Pattern p = Pattern.compile(
				"(?<![\\w:.-])(?:" + attr + ")\\s*=\\s*(?:\"[^\"]*\"|'[^']*')",
				Pattern.CASE_INSENSITIVE);
		return p.matcher(html).replaceAll("");
	}

	/**
	 * Removes every whitespace character (regex class {@code \s}: space, tab, line feed,
	 * vertical tab, form feed, carriage return). Other characters, including
	 * {@code |}, are kept.
	 *
	 * @param html the text to process; may be {@code null}
	 * @return the text without whitespace, or {@code null} when {@code html} is {@code null}
	 */
	public static String deleteAllBlank(String html){
		// Only null/empty short-circuits: a whitespace-only input must still be emptied.
		if(html == null || html.isEmpty()){
			return html;
		}
		return WHITESPACE.matcher(html).replaceAll("");
	}

	/** Returns {@code true} when {@code str} is {@code null} or has no characters left after {@code trim()}. */
	private static boolean isBlank(String str){
		return str == null || str.trim().isEmpty();
	}
}

你可能感兴趣的:(java,html)