html过滤

  以下例子转自一位网友,并非本人所写;因为觉得好,所以贴出来一起分享。
import java.util.regex.Pattern;
  public class Test
  {
  
  public static void main(String[] args)
  {
  String ww="<html>sss<body>ss</body>ssss</html>";
  String ff=html2Text(ww);
  System.out.println(ff);
  }
 public static String html2Text(String inputString) {
		String htmlStr = inputString; // 含html标签的字符串
		String textStr = "";
		java.util.regex.Pattern p_script;
		java.util.regex.Matcher m_script;
		java.util.regex.Pattern p_style;
		java.util.regex.Matcher m_style;
		java.util.regex.Pattern p_html;
		java.util.regex.Matcher m_html;
		try {
			String regEx_script = "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>"; // 定义script的正则表达式{或<script>]*?>[\s\S]*?<\/script>
			// }
			String regEx_style = "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>"; // 定义style的正则表达式{或<style>]*?>[\s\S]*?<\/style>
			// }
			String regEx_html = "<[^>]+>"; // 定义HTML标签的正则表达式

			p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
			m_script = p_script.matcher(htmlStr);
			htmlStr = m_script.replaceAll(""); // 过滤script标签

			p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
			m_style = p_style.matcher(htmlStr);
			htmlStr = m_style.replaceAll(""); // 过滤style标签

			p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
			m_html = p_html.matcher(htmlStr);
			htmlStr = m_html.replaceAll(""); // 过滤html标签

			textStr = htmlStr;

		} catch (Exception e) {
			System.err.println("Html2Text: " + e.getMessage());
		}

		return textStr;
}
  }
 
  下面是 Struts 中的 HTML 过滤方法:
public static String filter(String value)
    {
        if(value == null || value.length() == 0)
            return value;
        StringBuffer result = null;
        String filtered = null;
        for(int i = 0; i < value.length(); i++)
        {
            filtered = null;
            switch(value.charAt(i))
            {
            case 60: // '<'
                filtered = "<";
                break;

            case 62: // '>'
                filtered = ">";
                break;

            case 38: // '&'
                filtered = "&";
                break;

            case 34: // '"'
                filtered = """;
                break;

            case 39: // '\''
                filtered = "'";
                break;
            }
            if(result == null)
            {
                if(filtered != null)
                {
                    result = new StringBuffer(value.length() + 50);
                    if(i > 0)
                        result.append(value.substring(0, i));
                    result.append(filtered);
                }
            } else
            if(filtered == null)
                result.append(value.charAt(i));
            else
                result.append(filtered);
        }

        return result != null ? result.toString() : value;
    }
 

你可能感兴趣的:(java,html,正则表达式,struts)