Java 过滤html代码

(1)java 如何escape HTML代码

如何转义HTML标签

/**
	 * 去除HTML字串中的控制字符及不可视字符
	 * 
	 * @param str
	 *            HTML字串
	 * @return 返回的字串
	 */
	public static String escapeHTML(String str) {
		int length = str.length();
		int newLength = length;
		boolean someCharacterEscaped = false;
		for (int i = 0; i < length; i++) {
			char c = str.charAt(i);
			int cint = 0xffff & c;
			if (cint < 32)
				switch (c) {
				case 11:
				default:
					newLength--;
					someCharacterEscaped = true;
					break;

				case '\t':
				case '\n':
				case '\f':
				case '\r':
					break;
				}
			else
				switch (c) {
				case '"':
					newLength += 5;
					someCharacterEscaped = true;
					break;

				case '&':
				case '\'':
					newLength += 4;
					someCharacterEscaped = true;
					break;

				case '<':
				case '>':
					newLength += 3;
					someCharacterEscaped = true;
					break;
				}
		}
		if (!someCharacterEscaped)
			return str;

		StringBuffer sb = new StringBuffer(newLength);
		for (int i = 0; i < length; i++) {
			char c = str.charAt(i);
			int cint = 0xffff & c;
			if (cint < 32)
				switch (c) {
				case '\t':
				case '\n':
				case '\f':
				case '\r':
					sb.append(c);
					break;
				}
			else
				switch (c) {
				case '"':
					sb.append("&quot;");
					break;

				case '\'':
					sb.append("&apos;");
					break;

				case '&':
					sb.append("&amp;");
					break;

				case '<':
					sb.append("&lt;");
					break;

				case '>':
					sb.append("&gt;");
					break;

				default:
					sb.append(c);
					break;
				}
		}
		return sb.toString();
	}

 测试:

@Test
	public void test_001(){

		String input="<html><input type=\"button\" onlick=\"abc()\" > </html>";
		System.out.println(input);
		System.out.println(StringUtil.escapeHTML(input));
	}

 运行结果:

 

 

(2)java 如何去除html标签,只留下文本

/**
	 * 删除input字符串中的html格式
	 * 
	 * @param input
	 * @param length
	 *            显示的字符的个数
	 * @return
	 */
	public static String splitAndFilterString(String input, int length) {
		if (input == null || input.trim().equals("")) {
			return "";
		}
		// 去掉所有html元素,
		String str = input.replaceAll("\\&[a-zA-Z]{1,10};", "").replaceAll(
				"<[^>]*>", "");
		str = str.replaceAll("[(/>)<]", "");
		int len = str.length();
		if (len <= length) {
			return str;
		} else {
			str = str.substring(0, length);
			str += "......";
		}
		return str;
	}

	/**
	 * 返回纯文本,去掉html的所有标签,并且去掉空行
	 * 
	 * @param input
	 * @return
	 */
	public static String splitAndFilterString(String input) {
		if (input == null || input.trim().equals("")) {
			return "";
		}
		// 去掉所有html元素,
		String str = input.replaceAll("\\&[a-zA-Z]{1,10};", "").replaceAll(
				"<[^>]*>", "");
		str = str.replaceAll("[(/>)<]", "");
		return SystemHWUtil.deleteCRLF(str);
	}

/***
	 * Delete all spaces
	 * 
	 * @param input
	 * @return
	 */
	public static String deleteAllCRLF(String input) {
		return input.replaceAll("((\r\n)|\n)[\\s\t ]*", "").replaceAll(
				"^((\r\n)|\n)", "");
	}

	/**
	 * delete CRLF; delete empty line ;delete blank lines
	 * 
	 * @param input
	 * @return
	 */
	public static String deleteCRLF(String input) {
		input = SystemHWUtil.deleteCRLFOnce(input);
		return SystemHWUtil.deleteCRLFOnce(input);
	}

 见类com\common\util\SystemHWUtil.java

源代码见附件

你可能感兴趣的:(html,escape,过滤HTML,去掉html的所有标签,编码html,转义html标签)