高效敏感词过滤算法:AC(Aho-Corasick)自动机的Java实现,匹配耗时与文本长度近似成线性关系,与敏感词数量基本无关

如果有人感兴趣的话,再具体讲解思路吧!

import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;

/**
 * Sensitive-word (keyword) detector built on an Aho-Corasick automaton: a
 * trie of the keywords whose nodes carry "fail" links pointing at the longest
 * proper suffix of the current path that is also a path in the trie.
 *
 * <p>Instances are fully built in the constructor and never modified by
 * {@link #find(String)} afterwards.
 *
 * <p>Created by jiayaoguang on 2017-09-28.
 */
public class ACFind {

	/** Root of the keyword trie; its fail link is {@code null}. */
	private final TrieNode root;

	/** Small demo: builds an automaton and prints the index reported by {@link #find(String)}. */
	public static void main(String[] args) {

		String[] keywords = new String[] { "人能世界那1大那大好人坏人1", "世界那1大那大好人坏人1", "能世界那1大那大好人坏人1", "界那1大那大好人坏人1",
				"那1大那大好人坏人1", "1大那大好人坏人", "坏1人", "世界那大", "世界那么大" };
		ACFind find = new ACFind(keywords);
		int index = find.find("人能世界那1大那大好人坏人");
		System.out.println(index);

	}

	/**
	 * Builds the automaton for the given keywords.
	 *
	 * @param keywords the words to detect; {@code null} and empty entries are ignored
	 * @throws NullPointerException if the array itself is {@code null}
	 */
	public ACFind(String[] keywords) {
		root = buildTree(keywords);
		addFailNode(root);
	}

	/**
	 * Scans {@code text} for any of the keywords.
	 *
	 * @param text the text to scan; {@code null} is treated as containing no keyword
	 * @return the index of the LAST character of the first keyword occurrence
	 *         that completes while scanning left to right, or {@code -1} if no
	 *         keyword occurs
	 */
	public int find(String text) {
		if (text == null) {
			return -1;
		}
		TrieNode node = root;

		for (int index = 0, len = text.length(); index < len; index++) {
			char c = text.charAt(index);

			// Fall back along fail links until some suffix of what has been
			// read so far can be extended by c (or we run off the root).
			while (node != null && node.getSonNode(c) == null) {
				node = node.getFailNode();
			}

			// node == null means not even the root has a child for c.
			node = (node == null ? root : node.getSonNode(c));

			// Every node on the fail chain is a suffix of the current path,
			// so a keyword ending at this position shows up somewhere on it.
			for (TrieNode temp = node; temp != null; temp = temp.getFailNode()) {
				if (temp.isWordEnd()) {
					return index;
				}
			}
		}
		return -1;
	}

	/**
	 * Creates the keyword trie (without fail links).
	 *
	 * @param keywords words to insert; {@code null} and empty entries are skipped
	 * @return the root of the new trie
	 */
	private TrieNode buildTree(String[] keywords) {
		final TrieNode trieRoot = new TrieNode(' ');
		for (String word : keywords) {
			// Delegating keeps null/empty handling in one place; an empty
			// keyword must never mark the root as a word end, otherwise every
			// non-empty text would "match" at index 0.
			addWordToTree(trieRoot, word);
		}
		return trieRoot;
	}

	/**
	 * Inserts one word into the trie rooted at {@code rootNode}, creating
	 * missing nodes and marking the final node as a word end.
	 * {@code null} and empty words are ignored.
	 */
	private void addWordToTree(TrieNode rootNode, String word) {
		if (word == null || word.length() == 0) {
			return;
		}
		TrieNode current = rootNode;
		for (char c : word.toCharArray()) {
			TrieNode next = current.getSonNode(c);
			if (next == null) {
				next = new TrieNode(c);
				current.addSonNode(next);
			}
			current = next;
		}
		current.setWordEnd(true);
	}

	/**
	 * Prints the trie level by level to {@code System.out}. Each node is
	 * printed as its character followed by {@code ';'}; a marker node holding
	 * {@code '\n'} is printed after every level.
	 *
	 * @param root the node to start printing from
	 */
	public void printTree(TrieNode root) {
		Queue<TrieNode> queue = new LinkedList<>();
		queue.offer(root);
		// Sentinel that marks the end of a level in the BFS queue.
		TrieNode enterNode = new TrieNode('\n');
		queue.add(enterNode);
		while (!queue.isEmpty()) {
			TrieNode parent = queue.poll();
			System.out.print(parent.getValue() + ";");
			// Re-queue the marker while nodes of the next level are pending,
			// including when that level consists of a single node.
			if (parent == enterNode && !queue.isEmpty()) {
				queue.offer(enterNode);
				continue;
			}
			queue.addAll(parent.getSonsNode());
		}

	}

	/**
	 * Walks the trie breadth-first and assigns a fail link to every node.
	 * Breadth-first order guarantees that the fail links of all shallower
	 * nodes are already set when a node is processed.
	 */
	private void addFailNode(final TrieNode root) {
		root.setFailNode(null);
		Queue<TrieNode> queue = new LinkedList<>();
		queue.add(root);
		while (!queue.isEmpty()) {
			TrieNode parent = queue.poll();
			for (TrieNode child : parent.getSonsNode()) {
				// Follow the parent's fail chain until a node with a child for
				// the same character is found. For children of the root the
				// chain is empty, so they fall through to the root below.
				TrieNode target = null;
				for (TrieNode temp = parent.getFailNode(); temp != null && target == null; temp = temp
						.getFailNode()) {
					target = temp.getSonNode(child.getValue());
				}
				child.setFailNode(target == null ? root : target);
				queue.add(child);
			}
		}
	}

	/**
	 * One node of the keyword trie. Declared static so nodes do not each hold
	 * a reference to the enclosing {@code ACFind} instance.
	 */
	static class TrieNode {

		/** Node to continue from when no child matches the next character. */
		private TrieNode failNode;

		/** Character on the edge leading to this node. */
		private final char value;

		/** Whether the path from the root to this node spells a keyword. */
		private boolean isWordEnd = false;

		/** Child nodes keyed by their character. */
		private final Map<Character, TrieNode> sons;

		public TrieNode(char value) {

			this.value = value;
			sons = new HashMap<>();
		}

		/** Adds (or replaces) the child keyed by {@code node}'s character. */
		public void addSonNode(TrieNode node) {
			sons.put(node.value, node);
		}

		/** Returns the child for {@code ch}, or {@code null} if there is none. */
		public TrieNode getSonNode(char ch) {
			return sons.get(ch);
		}

		/** Returns whether a child exists for {@code ch}. */
		public boolean containsSonNode(char ch) {
			return sons.containsKey(ch);
		}

		/** Returns this node's character. */
		public char getValue() {
			return value;
		}

		/** Sets the fail link. */
		public void setFailNode(TrieNode failNode) {
			this.failNode = failNode;
		}

		/** Returns the fail link; {@code null} until one has been set. */
		public TrieNode getFailNode() {
			return failNode;
		}

		/** Returns a view of all child nodes. */
		public Collection<TrieNode> getSonsNode() {
			return sons.values();
		}

		public boolean isWordEnd() {
			return isWordEnd;
		}

		public void setWordEnd(boolean isWordEnd) {
			this.isWordEnd = isWordEnd;
		}

	}
}

你可能感兴趣的:(数据结构与算法)