关于实现二分搜索树,可以看前面的文章
手敲数据结构——二分搜索树
Map接口
public interface Map {
void put(K key,V value);
V remove(K key);
boolean contains(K key);
V get(K key);
void set(K key,V newValue);
int getSize();
boolean isEmpty();
}
实现代码
public class BSTMap, V> implements Map {
private class Node {
public K key;
public V value;
public Node left, right;
public Node(K key, V value) {
this.key = key;
this.value = value;
left = null;
right = null;
}
}
private Node root;
private int size;
public BSTMap() {
root = null;
size = 0;
}
@Override
public void put(K key, V value) {
root = add(root, key, value);
}
private Node add(Node node, K key, V value) {
if (node == null) {
size++;
return new Node(key, value);
}
if (key.compareTo(node.key) < 0) {
node.left = add(node.left, key, value);
} else if (key.compareTo(node.key) > 0) {
node.right = add(node.right, key, value);
} else {
node.value = value;
}
return node;
}
//返回以node为根节点的二分搜索树中,Key所在的节点
private Node getNode(Node node, K key) {
if (node == null) {
return null;
}
if (key.compareTo(node.key) < 0) {
return getNode(node.left, key);
} else if (key.compareTo(node.key) > 0) {
return getNode(node.right, key);
} else {
return node;
}
}
@Override
public V remove(K key) {
Node node = getNode(root, key);
if (node == null) return null;
root = remove(root,key);
return node.value;
}
private Node remove(Node node, K key) {
if (node == null) return null;
if (key.compareTo(node.key) < 0) {
node.left = remove(node.left, key) ;
return node;
} else if (key.compareTo(node.key) > 0) {
node.right = remove(node.right, key);
return node;
} else {
if (node.left == null) {
Node right = node.right;
node.right = null;
size--;
return right;
} else if (node.right == null) {
Node left = node.left;
node.left = null;
size--;
return left;
} else {
//找到右子树的最小节点
Node successor = minimum(node.right);
successor.right = removeMin(node.right);
successor.left = node.left;
node.left = node.right = null;
return successor;
}
}
}
// 返回以node为根的二分搜索树的最小值所在的节点
private Node minimum(Node node){
if(node.left == null)
return node;
return minimum(node.left);
}
// 寻找二分搜索树的最小元素
public V minimum(){
if(size == 0)
throw new IllegalArgumentException("BST is empty!");
return minimum(root).value;
}
// 从二分搜索树中删除最小值所在节点, 返回最小值
public V removeMin(){
V ret = minimum();
root = removeMin(root);
return ret;
}
// 删除掉以node为根的二分搜索树中的最小节点
// 返回删除节点后新的二分搜索树的根
private Node removeMin(Node node){
if(node.left == null){
Node rightNode = node.right;
node.right = null;
size --;
return rightNode;
}
node.left = removeMin(node.left);
return node;
}
@Override
public boolean contains(K key) {
return getNode(root, key) != null;
}
@Override
public V get(K key) {
Node node = getNode(root, key);
if (node == null) return null;
return node.value;
}
@Override
public void set(K key, V newValue) {
Node node = getNode(root, key);
if (node == null)
throw new IllegalArgumentException("key doesn't exist");
node.value = newValue;
}
@Override
public int getSize() {
return size;
}
@Override
public boolean isEmpty() {
return size == 0;
}
}
测试用例
统计《傲慢与偏见》的不重复单词的个数,以及某个单词出现的频率。
public static void main(String[] args) {
System.out.println("Pride and Prejudice");
ArrayList words1 = new ArrayList<>();
if(FileOperation.readFile("pride-and-prejudice.txt", words1)) {
System.out.println("Total words: " + words1.size());
LinkedListMap map = new LinkedListMap<>();
for (String word : words1){
if (map.contains(word))
map.set(word,map.get(word)+1);
else
map.put(word,1);
}
System.out.println("Total different words: " + map.getSize());
System.out.println("Frequency of PRIDE: " + map.get("pride"));
System.out.println("Frequency of PREJUDICE: " + map.get("prejudice"));
}
System.out.println();
}
输出,如下:
Pride and Prejudice
Total words: 125901
Total different words: 6530
Frequency of PRIDE: 53
Frequency of PREJUDICE: 11
其中的FileOperation就是读取文件,将单词放到list中,如下:
// 文件相关操作
public class FileOperation {
// 读取文件名称为filename中的内容,并将其中包含的所有词语放进words中
public static boolean readFile(String filename, ArrayList words){
if (filename == null || words == null){
System.out.println("filename is null or words is null");
return false;
}
// 文件读取
Scanner scanner;
try {
File file = new File(filename);
if(file.exists()){
FileInputStream fis = new FileInputStream(file);
scanner = new Scanner(new BufferedInputStream(fis), "UTF-8");
scanner.useLocale(Locale.ENGLISH);
}
else
return false;
}
catch(IOException ioe){
System.out.println("Cannot open " + filename);
return false;
}
// 简单分词
// 这个分词方式相对简陋, 没有考虑很多文本处理中的特殊问题
// 在这里只做demo展示用
if (scanner.hasNextLine()) {
String contents = scanner.useDelimiter("\\A").next();
int start = firstCharacterIndex(contents, 0);
for (int i = start + 1; i <= contents.length(); )
if (i == contents.length() || !Character.isLetter(contents.charAt(i))) {
String word = contents.substring(start, i).toLowerCase();
words.add(word);
start = firstCharacterIndex(contents, i);
i = start + 1;
} else
i++;
}
return true;
}
// 寻找字符串s中,从start的位置开始的第一个字母字符的位置
private static int firstCharacterIndex(String s, int start){
for( int i = start ; i < s.length() ; i ++ )
if( Character.isLetter(s.charAt(i)) )
return i;
return s.length();
}
}