1.不支持添加重复元素
2.底层可以采用BST和LinkedList来实现
3.应用: 客户统计 ,词汇量统计
Set接口
public interface Set<E> {
void add(E e);
void remove(E e);
boolean contains(E e);
int getSize();
boolean isEmpty();
}
基于二叉搜索树的集合实现
public class BSTSet<E extends Comparable<E>> implements Set<E>{
private BST<E> bst;
public BSTSet(){
bst = new BST<>();
}
@Override
public void add(E e) {
bst.add(e);
}
@Override
public void remove(E e) {
bst.remove(e);
}
@Override
public boolean contains(E e) {
return bst.contains(e);
}
@Override
public int getSize() {
return bst.getSize();
}
@Override
public boolean isEmpty() {
return bst.isEmpty();
}
}
读取文本文件
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.Locale;
import java.io.File;
import java.io.BufferedInputStream;
import java.io.IOException;
// 文件相关操作
public class FileOperation {
// 读取文件名称为filename中的内容,并将其中包含的所有词语放进words中
public static boolean readFile(String filename, ArrayList<String> words){
if (filename == null || words == null){
System.out.println("filename is null or words is null");
return false;
}
// 文件读取
Scanner scanner;
try {
File file = new File(filename);
if(file.exists()){
FileInputStream fis = new FileInputStream(file);
scanner = new Scanner(new BufferedInputStream(fis), "UTF-8");
scanner.useLocale(Locale.ENGLISH);
}
else
return false;
}
catch(IOException ioe){
System.out.println("Cannot open " + filename);
return false;
}
// 简单分词
// 这个分词方式相对简陋, 没有考虑很多文本处理中的特殊问题
// 在这里只做demo展示用
if (scanner.hasNextLine()) {
String contents = scanner.useDelimiter("\\A").next();
int start = firstCharacterIndex(contents, 0);
for (int i = start + 1; i <= contents.length(); )
if (i == contents.length() || !Character.isLetter(contents.charAt(i))) {
String word = contents.substring(start, i).toLowerCase();
words.add(word);
start = firstCharacterIndex(contents, i);
i = start + 1;
} else
i++;
}
return true;
}
// 寻找字符串s中,从start的位置开始的第一个字母字符的位置
private static int firstCharacterIndex(String s, int start){
for( int i = start ; i < s.length() ; i ++ )
if( Character.isLetter(s.charAt(i)) )
return i;
return s.length();
}
}
检测文本中多少个不同的单词
public class Main {
public static void main(String[] args) {
System.out.println("Pride and Prejudice");
ArrayList<String> words1 = new ArrayList<>();
if(FileOperation.readFile("/Users/apple/Desktop/AL/Set/src/pride-and-prejudice.txt", words1)) {
System.out.println("Total words: " + words1.size());
BSTSet<String> set1 = new BSTSet<>();
for (String word : words1)
set1.add(word);
System.out.println("Total different words: " + set1.getSize());
}
System.out.println();
System.out.println("A Tale of Two Cities");
ArrayList<String> words2 = new ArrayList<>();
if(FileOperation.readFile("/Users/apple/Desktop/AL/Set/src/a-tale-of-two-cities.txt", words2)){
System.out.println("Total words: " + words2.size());
BSTSet<String> set2 = new BSTSet<>();
for(String word: words2)
set2.add(word);
System.out.println("Total different words: " + set2.getSize());
}
}
}
处理速度比bst实现的要慢很多
import java.util.ArrayList;
public class LinkedListSet<E> implements Set<E> {
private LinkedList<E> list ;
public LinkedListSet(){
list = new LinkedList<>();
}
@Override
public void add(E e) {
if (!list.contains(e))
list.addFirst(e);
}
@Override
public void remove(E e) {
list.removeElement(e);
}
@Override
public boolean contains(E e) {
return list.contains(e);
}
@Override
public int getSize() {
return list.getSize();
}
@Override
public boolean isEmpty() {
return list.isEmpty();
}
public static void main(String[] args) {
System.out.println("Pride and Prejudice");
ArrayList<String> words1 = new ArrayList<>();
if(FileOperation.readFile("/Users/apple/Desktop/AL/Set/src/pride-and-prejudice.txt", words1)) {
System.out.println("Total words: " + words1.size());
LinkedListSet<String> set1 = new LinkedListSet<>();
for (String word : words1)
set1.add(word);
System.out.println("Total different words: " + set1.getSize());
}
System.out.println();
System.out.println("A Tale of Two Cities");
ArrayList<String> words2 = new ArrayList<>();
if(FileOperation.readFile("/Users/apple/Desktop/AL/Set/src/a-tale-of-two-cities.txt", words2)){
System.out.println("Total words: " + words2.size());
LinkedListSet<String> set2 = new LinkedListSet<>();
for(String word: words2)
set2.add(word);
System.out.println("Total different words: " + set2.getSize());
}
}
}
基于搜索树实现的集合是有序集合
基于hash实现的集合是无序集合,速度上更快
支持重复元素
底层可以用BST和LinkedList来实现
应用: 统计词频 , 统计每个人总得分等
Map
public interface Map<K,V> {
void add(K key, V value);
V remove(K key);
boolean contains(K key);
V get(K key);
void set(K key,V newValue);
int getSize();
boolean isEmpty();
}
基于LinkedList的Map实现
public class LinkedListMap<K, V> implements Map<K, V> {
private class Node{
public K key;
public V value;
public Node next;
public Node(K key,V value,Node next){
this.key = key;
this.value = value;
this.next = next;
}
public Node(K key){
this(key,null,null);
}
public Node(){
this(null,null,null);
}
@Override
public String toString() {
return key.toString()+" : " + value.toString();
}
}
private Node dummyHead;
private int size;
public LinkedListMap(){
dummyHead = new Node();
size = 0;
}
private Node getNode(K key){
Node res = dummyHead.next;
while (res!=null){
if (res.key.equals(key))
return res;
res = res.next;
}
return null;
}
@Override
public void add(K key, V value) {
Node node = getNode(key);
if (node == null){
dummyHead.next = new Node(key, value, dummyHead.next);
size++;
}
else {
node.value = value;
}
}
@Override
public V remove(K key) {
Node prev = dummyHead;
while (prev.next != null){
if (prev.next.key.equals(key))
break;
prev = prev.next;
}
if (prev.next != null){
Node delNode = prev.next;
prev.next = prev.next.next;
delNode.next = null;
size--;
return delNode.value;
}
return null;
}
@Override
public boolean contains(K key) {
return getNode(key) != null;
}
@Override
public V get(K key) {
Node node = getNode(key);
return getNode(key) == null ? null : node.value;
}
@Override
public void set(K key, V newValue) {
Node node = getNode(key);
if (node != null)
node.value = newValue;
else
throw new IllegalArgumentException("key no extens");
}
@Override
public int getSize() {
return size;
}
@Override
public boolean isEmpty() {
return size==0;
}
}
基于BST实现的映射
import java.util.ArrayList;
public class BSTMap<K extends Comparable<K>, V> implements Map<K, V> {
private class Node{
public K key;
public V value;
public Node left, right;
public Node(K key, V value){
this.key = key;
this.value = value;
left = null;
right = null;
}
}
private Node root;
private int size;
public BSTMap(){
root = null;
size = 0;
}
@Override
public int getSize(){
return size;
}
@Override
public boolean isEmpty(){
return size == 0;
}
// 向二分搜索树中添加新的元素(key, value)
@Override
public void add(K key, V value){
root = add(root, key, value);
}
// 向以node为根的二分搜索树中插入元素(key, value),递归算法
// 返回插入新节点后二分搜索树的根
private Node add(Node node, K key, V value){
if(node == null){
size ++;
return new Node(key, value);
}
if(key.compareTo(node.key) < 0)
node.left = add(node.left, key, value);
else if(key.compareTo(node.key) > 0)
node.right = add(node.right, key, value);
else // key.compareTo(node.key) == 0
node.value = value;
return node;
}
// 返回以node为根节点的二分搜索树中,key所在的节点
private Node getNode(Node node, K key){
if(node == null)
return null;
if(key.equals(node.key))
return node;
else if(key.compareTo(node.key) < 0)
return getNode(node.left, key);
else // if(key.compareTo(node.key) > 0)
return getNode(node.right, key);
}
@Override
public boolean contains(K key){
return getNode(root, key) != null;
}
@Override
public V get(K key){
Node node = getNode(root, key);
return node == null ? null : node.value;
}
@Override
public void set(K key, V newValue){
Node node = getNode(root, key);
if(node == null)
throw new IllegalArgumentException(key + " doesn't exist!");
node.value = newValue;
}
// 返回以node为根的二分搜索树的最小值所在的节点
private Node minimum(Node node){
if(node.left == null)
return node;
return minimum(node.left);
}
// 删除掉以node为根的二分搜索树中的最小节点
// 返回删除节点后新的二分搜索树的根
private Node removeMin(Node node){
if(node.left == null){
Node rightNode = node.right;
node.right = null;
size --;
return rightNode;
}
node.left = removeMin(node.left);
return node;
}
// 从二分搜索树中删除键为key的节点
@Override
public V remove(K key){
Node node = getNode(root, key);
if(node != null){
root = remove(root, key);
return node.value;
}
return null;
}
private Node remove(Node node, K key){
if( node == null )
return null;
if( key.compareTo(node.key) < 0 ){
node.left = remove(node.left , key);
return node;
}
else if(key.compareTo(node.key) > 0 ){
node.right = remove(node.right, key);
return node;
}
else{ // key.compareTo(node.key) == 0
// 待删除节点左子树为空的情况
if(node.left == null){
Node rightNode = node.right;
node.right = null;
size --;
return rightNode;
}
// 待删除节点右子树为空的情况
if(node.right == null){
Node leftNode = node.left;
node.left = null;
size --;
return leftNode;
}
// 待删除节点左右子树均不为空的情况
// 找到比待删除节点大的最小节点, 即待删除节点右子树的最小节点
// 用这个节点顶替待删除节点的位置
Node successor = minimum(node.right);
successor.right = removeMin(node.right);
successor.left = node.left;
node.left = node.right = null;
return successor;
}
}
public static void main(String[] args){
System.out.println("Pride and Prejudice");
ArrayList<String> words = new ArrayList<>();
if(FileOperation.readFile("pride-and-prejudice.txt", words)) {
System.out.println("Total words: " + words.size());
BSTMap<String, Integer> map = new BSTMap<>();
for (String word : words) {
if (map.contains(word))
map.set(word, map.get(word) + 1);
else
map.add(word, 1);
}
System.out.println("Total different words: " + map.getSize());
System.out.println("Frequency of PRIDE: " + map.get("pride"));
System.out.println("Frequency of PREJUDICE: " + map.get("prejudice"));
}
System.out.println();
}
}