String 是平常使用最多的结构之一。String 提供了很多方便的接口来处理字符串,同时支持正则表达式,所以字符串处理能力很强。下面我们简单介绍 String 的内部实现,重点关注一些常用方法的实现。String 的部分方法涉及字符编码,这里不介绍字符编码相关的内容。
//String类是final的,即不可继承 public final class String implements java.io.Serializable, Comparable<String>, CharSequence { private final char value[];//内部存储结构,即以数组来存储其内容 private int hash;//标识一个字符串 //实现空字符串 public String() { this.value = new char[0]; } //用字符串构造字符串的 public String(String original) { this.value = original.value; this.hash = original.hash; } //用字符数组构造字符串 public String(char value[]) { this.value = Arrays.copyOf(value, value.length); } //如果以字节数组构造,则涉及到字符编码的问题 public String(byte bytes[], Charset charset) { this(bytes, 0, bytes.length, charset); } //获取长度信息,因为数组提供了长度信息,所以直接获取,否则可以提供一个长度信息 public int length() { return value.length; } //判断是否为空 public boolean isEmpty() { return value.length == 0; } //String也不是完全不可变的,这里可以直接修改内容 public char charAt(int index) { if ((index < 0) || (index >= value.length)) { throw new StringIndexOutOfBoundsException(index); } return value[index]; } //比较两字符串 public boolean equals(Object anObject) { if (this == anObject) {//判断是否指向同一个对象 return true; } if (anObject instanceof String) {//判断是否是String类的对象 String anotherString = (String) anObject; int n = value.length;//判断长度信息 if (n == anotherString.value.length) { char v1[] = value; char v2[] = anotherString.value; int i = 0; while (n-- != 0) { if (v1[i] != v2[i])//判断内容 return false; i++; } return true; } } return false; } //比较两字符串的大小,比较过程和equals类似 public int compareTo(String anotherString) { int len1 = value.length; int len2 = anotherString.value.length; int lim = Math.min(len1, len2); char v1[] = value; char v2[] = anotherString.value; int k = 0; while (k < lim) { char c1 = v1[k]; char c2 = v2[k]; if (c1 != c2) { return c1 - c2; } k++; } return len1 - len2; } //忽略大小写的一种判断方法 public int compare(String s1, String s2) { int n1 = s1.length(); int n2 = s2.length(); int min = Math.min(n1, n2); for (int i = 0; i < min; i++) { char c1 = s1.charAt(i); char c2 = s2.charAt(i); if (c1 != c2) {//判断是否相等 c1 = Character.toUpperCase(c1);//如果不相等,则都转换成大写进行判断 c2 = Character.toUpperCase(c2); if (c1 != c2) { c1 = 
Character.toLowerCase(c1);//转换成小写判断 c2 = Character.toLowerCase(c2); if (c1 != c2) { return c1 - c2; } } } } return n1 - n2; } //比较两字符串是否相等 public boolean regionMatches(int toffset, String other, int ooffset, int len) { char ta[] = value; int to = toffset; char pa[] = other.value; int po = ooffset; if ((ooffset < 0) || (toffset < 0) //判断参数有效性 || (toffset > (long)value.length - len) || (ooffset > (long)other.value.length - len)) { return false; } while (len-- > 0) { //比较内容 if (ta[to++] != pa[po++]) { return false; } } return true; } //判断是否以字串开始 public boolean startsWith(String prefix, int toffset) { char ta[] = value; int to = toffset; char pa[] = prefix.value; int po = 0; int pc = prefix.value.length; if ((toffset < 0) || (toffset > value.length - pc)) {//判断参数有效性 return false; } while (--pc >= 0) { //比较内容 if (ta[to++] != pa[po++]) { return false; } } return true; } //startWith的逆操作 public boolean endsWith(String suffix) { return startsWith(suffix, value.length - suffix.value.length); } //查找字符 public int indexOf(int ch, int fromIndex) { final int max = value.length; if (fromIndex < 0) {//参数有效性判断 fromIndex = 0; } else if (fromIndex >= max) { return -1; } //if是执行字符有效性判断 if (ch < Character.MIN_SUPPLEMENTARY_CODE_POINT) { final char[] value = this.value;//获取字符串内容 for (int i = fromIndex; i < max; i++) { if (value[i] == ch) {//比较内容 return i; } } return -1; } else { return indexOfSupplementary(ch, fromIndex); } } //删除空格 public String trim() { int len = value.length; int st = 0; char[] val = value; while ((st < len) && (val[st] <= ' ')) {//删除头部空格 st++; } while ((st < len) && (val[len - 1] <= ' ')) {//删除尾部空格 len--; } return ((st > 0) || (len < value.length)) ? 
substring(st, len) : this;//重新构造字符串 } //返回字符数组 public char[] toCharArray() { char result[] = new char[value.length]; System.arraycopy(value, 0, result, 0, value.length);//执行内存copy return result; } //执行字符串分割操作 public String[] split(String regex, int limit) { char ch = 0; //如果分割符是这些字符,则自行进行分割 if (((regex.value.length == 1 && ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) || (regex.length() == 2 && regex.charAt(0) == '\\' && (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 && ((ch-'a')|('z'-ch)) < 0 && ((ch-'A')|('Z'-ch)) < 0)) && (ch < Character.MIN_HIGH_SURROGATE || ch > Character.MAX_LOW_SURROGATE)) { int off = 0; int next = 0; boolean limited = limit > 0; ArrayList<String> list = new ArrayList<>();//借助链表来存储分割的元素 while ((next = indexOf(ch, off)) != -1) {//定位元素 if (!limited || list.size() < limit - 1) { list.add(substring(off, next));//从主串里面substring分割元素 off = next + 1; } else {//判断模式是否启用,而且已经使用的次数大于limit list.add(substring(off, value.length)); off = value.length; break; } } //没有该字符,则返回完整的串 if (off == 0) return new String[]{this}; //模式阀值未超过,则添加剩余的串 if (!limited || list.size() < limit) list.add(substring(off, value.length)); int resultSize = list.size(); if (limit == 0) while (resultSize > 0 && list.get(resultSize - 1).length() == 0) resultSize--; String[] result = new String[resultSize]; return list.subList(0, resultSize).toArray(result);//返回字符数组 } //否则直接调用正则表达式进行分割 return Pattern.compile(regex).split(this, limit); }