Java源码阅读之String

Summary:

  • String中的字符串是存储在一个char[]数组中的;hash值的结果受char数组中的值影响;
  • 构造器参数如果不是String,则都是需要由系统生成一个数组;如果参数给定了一个char[]还会最终调用native方法System.arraycopy();
  • length()、isEmpty()、charAt()等对当前String对象无任何影响的操作都是可以看成对char数组的操作
  • getChars、toCharArray、copyValueOf等最终都需要调用System.arraycopy()方法;
  • 其它方法:
    • equals() {实现:两个char[]数据逐个比较}
    • replace() {实现:先拷贝char[]数据然后逐个检测替换}
    • indexOf() {实现:没有采用kmp算法,使用的是穷举法}
    • split() {实现:利用indexOf和substring方法配合使用}

Fields:

// Backing storage for the string's characters; length(), charAt() and the
// other accessors below read directly from this array.
private final char value[];
// Cached result of hashCode(). hashCode() recomputes it while it is still 0
// and the string is non-empty.
private int hash; // Default to 0

Constructor:

// Creates an empty string: the backing store is a zero-length char array.
public String() {
    this.value = new char[] {};
}
//改变了下成员变量的引用所指的对象而已没有新建对象
public String(String original) {
        this.value = original.value;
        this.hash = original.hash;
}
//调用Arrays.copyOf方法
//Arrays.copyOf方法最终调用java.lang.System.arraycopy方法;
//java.lang.System.arraycopy方法声明为public static native void arraycopy();
public String(char value[]) {
        this.value = java.util.Arrays.copyOf(value, value.length);
}
    // Returns a new array of length newLength holding the leading elements of
    // original; if newLength is larger than original.length the tail keeps the
    // default char value, if smaller the copy is truncated.
    public static char[] copyOf(char[] original, int newLength) {
        final char[] result = new char[newLength];
        final int toCopy = Math.min(original.length, newLength);
        System.arraycopy(original, 0, result, 0, toCopy);
        return result;
    }

length():

// The string length is simply the length of the backing char array.
public int length() {
    final char[] chars = value;
    return chars.length;
}

isEmpty():

// A string is empty exactly when its backing char array has length 0.
public boolean isEmpty() {
    final int count = value.length;
    return count == 0;
}

charAt():

// Random access into the backing char array.
// Throws StringIndexOutOfBoundsException (carrying the offending index) when
// index is negative or not smaller than the string length.
public char charAt(int index) {
    final char[] chars = value;
    if (index >= 0 && index < chars.length) {
        return chars[index];
    }
    throw new StringIndexOutOfBoundsException(index);
}

getChars():

// Copies every character of this string into dst, starting at dst[dstBegin],
// with a single System.arraycopy call. No bounds checks are done here; they
// are left to System.arraycopy.
void getChars(char dst[], int dstBegin) {
    final char[] src = value;
    System.arraycopy(src, 0, dst, dstBegin, src.length);
}

toCharArray():

// Returns a freshly allocated copy of the backing char array, so callers can
// modify the result without touching this string.
public char[] toCharArray() {
    final char[] src = value;
    final int count = src.length;
    // Arrays.copyOf cannot be used here because of class initialization order issues.
    final char[] copy = new char[count];
    System.arraycopy(src, 0, copy, 0, count);
    return copy;
}

copyValueOf():

// Static factory that simply forwards to the String(char[], int, int)
// constructor: the result holds count characters of data starting at offset.
public static String copyValueOf(char data[], int offset, int count) {
    final String copy = new String(data, offset, count);
    return copy;
}

equals():

// Equality check in three steps:
//   1. the same reference is trivially equal;
//   2. anything that is not a String, or a String of different length, is unequal;
//   3. otherwise the two backing char arrays are compared element by element.
public boolean equals(Object anObject) {
    if (anObject == this) {
        return true;
    }
    if (!(anObject instanceof String)) {
        return false;
    }
    final String other = (String) anObject;
    final char[] mine = value;
    final char[] theirs = other.value;
    if (mine.length != theirs.length) {
        return false;
    }
    for (int idx = 0; idx < mine.length; idx++) {
        if (mine[idx] != theirs[idx]) {
            return false;
        }
    }
    return true;
}

hashCode():

// Lazily computed hash: h = 31 * h + c over every character, starting from 0.
// The result is cached in the hash field. The computation only runs while the
// cached value is still 0 and the string is non-empty, so a string whose hash
// really is 0 is recomputed on every call.
public int hashCode() {
    int result = hash;
    if (result != 0 || value.length == 0) {
        return result;
    }
    for (char c : value) {
        result = 31 * result + c;
    }
    hash = result;
    return result;
}

indexOf():

// Returns the index of the first occurrence of code point ch at or after
// fromIndex, or -1 if there is none. The array is scanned left to right
// (lastIndexOf does the same from right to left).
// A negative fromIndex is treated as 0; a fromIndex at or past the end yields -1.
public int indexOf(int ch, int fromIndex) {
    final char[] chars = this.value;
    final int limit = chars.length;
    if (fromIndex >= limit) {
        // Note: fromIndex might be near -1>>>1.
        return -1;
    }
    final int start = Math.max(fromIndex, 0);

    if (ch >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
        // Supplementary code points are handled by a dedicated helper.
        return indexOfSupplementary(ch, start);
    }
    // Common case: ch is a BMP code point or a negative (invalid) value,
    // so a plain char-by-char comparison is enough.
    for (int pos = start; pos < limit; pos++) {
        if (chars[pos] == ch) {
            return pos;
        }
    }
    return -1;
}
// Substring search. All the work is done by the static
// indexOf(char[], int, int, char[], int, int, int) overload, which locates the
// first character of the target and then compares the rest one by one.
// This is a brute-force scan, not KMP.
public int indexOf(String str, int fromIndex) {
    final char[] haystack = value;
    final char[] needle = str.value;
    return indexOf(haystack, 0, haystack.length,
            needle, 0, needle.length, fromIndex);
}
// Brute-force search for target[targetOffset, targetOffset + targetCount)
// inside source[sourceOffset, sourceOffset + sourceCount), starting fromIndex
// positions into the source window.
// Returns the match position relative to sourceOffset, or -1 if not found.
// An empty target matches at fromIndex (clamped to the range [0, sourceCount]).
static int indexOf(char[] source, int sourceOffset, int sourceCount,
            char[] target, int targetOffset, int targetCount,
            int fromIndex) {
    if (fromIndex >= sourceCount) {
        return (targetCount == 0) ? sourceCount : -1;
    }
    final int begin = Math.max(fromIndex, 0);
    if (targetCount == 0) {
        return begin;
    }

    final char lead = target[targetOffset];
    // Last source position at which a full-length match can still start.
    final int lastStart = sourceOffset + (sourceCount - targetCount);

    for (int start = sourceOffset + begin; start <= lastStart; start++) {
        // Skip ahead until the first character of the target is found.
        if (source[start] != lead) {
            continue;
        }
        // First character matches; compare the remaining characters.
        int matched = 1;
        while (matched < targetCount
                && source[start + matched] == target[targetOffset + matched]) {
            matched++;
        }
        if (matched == targetCount) {
            // Whole target found.
            return start - sourceOffset;
        }
    }
    return -1;
}

contains():

// True when the text of s occurs somewhere in this string, decided purely by
// whether indexOf reports a non-negative position.
public boolean contains(CharSequence s) {
    final int position = indexOf(s.toString());
    return position >= 0;
}

substring():

// Returns the characters in [beginIndex, endIndex) as a String.
// Throws StringIndexOutOfBoundsException when beginIndex is negative, endIndex
// exceeds the length, or endIndex is smaller than beginIndex.
// When the range covers the whole string, this very instance is returned;
// otherwise a new String is built through the String(char[], int, int) constructor.
public String substring(int beginIndex, int endIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    final int length = value.length;
    if (endIndex > length) {
        throw new StringIndexOutOfBoundsException(endIndex);
    }
    final int subLen = endIndex - beginIndex;
    if (subLen < 0) {
        throw new StringIndexOutOfBoundsException(subLen);
    }
    if (beginIndex == 0 && endIndex == length) {
        return this;
    }
    return new String(value, beginIndex, subLen);
}

replace():

// Returns a string in which every occurrence of oldChar is replaced by newChar.
// If the two chars are equal, or oldChar never occurs, this instance is
// returned and nothing is allocated. Otherwise the characters are copied into
// a new array, with replacements applied from the first occurrence onwards,
// and a new String is built around that array.
public String replace(char oldChar, char newChar) {
    if (oldChar == newChar) {
        return this;
    }
    final char[] src = value; /* avoid getfield opcode */
    final int len = src.length;

    // Locate the first occurrence of oldChar.
    int first = 0;
    while (first < len && src[first] != oldChar) {
        first++;
    }
    if (first == len) {
        return this;
    }

    final char[] buf = new char[len];
    // Everything before the first occurrence is copied unchanged.
    System.arraycopy(src, 0, buf, 0, first);
    for (int k = first; k < len; k++) {
        final char c = src[k];
        buf[k] = (c == oldChar) ? newChar : c;
    }
    return new String(buf, true);
}

split():

//Splits this string into substrings around matches of regex.
//limit > 0 : the delimiter is applied at most limit-1 times, so the result holds at most limit entries
//            and the last entry contains the unsplit remainder;
//limit == 0: the delimiter is applied as many times as possible and trailing empty strings are removed;
//limit < 0 : the delimiter is applied as many times as possible and trailing empty strings are kept.
public String[] split(String regex, int limit) {
        /* fastpath if the regex is a
         (1)one-char String and this character is not one of the
            RegEx's meta characters ".$|()[{^?*+\\", or
         (2)two-char String and the first char is the backslash and
            the second is not the ascii digit or ascii letter.
         */
        char ch = 0;
        if (    (
                    //Case (1): regex is a single character that is not a regex meta character.
                    //Case (2): regex has length 2, starts with '\', and its second character is
                    //          neither an ASCII digit nor an ASCII letter.
                    //Range test trick: ((c - low) | (high - c)) < 0 holds exactly when c lies outside
                    //[low, high], because the OR is negative as soon as either difference is negative.
                    //Note that ch is assigned inside the condition and is used as the delimiter below.
                    (regex.value.length == 1 &&".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
                    (    regex.length() == 2 && 
                        regex.charAt(0) == '\\' &&
                        (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
                        ((ch-'a')|('z'-ch)) < 0 &&
                        ((ch-'A')|('Z'-ch)) < 0
                    )
                ) &&
                (
                    //ch must not be a surrogate, i.e. it lies outside
                    //[Character.MIN_HIGH_SURROGATE, Character.MAX_LOW_SURROGATE].
                    ch < Character.MIN_HIGH_SURROGATE ||
                    ch > Character.MAX_LOW_SURROGATE
                )
           )
        {
            // Fast path: plain single-char delimiter, implemented with indexOf + substring.
            int off = 0;
            int next = 0;
            // Only a positive limit caps the number of pieces.
            boolean limited = limit > 0;
            ArrayList<String> list = new ArrayList<>();
            while ((next = indexOf(ch, off)) != -1) {
                if (!limited || list.size() < limit - 1) {
                    list.add(substring(off, next));
                    off = next + 1;
                } else {    // last one
                    // The limit is reached: the rest of the string becomes the final piece.
                    //assert (list.size() == limit - 1);
                    list.add(substring(off, value.length));
                    off = value.length;
                    break;
                }
            }
            // If no match was found, return this
            if (off == 0)
                return new String[]{this};

            // Add remaining segment
            if (!limited || list.size() < limit)
                list.add(substring(off, value.length));

            // Construct result
            int resultSize = list.size();
            if (limit == 0) {
                // limit == 0: strip trailing empty strings from the result.
                while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
                    resultSize--;
                }
            }
            String[] result = new String[resultSize];
            return list.subList(0, resultSize).toArray(result);
        }
        // General case: compile the regex and let Pattern do the splitting.
        return Pattern.compile(regex).split(this, limit);
}


你可能感兴趣的:(java,String,OpenJDK,jdk源代码)