String源码阅读笔记

一、String内部的实际的数据存储结构是char数组,源码(1.8)

public final class String implements java.io.Serializable, Comparable, CharSequence{

 //用于存储字符串值

     /** The value is used for character storage. */

private final charvalue[];


//缓存字符串的哈希值

    /** Cache the hash code for the string */

private inthash; // Default to 0

}

二、String 五类构造函数:

/**
 * Builds a string from a whole byte array by delegating to the
 * (bytes, offset, length) constructor with offset 0 and the array's full length.
 * How the bytes are decoded is decided by that constructor, which is not shown here.
 *
 * @param bytes the source bytes; a null array fails when {@code bytes.length} is read
 */
public String(byte[] bytes) {

    this(bytes, 0, bytes.length);

}

/**
 * Builds a string from a character array.
 *
 * The array is copied in full, so later changes to the caller's array do not
 * affect the new string.
 *
 * @param value the initial characters; a null array fails when {@code value.length} is read
 */
public String(char[] value) {
    // Arrays.copyOf takes (array, newLength); the length is value.length.
    this.value = Arrays.copyOf(value, value.length);
}

/**
 * Copy constructor.
 *
 * The new instance reuses the backing character array of {@code original}
 * (no copy is made) and takes over its cached hash value.
 *
 * @param original the string to duplicate
 */
public String(String original) {
    this.hash = original.hash;
    this.value = original.value;
}

public String(StringBuffer buffer) {

    synchronized (buffer) {

        this.value = Arrays.copyOf(buffer.getValue(), buffer.length());

    }

}

/**
 * Builds a string from the current contents of a {@code StringBuilder}.
 *
 * Unlike the {@code StringBuffer} variant, no lock is taken while copying.
 *
 * @param builder the builder whose first {@code builder.length()} characters are copied
 */
public String(StringBuilder builder) {
    this.value = Arrays.copyOf(builder.getValue(), builder.length());
}

三、equals方法:

public boolean equals(Object anObject) {

//对象与自身引用相同直接返回true

    if (this == anObject) {

        return true;

    }

//对象如果不为String类型,直接返回false

    if (anObject instanceof String) {

        String anotherString = (String)anObject;

        int n = value.length;

//如果对象的长度与本身的长度不一样,直接返回false

        if (n == anotherString.value.length) {

            char v1[] = value;

            char v2[] = anotherString.value;

            int i = 0;

//循环每个字符进行对比,完全相同则true,否则为false

            while (n-- != 0) {

                if (v1[i] != v2[i])

                    return false;

                i++;

            }

            return true;

        }

    }

    return false;

}

四、compareTo方法:

public int compareTo(String anotherString) {

    int len1 = value.length;

    int len2 = anotherString.value.length;

    int lim = Math.min(len1, len2);

    char v1[] = value;

    char v2[] = anotherString.value;

    int k = 0;

//从0开始,到长度最短的长度,逐字符比较

    while (k < lim) {

        char c1 = v1[k];

        char c2 = v2[k];

//当两字符不相等时,用自身当前对比位置的字符ascii码-被比较字符串当前位置字符的ascii码

        if (c1 != c2) {

            return c1 - c2;

        }

        k++;

    }

//如果短的完全包含在长的字符串中,则直接用本身的长度-被比较字符串的长度

    return len1 - len2;

}

五、indexOf方法:

/**
 * Searches for {@code str} starting at index 0 by delegating to
 * {@code indexOf(str, 0)}.
 *
 * @param str the substring to look for
 * @return whatever {@code indexOf(str, 0)} returns
 */
public int indexOf(String str) {

    return indexOf(str, 0);

}

//重载的方法

/**
 * Searches for {@code str} in this string, starting at {@code fromIndex}.
 *
 * Passes the complete backing arrays of both strings (offset 0, full length)
 * to the static array-based {@code indexOf}.
 *
 * @param str       the substring to look for
 * @param fromIndex the index at which the search starts
 * @return the result of the array-based search
 */
public int indexOf(String str, int fromIndex) {
    final char[] haystack = value;
    final char[] needle = str.value;
    return indexOf(haystack, 0, haystack.length, needle, 0, needle.length, fromIndex);
}

//内部实际方法,仅供包内访问

staticint indexOf(char[] source, int sourceOffset, int sourceCount,char[] target, int targetOffset, int targetCount, int fromIndex) {

//如果开始下标大于等于源字符串总长度时,查找字符长度为0时返回源字符串长度,否则返回-1

    if (fromIndex >= sourceCount) {

        return (targetCount == 0 ? sourceCount : -1);

    }

    //开始下标小于0,则强制从0开始

    if (fromIndex < 0) {

        fromIndex = 0;

    }

//查找字符串长度为0,则返回开始下标

    if (targetCount == 0) {

        return fromIndex;

    }

//通过查找目标偏移位置找出第一个字符

    char first = target[targetOffset];

//最大下标 = 源偏移位置 + (源长度 - 目标长度)

    int max = sourceOffset + (sourceCount - targetCount);


//从起始下标开始到最大下标逐个字符查找

    for (int i = sourceOffset + fromIndex; i <= max; i++) {

//起始下标不是第一个要找的字符,则先找到第一个字符

        /* Look for first character. */

        if (source[i] != first) {

            while (++i <= max && source[i] != first);

        }


//找到第一个字符后,开始找剩下的字符

        /* Found first character, now look at the rest of v2 */

        if (i <= max) {

            int j = i + 1;

            int end = j + targetCount - 1;

for (int k = targetOffset + 1; j < end && source[j]  == target[k]; j++, k++);

            if (j == end) {

                /* Found whole string. */

                return i - sourceOffset;

            }

        }

    }

    return -1;

}

六、contains方法:

public boolean contains(CharSequence s) {

//调用indexOf方法,如果>-1表示包含,否则不包含

    return indexOf(s.toString()) > -1;

}

七、toLowerCase方法:

/**
 * Converts this string to lower case using the rules of the given locale.
 *
 * The method first scans for the first character that lowercasing would change;
 * if there is none it returns {@code this}. Otherwise it copies the unchanged
 * prefix into a new array and converts the rest code point by code point,
 * growing the array when a mapping produces more chars than it consumed.
 *
 * @param locale the locale whose case rules apply
 * @return this string if nothing needs changing, otherwise a new lowercased string
 * @throws NullPointerException if {@code locale} is null
 */
public String toLowerCase(Locale locale) {

// A null locale is rejected outright; no default locale is substituted here.

if (locale == null) {

        throw new NullPointerException();

    }

    int firstUpper;

    final int len = value.length;

// Find the index of the first character that lowercasing would change;
// everything before it is copied unchanged into result further below.

    /* Now check if there are any characters that need to be changed. */

    scan: {

        for (firstUpper = 0 ; firstUpper < len; ) {

            char c = value[firstUpper];

// High surrogate: read the full code point with codePointAt before testing it.

            if ((c >= Character.MIN_HIGH_SURROGATE)

                    && (c <= Character.MAX_HIGH_SURROGATE)) {

                int supplChar = codePointAt(firstUpper);

                if (supplChar != Character.toLowerCase(supplChar)) {

                    break scan;

                }

                firstUpper += Character.charCount(supplChar);

            } else {


                if (c != Character.toLowerCase(c)) {

                    break scan;

                }

                firstUpper++;

            }

        }

        // The scan reached the end without finding anything to change.
        return this;

    }

    char[] result = new char[len];

    int resultOffset = 0;  /* result may grow, so i+resultOffset

                            * is the write location in result */

    /* Just copy the first few lowerCase characters. */

    System.arraycopy(value, 0, result, 0, firstUpper);

    String lang = locale.getLanguage();

    // NOTE(review): the language codes are compared by reference (==), not equals();
    // presumably this relies on Locale returning interned strings - confirm.
    boolean localeDependent =

            (lang == "tr" || lang == "az" || lang == "lt");

    char[] lowerCharArray;

    int lowerChar;

    int srcChar;

    int srcCount;

    // srcCount is the number of chars the current code point occupies in value (1 or 2).
    for (int i = firstUpper; i < len; i += srcCount) {

        srcChar = (int)value[i];

        if ((char)srcChar >= Character.MIN_HIGH_SURROGATE

                && (char)srcChar <= Character.MAX_HIGH_SURROGATE) {

            srcChar = codePointAt(i);

            srcCount = Character.charCount(srcChar);

        } else {

            srcCount = 1;

        }

        // Locale-dependent languages and the two listed characters go through
        // ConditionalSpecialCasing instead of the plain Character mapping.
        if (localeDependent ||

            srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA

            srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE

            lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);

        } else {

// Plain case: map the code point with Character.toLowerCase.

            lowerChar = Character.toLowerCase(srcChar);

        }

        if ((lowerChar == Character.ERROR)

                || (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {

            if (lowerChar == Character.ERROR) {

                // ERROR result: fetch the mapping as a char array instead.
                lowerCharArray =

                        ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);

            } else if (srcCount == 2) {

                // Supplementary source and result: write in place and adjust the
                // offset by the difference in char counts.
                resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;

                continue;

            } else {

                lowerCharArray = Character.toChars(lowerChar);

            }

            /* Grow result if needed */

            int mapLen = lowerCharArray.length;

            if (mapLen > srcCount) {

                char[] result2 = new char[result.length + mapLen - srcCount];

                System.arraycopy(result, 0, result2, 0, i + resultOffset);

                result = result2;

            }

            for (int x = 0; x < mapLen; ++x) {

                result[i + resultOffset + x] = lowerCharArray[x];

            }

            resultOffset += (mapLen - srcCount);

        } else {

            result[i + resultOffset] = (char)lowerChar;

        }

    }

    // len + resultOffset is the number of chars actually written to result.
    return new String(result, 0, len + resultOffset);

}

八、length()方法:

/**
 * Returns the length of this string.
 *
 * @return the length of the backing char array, i.e. the number of
 *         {@code char} elements stored
 */
public int length() {

// The string's length is simply the length of its backing char array.

    return value.length;

}

九、trim()方法:

public String trim() {

    int len = value.length;

    int st = 0;

    char[] val = value;    /* avoid getfield opcode */

//从0开始判断字符ascii是否小于等于空格字符

    while ((st < len) && (val[st] <= ' ')) {

        st++;

    }

//从最后一个字符开始判断字符ascii是否小于等于空格字符

    while ((st < len) && (val[len - 1] <= ' ')) {

        len--;

    }

//如果前或后存在空白字符,则substring截取中间非空白字符串

    return ((st > 0) || (len < value.length)) ? substring(st, len) : this;

}

十、replace()方法:

public String replace(char oldChar, char newChar) {

//若旧字符与新字符相同,直接返回原始字符串

    if (oldChar != newChar) {

        int len = value.length;

        int i = -1;

        char[] val = value; /* avoid getfield opcode */

//找到第一个需要替换的字符,因为不用变的字符可以直接拷贝到暂存字符数组

        while (++i < len) {

            if (val[i] == oldChar) {

                break;

            }

        }

        if (i < len) {

            char buf[] = new char[len];

            for (int j = 0; j < i; j++) {

                buf[j] = val[j];

            }

//从第1个需要替换字符开始到最后个字符,通过判断替换赋值给暂存字符数组

            while (i < len) {

                char c = val[i];

                buf[i] = (c == oldChar) ? newChar : c;

                i++;

            }

  //返回一个新的字符串

            return new String(buf, true);

        }

    }

    return this;

}

//当仅替换单个字符时,一定要使用上面的方法,否则性能相差10倍,2毫秒与0.2毫秒的区别

public String replace(CharSequence target, CharSequence replacement) {

//通过正则表达式替换

returnPattern.compile(target.toString(), Pattern.LITERAL).matcher(

            this).replaceAll(Matcher.quoteReplacement(replacement.toString()));

}

//字符串替换,replaceAll 比 replace高

/**
 * Replaces every match of the regular expression {@code regex} with
 * {@code replacement}.
 *
 * The pattern is compiled on each call, and the replacement is handed to
 * {@code Matcher.replaceAll} unquoted.
 *
 * @param regex       the regular expression to match
 * @param replacement the replacement string
 * @return the resulting string
 */
public String replaceAll(String regex, String replacement) {
    return Pattern.compile(regex).matcher(this).replaceAll(replacement);
}

十一、split方法:

/**
 * Splits this string around matches of {@code regex} by delegating to
 * {@code split(regex, 0)}.
 *
 * @param regex the delimiting regular expression
 * @return whatever {@code split(regex, 0)} returns
 */
public String[] split(String regex) {

    return split(regex, 0);

}

public String[] split(String regex, int limit) {

    /* fastpath if the regex is a

     (1)one-char String and this character is not one of the

        RegEx's meta characters ".$|()[{^?*+\\", or

     (2)two-char String and the first char is the backslash and

        the second is not the ascii digit or ascii letter.

     */

    char ch = 0;

    if (((regex.value.length == 1 &&

         ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||

         (regex.length() == 2 &&

          regex.charAt(0) == '\\' &&

          (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&

          ((ch-'a')|('z'-ch)) < 0 &&

          ((ch-'A')|('Z'-ch)) < 0)) &&

        (ch < Character.MIN_HIGH_SURROGATE ||

         ch > Character.MAX_LOW_SURROGATE))

    {

        int off = 0;

        int next = 0;

        boolean limited = limit > 0;

//通过ArrayList暂存分隔后的字符串

        ArrayList list = new ArrayList<>();

        while ((next = indexOf(ch, off)) != -1) {

            if (!limited || list.size() < limit - 1) {

                list.add(substring(off, next));

                off = next + 1;

            } else {    // last one

                //assert (list.size() == limit - 1);

                list.add(substring(off, value.length));

                off = value.length;

                break;

            }

        }

//如果没有找到分隔符,则返回原始字符串

        // If no match was found, return this

        if (off == 0)

            return new String[]{this};

        // Add remaining segment

        if (!limited || list.size() < limit)

            list.add(substring(off, value.length));

        // Construct result

        int resultSize = list.size();

//如果limit参数为0,当分隔后最后一个字符串长度为0,则忽略掉

        if (limit == 0) {

            while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {

                resultSize--;

            }

        }

        String[] result = new String[resultSize];

        return list.subList(0, resultSize).toArray(result);

    }

//当分隔表达式不是一个字符或正则表达式开头,则直接使用Pattern的split方法进行分隔

returnPattern.compile(regex).split(this, limit);

}

十二、startsWith方法:

public boolean startsWith(String prefix, int toffset) {

    char ta[] = value;

    int to = toffset;

    char pa[] = prefix.value;

    int po = 0;

    int pc = prefix.value.length;

    // Note: toffset might be near -1>>>1.

    if ((toffset < 0) || (toffset > value.length - pc)) {

        return false;

    }

//循环判断前缀字符个数次,跳过toffset个字符之后,挨个prefix字符与字符串字符相比,不相等则返回false

    while (--pc >= 0) {

        if (ta[to++] != pa[po++]) {

            return false;

        }

    }

    return true;

}

十三、join方法(jdk1.8才开始有此方法):

public static String join(CharSequence delimiter, CharSequence... elements) {

//判断delimiter,若为null则抛出空指针异常

    Objects.requireNonNull(delimiter);

    Objects.requireNonNull(elements);

    // Number of elements not likely worth Arrays.stream overhead.

    StringJoiner joiner = new StringJoiner(delimiter);

//StringJoiner内部通过一个prefix, StringBuilder,suffix组成,add的时候实际是append(prefix).append(element), toString()的时候添加suffix

    for (CharSequence cs: elements) {

        joiner.add(cs);

    }

    return joiner.toString();

}

你可能感兴趣的:(String源码阅读笔记)