String源码阅读笔记

一、String内部的实际的数据存储结构是char数组，源码（1.8）

public final class String implements java.io.Serializable, Comparable, CharSequence{

//用于存储字符串值

/** The value is used for character storage. */

private final charvalue[];

//缓存字符串的哈希值

/** Cache the hash code for the string */

private inthash; // Default to 0

}

二、String 五类构造函数：

/**
 * Builds a string from an entire byte array.
 *
 * <p>Delegates to the (bytes, offset, length) constructor with offset 0 and
 * length {@code bytes.length}, i.e. the whole array is used.
 *
 * @param bytes the bytes to build the string from
 */
public String(byte[] bytes) {
    this(bytes, 0, bytes.length);
}

/**
 * Builds a string from a character array.
 *
 * <p>The array is copied with {@code Arrays.copyOf}, so the new string owns its
 * own storage and does not share the caller's array.
 *
 * @param value the characters the new string will contain
 * @throws NullPointerException if {@code value} is {@code null}
 */
public String(char[] value) {
    // Second argument is the new length: value.length, not two separate arguments.
    this.value = Arrays.copyOf(value, value.length);
}

public String(String original) {

this.value = original.value;

this.hash = original.hash;

}

/**
 * Builds a string from the current content of a {@code StringBuffer}.
 *
 * <p>The copy is taken while holding the buffer's monitor, so the content and
 * the length are read under the same lock.
 *
 * @param buffer the buffer whose characters are copied
 */
public String(StringBuffer buffer) {
    synchronized (buffer) {
        this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
    }
}

/**
 * Builds a string from the current content of a {@code StringBuilder}.
 *
 * <p>Unlike the {@code StringBuffer} variant, no lock is taken around the copy.
 *
 * @param builder the builder whose characters are copied
 */
public String(StringBuilder builder) {
    this.value = Arrays.copyOf(builder.getValue(), builder.length());
}

三、equals方法：

public boolean equals(Object anObject) {

//对象与自身引用相同直接返回true

if (this == anObject) {

return true;

}

//对象如果不为String类型,直接返回false

if (anObject instanceof String) {

String anotherString = (String)anObject;

int n = value.length;

//如果对象的长度与本身的长度不一样,直接返回false

if (n == anotherString.value.length) {

char v1[] = value;

char v2[] = anotherString.value;

int i = 0;

//循环每个字符进行对比,完全相同则true,否则为false

while (n-- != 0) {

if (v1[i] != v2[i])

return false;

i++;

}

return true;

}

return false;

}

四、compareTo方法：

/**
 * Orders this string against another one, character by character.
 *
 * @param anotherString the string to compare with
 * @return the difference of the first pair of differing characters; if one
 *         string is a prefix of the other (or they are equal), the difference
 *         of the two lengths
 */
public int compareTo(String anotherString) {
    final char[] mine = value;
    final char[] theirs = anotherString.value;
    // Only the positions present in both strings can be compared pairwise.
    final int shared = Math.min(mine.length, theirs.length);
    for (int pos = 0; pos < shared; pos++) {
        final int diff = mine[pos] - theirs[pos];
        if (diff != 0) {
            return diff;
        }
    }
    // No differing character in the common part: the shorter string sorts first.
    return mine.length - theirs.length;
}

五、indexOf方法：

/**
 * Finds the first occurrence of {@code str} in this string, searching from index 0.
 *
 * @param str the substring to search for
 * @return whatever {@code indexOf(str, 0)} returns
 */
public int indexOf(String str) {
    return indexOf(str, 0);
}

//重载的方法

/**
 * Finds the first occurrence of {@code str} at or after {@code fromIndex}.
 *
 * <p>Delegates to the array-based {@code indexOf}, passing this string's whole
 * character array as the source and the whole array of {@code str} as the target.
 *
 * @param str       the substring to search for
 * @param fromIndex the index to start the search from
 * @return the result of the array-based search
 */
public int indexOf(String str, int fromIndex) {
    return indexOf(value, 0, value.length,str.value, 0, str.value.length, fromIndex);
}

//内部实际方法，仅供包内访问

staticint indexOf(char[] source, int sourceOffset, int sourceCount,char[] target, int targetOffset, int targetCount, int fromIndex) {

//如果开始下标大于等于源字符串总长度时，查找字符长度为0时返回源字符串长度，否则返回-1

if (fromIndex >= sourceCount) {

return (targetCount == 0 ? sourceCount : -1);

}

//开始下标小于0，则强制从0开始

if (fromIndex < 0) {

fromIndex = 0;

}

//查找字符串长度为0，则返回开始下标

if (targetCount == 0) {

return fromIndex;

}

//通过查找目标偏移位置找出第一个字符

char first = target[targetOffset];

//最大下标 = 源偏移位置 + （源长度 - 目标长度）

int max = sourceOffset + (sourceCount - targetCount);

//从起始下标开始到最大下标逐个字符查找

for (int i = sourceOffset + fromIndex; i <= max; i++) {

//起始下标不是第一个要找的字符，则先找到第一个字符

/* Look for first character. */

if (source[i] != first) {

while (++i <= max && source[i] != first);

}

//找到第一个字符后，开始找剩下的字符

/* Found first character, now look at the rest of v2 */

if (i <= max) {

int j = i + 1;

int end = j + targetCount - 1;

for (int k = targetOffset + 1; j < end && source[j] == target[k]; j++, k++);

if (j == end) {

/* Found whole string. */

return i - sourceOffset;

}

return -1;

}

六、contains方法：

/**
 * Reports whether this string contains the given character sequence.
 *
 * @param s the sequence to look for
 * @return {@code true} if {@code indexOf(s.toString())} is greater than -1
 */
public boolean contains(CharSequence s) {
    // Delegates to indexOf: a result greater than -1 means the sequence was found.
    return indexOf(s.toString()) > -1;
}

七、toLowerCase方法：

public String toLowerCase(Locale locale) {

//默认为 zh_CN

if (locale == null) {

throw new NullPointerException();

}

int firstUpper;

final int len = value.length;

//从第一个字符开始判断是否本身就为小写字符，把开头的小写字符串直接拷到目标result字符数组中

/* Now check if there are any characters that need to be changed. */

scan: {

for (firstUpper = 0 ; firstUpper < len; ) {

char c = value[firstUpper];

//判断字符是否在高代理区间, 常规字母不在高代理区间

if ((c >= Character.MIN_HIGH_SURROGATE)

&& (c <= Character.MAX_HIGH_SURROGATE)) {

int supplChar = codePointAt(firstUpper);

if (supplChar != Character.toLowerCase(supplChar)) {

break scan;

}

firstUpper += Character.charCount(supplChar);

} else {

if (c != Character.toLowerCase(c)) {

break scan;

}

firstUpper++;

}

return this;

}

char[] result = new char[len];

int resultOffset = 0; /* result may grow, so i+resultOffset

* is the write location in result */

/* Just copy the first few lowerCase characters. */

System.arraycopy(value, 0, result, 0, firstUpper);

String lang = locale.getLanguage();

boolean localeDependent =

(lang == "tr" || lang == "az" || lang == "lt");

char[] lowerCharArray;

int lowerChar;

int srcChar;

int srcCount;

for (int i = firstUpper; i < len; i += srcCount) {

srcChar = (int)value[i];

if ((char)srcChar >= Character.MIN_HIGH_SURROGATE

&& (char)srcChar <= Character.MAX_HIGH_SURROGATE) {

srcChar = codePointAt(i);

srcCount = Character.charCount(srcChar);

} else {

srcCount = 1;

}

if (localeDependent ||

srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA

srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE

lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);

} else {

//调用Character的toLowerCase方法转换为小写字符

lowerChar = Character.toLowerCase(srcChar);

}

if ((lowerChar == Character.ERROR)

|| (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {

if (lowerChar == Character.ERROR) {

lowerCharArray =

ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);

} else if (srcCount == 2) {

resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;

continue;

} else {

lowerCharArray = Character.toChars(lowerChar);

}

/* Grow result if needed */

int mapLen = lowerCharArray.length;

if (mapLen > srcCount) {

char[] result2 = new char[result.length + mapLen - srcCount];

System.arraycopy(result, 0, result2, 0, i + resultOffset);

result = result2;

}

for (int x = 0; x < mapLen; ++x) {

result[i + resultOffset + x] = lowerCharArray[x];

}

resultOffset += (mapLen - srcCount);

} else {

result[i + resultOffset] = (char)lowerChar;

}

return new String(result, 0, len + resultOffset);

}

八、length()方法：

/**
 * Returns the length of this string.
 *
 * @return the length of the backing {@code char} array, i.e. the number of
 *         {@code char} values in the string
 */
public int length() {
    // The length is simply the size of the backing character array.
    return value.length;
}

九、trim()方法：

/**
 * Strips leading and trailing characters whose code is less than or equal to
 * that of the space character.
 *
 * @return this string if nothing was stripped, otherwise the substring between
 *         the first and last characters greater than {@code ' '}
 */
public String trim() {
    final char[] chars = value; /* avoid getfield opcode */
    int start = 0;
    int end = chars.length;

    // Advance past leading characters <= ' '.
    for (; (start < end) && (chars[start] <= ' '); start++) {
    }
    // Pull the end back over trailing characters <= ' '.
    for (; (start < end) && (chars[end - 1] <= ' '); end--) {
    }

    // Nothing was stripped on either side: reuse this instance.
    if ((start == 0) && (end == value.length)) {
        return this;
    }
    return substring(start, end);
}

十、replace()方法：

public String replace(char oldChar, char newChar) {

//若旧字符与新字符相同，直接返回原始字符串

if (oldChar != newChar) {

int len = value.length;

int i = -1;

char[] val = value; /* avoid getfield opcode */

//找到第一个需要替换的字符，因为不用变的字符可以直接拷贝到暂存字符数组

while (++i < len) {

if (val[i] == oldChar) {

break;

}

if (i < len) {

char buf[] = new char[len];

for (int j = 0; j < i; j++) {

buf[j] = val[j];

}

//从第1个需要替换字符开始到最后个字符，通过判断替换赋值给暂存字符数组

while (i < len) {

char c = val[i];

buf[i] = (c == oldChar) ? newChar : c;

i++;

}

//返回一个新的字符串

return new String(buf, true);

}

return this;

}

//当仅替换单个字符时，一定要使用上面的方法，否则性能相差10倍，2毫秒与0.2毫秒的区别

/**
 * Replaces every occurrence of the literal {@code target} with the literal
 * {@code replacement}.
 *
 * <p>Implemented on top of the regex engine: the target is compiled with
 * {@code Pattern.LITERAL} so it is not treated as a regular expression, and the
 * replacement goes through {@code Matcher.quoteReplacement} so it is taken
 * literally as well.
 *
 * @param target      the sequence to be replaced
 * @param replacement the sequence to put in its place
 * @return the resulting string
 */
public String replace(CharSequence target, CharSequence replacement) {
    return Pattern.compile(target.toString(), Pattern.LITERAL).matcher(
            this).replaceAll(Matcher.quoteReplacement(replacement.toString()));
}

//字符串替换，replaceAll 比 replace高

/**
 * Replaces every substring matching the regular expression {@code regex} with
 * {@code replacement}.
 *
 * <p>The pattern is compiled on every call, and the replacement is passed to
 * the matcher as given (it is not quoted).
 *
 * @param regex       the regular expression to match
 * @param replacement the replacement passed to {@code Matcher.replaceAll}
 * @return the resulting string
 */
public String replaceAll(String regex, String replacement) {
    return Pattern.compile(regex).matcher(this).replaceAll(replacement);
}

十一、split方法：

/**
 * Splits this string around matches of {@code regex}.
 *
 * @param regex the delimiting expression
 * @return whatever {@code split(regex, 0)} returns
 */
public String[] split(String regex) {
    return split(regex, 0);
}

public String[] split(String regex, int limit) {

/* fastpath if the regex is a

(1)one-char String and this character is not one of the

RegEx's meta characters ".$|()[{^?*+\\", or

(2)two-char String and the first char is the backslash and

the second is not the ascii digit or ascii letter.

char ch = 0;

if (((regex.value.length == 1 &&

".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||

(regex.length() == 2 &&

regex.charAt(0) == '\\' &&

(((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&

((ch-'a')|('z'-ch)) < 0 &&

((ch-'A')|('Z'-ch)) < 0)) &&

(ch < Character.MIN_HIGH_SURROGATE ||

ch > Character.MAX_LOW_SURROGATE))

{

int off = 0;

int next = 0;

boolean limited = limit > 0;

//通过ArrayList暂存分隔后的字符串

ArrayList list = new ArrayList<>();

while ((next = indexOf(ch, off)) != -1) {

if (!limited || list.size() < limit - 1) {

list.add(substring(off, next));

off = next + 1;

} else { // last one

//assert (list.size() == limit - 1);

list.add(substring(off, value.length));

off = value.length;

break;

}

//如果没有找到分隔符，则返回原始字符串

// If no match was found, return this

if (off == 0)

return new String[]{this};

// Add remaining segment

if (!limited || list.size() < limit)

list.add(substring(off, value.length));

// Construct result

int resultSize = list.size();

//如果limit参数为0，当分隔后最后一个字符串长度为0，则忽略掉

if (limit == 0) {

while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {

resultSize--;

}

String[] result = new String[resultSize];

return list.subList(0, resultSize).toArray(result);

}

//当分隔表达式不是一个字符或正则表达式开头，则直接使用Pattern的split方法进行分隔

returnPattern.compile(regex).split(this, limit);

}

十二、startsWith方法：

public boolean startsWith(String prefix, int toffset) {

char ta[] = value;

int to = toffset;

char pa[] = prefix.value;

int po = 0;

int pc = prefix.value.length;

// Note: toffset might be near -1>>>1.

if ((toffset < 0) || (toffset > value.length - pc)) {

return false;

}

//循环判断前缀字符个数次，跳过toffset个字符之后，挨个prefix字符与字符串字符相比，不相等则返回false

while (--pc >= 0) {

if (ta[to++] != pa[po++]) {

return false;

}

return true;

}

十三、join方法（jdk1.8才开始有此方法）：

/**
 * Concatenates the given elements, placing {@code delimiter} between them.
 *
 * @param delimiter the separator inserted between consecutive elements
 * @param elements  the pieces to join
 * @return the joined string
 * @throws NullPointerException if {@code delimiter} or {@code elements} is {@code null}
 */
public static String join(CharSequence delimiter, CharSequence... elements) {
    // Fail fast on null arguments, delimiter first.
    Objects.requireNonNull(delimiter);
    Objects.requireNonNull(elements);
    // Number of elements not likely worth Arrays.stream overhead.
    final StringJoiner pieces = new StringJoiner(delimiter);
    for (int idx = 0; idx < elements.length; idx++) {
        pieces.add(elements[idx]);
    }
    return pieces.toString();
}

String源码阅读笔记

你可能感兴趣的:(String源码阅读笔记)