目录
简介
字段
创建string
构造器
参数为string,char
参数为代码点
参数为byte
参数为stringbuilder和stringbuffer
ValueOf
intern
String的创建后的等于
CharSequence的方法
length和isEmpty
charAt
subSequence和subString
比较方法
compareTo
无视大小写的比较
基本方法
toString
equals
hashcode
得到代码点,代码点数量,代码点偏移量
得到bytes数组
string是java中的字符串类
/**
*
* String类代表字符串。java程序中的所有字符串常量,例如"abc",是这个类的实例。
*
*
字符串是常量,它们的值不能在被创造后改变。字符串缓冲区支持可变的字符串。
* 因为字符串对象是不可变的,所以它们能被共享。例如:
*
*
* String str = "abc";
*
* 等价于
*
* char data[] = {'a', 'b', 'c'};
* String str = new String(data);
*
* 下面有更多的字符串如何被使用的例子:
*
* System.out.println("abc");
* String cde = "cde";
* System.out.println("abc" + cde);
* String c = "abc".substring(2,3);
* String d = cde.substring(1, 2);
*
*
* String类包含了各种方法,包括检查序列中单独的字符,比较字符串,
* 查询字符串,抽取子序列,创造一个所有字符变为大写或小写的字符串副本。
* 大小写映射基于Character类指定的Unicode的标准版本。
*
*
java语言为字符串连接符号(+)和其他对象转为字符串提供了特殊的支持。
* 字符串连接由StringBuilder或者StringBuffer类和它们的方法来实现。
* 字符串转换由方法toString来实现,这个方法被Object定义,并且被java中所有的类继承,可以看java语言规范。
*
*
除非另有说明,传入这个类的构造器或方法一个null参数会导致抛出NullPointerException
*
*
字符串代表UTF-16格式的字符串,其中补充字符由代理对表示(看Character类的Unicode Character Representation)。
* 索引值对应char的代码单元,所以一个补充字符在String中占用两个位置(两个代码单元)。
* 而正常字符和补充字符都对应一个代码点,但可能有1-2个代码单元。
* 就是说每个字符可能对应1-2个char,有的字符可能在char数组中占据两个位置。
*
*
String类提供了处理Unicode代码点和处理Unicode代码单元的方法。
*
* @author Lee Boynton
* @author Arthur van Hoff
* @author Martin Buchholz
* @author Ulf Zibis
* @see java.lang.Object#toString()
* @see java.lang.StringBuffer
* @see java.lang.StringBuilder
* @see java.nio.charset.Charset
* @since JDK1.0
*/
public final class String
implements java.io.Serializable, Comparable<String>, CharSequence // Comparable<String>: natural ordering is compareTo(String)
/** Holds the characters of the string. Note: this is a char array and the reference is final, so it cannot be reassigned. */
private final char value[];
/** Caches the hash code of the string. */
private int hash; // Defaults to 0
/** use serialVersionUID from JDK 1.0.2 for interoperability */
private static final long serialVersionUID = -6849794470754667710L;
/**
* Class String is special cased within the Serialization Stream Protocol.
* A String instance is written into an ObjectOutputStream according to
* the class's serialization specification.
*/
private static final ObjectStreamField[] serialPersistentFields =
new ObjectStreamField[0];
/**
* 初始化一个新建的String对象,从而它代表一个空的字符串序列。
* 注意这个构造器是不需要的,因为字符串是不可变的。
*/
public String() {
this.value = "".value;
}
/**
* 初始化一个新建的String对象,从而它代表与参数相同的字符串序列。
* 换言而之,新建的字符串是参数字符串的拷贝。
* 除非需要一个original的独一无二的拷贝,不需要使用这个构造器,因为字符串是不可更改的
*
* @param original
* A {@code String}
*/
public String(String original) {
    // Carry over the cached hash and reuse the source's backing array reference.
    this.hash = original.hash;
    this.value = original.value;
}
/**
* 分配一个新字符串,从而它代表的字符串序列包含了字符序列参数。
* 字符序列的内容被拷贝了。
* 之后对参数字符序列的修改不影响新建的字符串。
*
* @param value
* The initial value of the string
*/
public String(char value[]) {
//value值为一个复制的新数组。
this.value = Arrays.copyOf(value, value.length);
}
/**
* 分配一个新的String,它包含了参数字符数组的一个子数组。
* offset参数是子数组的第一个字符的位置,count参数指定了子数组的长度。
* 子数组的内容被复制。之后对字符数组的修改不影响新建的字符串。
*
* @param value
* Array that is the source of characters
*
* @param offset
* The initial offset
*
* @param count
* The length
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and {@code count} arguments index
* characters outside the bounds of the {@code value} array
*/
public String(char value[], int offset, int count) {
if (offset < 0) {
//排除offset<0
throw new StringIndexOutOfBoundsException(offset);
}
if (count <= 0) {
if (count < 0) {
//如果count<0,报错
throw new StringIndexOutOfBoundsException(count);
}
if (offset <= value.length) {
//如果count==0而且offset<=value.length
//建一个空字符串
this.value = "".value;
return;
}
}
// Note: offset or count might be near -1>>>1.
if (offset > value.length - count) {
//如果offset+count>value.length,报错
throw new StringIndexOutOfBoundsException(offset + count);
}
//将value数组的一部分复制到一个新的char数组
this.value = Arrays.copyOfRange(value, offset, offset+count);
}
/**
* 分配一个新的String,包含从一个Unicode代码点数组参数的子数组的字符。
* offset参数是子数组的第一个代码点的位置,count参数指定了子数组的长度。
* 子数组的内容被转换为char,之后对int数组的修改不影响新建的string
*
* @param codePoints Unicode代码点来源数组
*
* @param offset
* The initial offset
*
* @param count
* The length
*
* @throws IllegalArgumentException
* If any invalid Unicode code point is found in {@code
* codePoints}
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and {@code count} arguments index
* characters outside the bounds of the {@code codePoints} array
*
* @since 1.5
*/
public String(int[] codePoints, int offset, int count) {
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    if (count < 0) {
        throw new StringIndexOutOfBoundsException(count);
    }
    // Zero-length request with an offset in the valid range: empty string.
    if (count == 0 && offset <= codePoints.length) {
        this.value = "".value;
        return;
    }
    // Note: offset or count may be close to Integer.MAX_VALUE, so compare by subtraction.
    if (offset > codePoints.length - count) {
        throw new StringIndexOutOfBoundsException(offset + count);
    }

    final int end = offset + count;

    // Pass 1: work out exactly how many chars are needed, starting from one per code point.
    int charCount = count;
    for (int i = offset; i < end; i++) {
        final int cp = codePoints[i];
        if (Character.isBmpCodePoint(cp)) {
            // BMP code point: one code unit, size unchanged.
            continue;
        }
        if (!Character.isValidCodePoint(cp)) {
            // Neither BMP nor a valid code point at all.
            throw new IllegalArgumentException(Integer.toString(cp));
        }
        // Valid but outside the BMP: needs two code units, so one extra slot.
        charCount++;
    }

    // Pass 2: allocate and fill the char array.
    final char[] chars = new char[charCount];
    int out = 0;
    for (int i = offset; i < end; i++) {
        final int cp = codePoints[i];
        if (Character.isBmpCodePoint(cp)) {
            chars[out] = (char) cp;
            out += 1;
        } else {
            // Writes the high surrogate at out and the low surrogate at out + 1.
            Character.toSurrogates(cp, chars, out);
            out += 2;
        }
    }
    this.value = chars;
}
/**
* Allocates a new {@code String} constructed from a subarray of an array
* of 8-bit integer values.
*
* The {@code offset} argument is the index of the first byte of the
* subarray, and the {@code count} argument specifies the length of the
* subarray.
*
*
Each {@code byte} in the subarray is converted to a {@code char} as
* specified in the method above.
*
* @deprecated This method does not properly convert bytes into characters.
* As of JDK 1.1, the preferred way to do this is via the
* {@code String} constructors that take a {@link
* java.nio.charset.Charset}, charset name, or that use the platform's
* default charset.
*
* @param ascii
* The bytes to be converted to characters
*
* @param hibyte
* The top 8 bits of each 16-bit Unicode code unit
*
* @param offset
* The initial offset
* @param count
* The length
*
* @throws IndexOutOfBoundsException
* If the {@code offset} or {@code count} argument is invalid
*
* @see #String(byte[], int)
* @see #String(byte[], int, int, java.lang.String)
* @see #String(byte[], int, int, java.nio.charset.Charset)
* @see #String(byte[], int, int)
* @see #String(byte[], java.lang.String)
* @see #String(byte[], java.nio.charset.Charset)
* @see #String(byte[])
*/
@Deprecated
public String(byte ascii[], int hibyte, int offset, int count) {
checkBounds(ascii, offset, count);
char value[] = new char[count];
if (hibyte == 0) {
for (int i = count; i-- > 0;) {
value[i] = (char)(ascii[i + offset] & 0xff);
}
} else {
hibyte <<= 8;
for (int i = count; i-- > 0;) {
value[i] = (char)(hibyte | (ascii[i + offset] & 0xff));
}
}
this.value = value;
}
/**
* Allocates a new {@code String} containing characters constructed from
* an array of 8-bit integer values. Each character cin the
* resulting string is constructed from the corresponding component
* b in the byte array such that:
*
*
* c == (char)(((hibyte & 0xff) << 8)
* | (b & 0xff))
*
*
* @deprecated This method does not properly convert bytes into
* characters. As of JDK 1.1, the preferred way to do this is via the
* {@code String} constructors that take a {@link
* java.nio.charset.Charset}, charset name, or that use the platform's
* default charset.
*
* @param ascii
* The bytes to be converted to characters
*
* @param hibyte
* The top 8 bits of each 16-bit Unicode code unit
*
* @see #String(byte[], int, int, java.lang.String)
* @see #String(byte[], int, int, java.nio.charset.Charset)
* @see #String(byte[], int, int)
* @see #String(byte[], java.lang.String)
* @see #String(byte[], java.nio.charset.Charset)
* @see #String(byte[])
*/
@Deprecated
public String(byte ascii[], int hibyte) {
// Delegates to the four-argument overload with offset 0 and the full array length.
this(ascii, hibyte, 0, ascii.length);
}
/**通过方法,检查byte数组是否能满足offset和length,
* @param bytes
* @param offset
* @param length
*/
private static void checkBounds(byte[] bytes, int offset, int length) {
    if (length < 0) {
        throw new StringIndexOutOfBoundsException(length);
    }
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    // bytes.length must be >= offset + length; compare by subtraction.
    final int maxOffset = bytes.length - length;
    if (maxOffset < offset) {
        throw new StringIndexOutOfBoundsException(offset + length);
    }
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the specified charset. The length of the new {@code String}
* is a function of the charset, and hence may not be equal to the length
* of the subarray.
*
* The behavior of this constructor when the given bytes are not valid
* in the given charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since JDK1.1
*/
public String(byte bytes[], int offset, int length, String charsetName)
throws UnsupportedEncodingException {
if (charsetName == null)
throw new NullPointerException("charsetName");
checkBounds(bytes, offset, length);
this.value = StringCoding.decode(charsetName, bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.
* The length of the new {@code String} is a function of the charset, and
* hence may not be equal to the length of the subarray.
*
*
This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement string. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
*
* @param charset
* The {@linkplain java.nio.charset.Charset charset} to be used to
* decode the {@code bytes}
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since 1.6
*/
public String(byte bytes[], int offset, int length, Charset charset) {
if (charset == null)
throw new NullPointerException("charset");
checkBounds(bytes, offset, length);
this.value = StringCoding.decode(charset, bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the specified {@linkplain java.nio.charset.Charset charset}. The
* length of the new {@code String} is a function of the charset, and hence
* may not be equal to the length of the byte array.
*
*
The behavior of this constructor when the given bytes are not valid
* in the given charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @since JDK1.1
*/
public String(byte bytes[], String charsetName)
throws UnsupportedEncodingException {
// Decode the whole array with the named charset by delegating to the ranged overload.
this(bytes, 0, bytes.length, charsetName);
}
/**
* Constructs a new {@code String} by decoding the specified array of
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.
* The length of the new {@code String} is a function of the charset, and
* hence may not be equal to the length of the byte array.
*
*
This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement string. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param charset
* The {@linkplain java.nio.charset.Charset charset} to be used to
* decode the {@code bytes}
*
* @since 1.6
*/
public String(byte bytes[], Charset charset) {
// Decode the whole array with the given charset by delegating to the ranged overload.
this(bytes, 0, bytes.length, charset);
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the platform's default charset. The length of the new
* {@code String} is a function of the charset, and hence may not be equal
* to the length of the subarray.
*
*
The behavior of this constructor when the given bytes are not valid
* in the default charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and the {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since JDK1.1
*/
public String(byte bytes[], int offset, int length) {
//检查长度
checkBounds(bytes, offset, length);
//先用UTF-8,报错再用ISO-8859-1解码,从byte转为char
this.value = StringCoding.decode(bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the platform's default charset. The length of the new {@code
* String} is a function of the charset, and hence may not be equal to the
* length of the byte array.
*
*
The behavior of this constructor when the given bytes are not valid
* in the default charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @since JDK1.1
*/
public String(byte bytes[]) {
// Decode the whole array by delegating to the ranged overload that takes no charset.
this(bytes, 0, bytes.length);
}
/**
* Allocates a new string that contains the sequence of characters
* currently contained in the string buffer argument. The contents of the
* string buffer are copied; subsequent modification of the string buffer
* does not affect the newly created string.
*
* @param buffer
* A {@code StringBuffer}
*/
public String(StringBuffer buffer) {
synchronized(buffer) {
this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
}
}
/**
* Allocates a new string that contains the sequence of characters
* currently contained in the string builder argument. The contents of the
* string builder are copied; subsequent modification of the string builder
* does not affect the newly created string.
*
* This constructor is provided to ease migration to {@code
* StringBuilder}. Obtaining a string from a string builder via the {@code
* toString} method is likely to run faster and is generally preferred.
*
* @param builder
* A {@code StringBuilder}
*
* @since 1.5
*/
public String(StringBuilder builder) {
this.value = Arrays.copyOf(builder.getValue(), builder.length());
}
这里基本都是根据参数的toString方法,最后都是new一个string
/**
* Returns the string representation of the {@code Object} argument.
*
* @param obj an {@code Object}.
* @return if the argument is {@code null}, then a string equal to
* {@code "null"}; otherwise, the value of
* {@code obj.toString()} is returned.
* @see java.lang.Object#toString()
*/
public static String valueOf(Object obj) {
    // A null reference maps to the literal text "null"; anything else uses its own toString().
    if (obj == null) {
        return "null";
    }
    return obj.toString();
}
/**
* Returns the string representation of the {@code char} array
* argument. The contents of the character array are copied; subsequent
* modification of the character array does not affect the returned
* string.
*
* @param data the character array.
* @return a {@code String} that contains the characters of the
* character array.
*/
public static String valueOf(char data[]) {
    // The char[] constructor copies the array contents.
    final String copy = new String(data);
    return copy;
}
/**
* Returns the string representation of a specific subarray of the
* {@code char} array argument.
*
* The {@code offset} argument is the index of the first
* character of the subarray. The {@code count} argument
* specifies the length of the subarray. The contents of the subarray
* are copied; subsequent modification of the character array does not
* affect the returned string.
*
* @param data the character array.
* @param offset initial offset of the subarray.
* @param count length of the subarray.
* @return a {@code String} that contains the characters of the
* specified subarray of the character array.
* @exception IndexOutOfBoundsException if {@code offset} is
* negative, or {@code count} is negative, or
* {@code offset+count} is larger than
* {@code data.length}.
*/
public static String valueOf(char data[], int offset, int count) {
    // Bounds checking and copying are done by the ranged char[] constructor.
    final String slice = new String(data, offset, count);
    return slice;
}
/**
* Equivalent to {@link #valueOf(char[], int, int)}.
*
* @param data the character array.
* @param offset initial offset of the subarray.
* @param count length of the subarray.
* @return a {@code String} that contains the characters of the
* specified subarray of the character array.
* @exception IndexOutOfBoundsException if {@code offset} is
* negative, or {@code count} is negative, or
* {@code offset+count} is larger than
* {@code data.length}.
*/
public static String copyValueOf(char data[], int offset, int count) {
    // Bounds checking and copying are done by the ranged char[] constructor.
    final String slice = new String(data, offset, count);
    return slice;
}
/**
* Equivalent to {@link #valueOf(char[])}.
*
* @param data the character array.
* @return a {@code String} that contains the characters of the
* character array.
*/
public static String copyValueOf(char data[]) {
    // The char[] constructor copies the array contents.
    final String copy = new String(data);
    return copy;
}
/**
* Returns the string representation of the {@code boolean} argument.
*
* @param b a {@code boolean}.
* @return if the argument is {@code true}, a string equal to
* {@code "true"} is returned; otherwise, a string equal to
* {@code "false"} is returned.
*/
public static String valueOf(boolean b) {
    // Both results are string literals.
    if (b) {
        return "true";
    }
    return "false";
}
/**
* Returns the string representation of the {@code char}
* argument.
*
* @param c a {@code char}.
* @return a string of length {@code 1} containing
* as its single character the argument {@code c}.
*/
public static String valueOf(char c) {
    // Wrap the single char in a one-element array and pass it to the (char[], boolean) constructor.
    final char[] single = new char[] { c };
    return new String(single, true);
}
/**
* Returns the string representation of the {@code int} argument.
*
* The representation is exactly the one returned by the
* {@code Integer.toString} method of one argument.
*
* @param i an {@code int}.
* @return a string representation of the {@code int} argument.
* @see java.lang.Integer#toString(int, int)
*/
public static String valueOf(int i) {
    // Formatting is delegated to Integer.toString(int).
    final String text = Integer.toString(i);
    return text;
}
/**
* Returns the string representation of the {@code long} argument.
*
* The representation is exactly the one returned by the
* {@code Long.toString} method of one argument.
*
* @param l a {@code long}.
* @return a string representation of the {@code long} argument.
* @see java.lang.Long#toString(long)
*/
public static String valueOf(long l) {
    // Formatting is delegated to Long.toString(long).
    final String text = Long.toString(l);
    return text;
}
/**
* Returns the string representation of the {@code float} argument.
*
* The representation is exactly the one returned by the
* {@code Float.toString} method of one argument.
*
* @param f a {@code float}.
* @return a string representation of the {@code float} argument.
* @see java.lang.Float#toString(float)
*/
public static String valueOf(float f) {
    // Formatting is delegated to Float.toString(float).
    final String text = Float.toString(f);
    return text;
}
/**
* Returns the string representation of the {@code double} argument.
*
* The representation is exactly the one returned by the
* {@code Double.toString} method of one argument.
*
* @param d a {@code double}.
* @return a string representation of the {@code double} argument.
* @see java.lang.Double#toString(double)
*/
public static String valueOf(double d) {
    // Formatting is delegated to Double.toString(double).
    final String text = Double.toString(d);
    return text;
}
/**
* Returns a canonical representation for the string object.
*
* A pool of strings, initially empty, is maintained privately by the class String.
*
* When the intern method is invoked, if the pool already contains a string equal to
* this string (as determined by the equals method), then the string from the pool is returned.
* Otherwise, this string object is added to the pool and a reference to this string object is returned.
*
* For any two strings s and t, s.intern() == t.intern() is true if and only if s.equals(t) is true.
*
* All literal strings and string-valued constants are interned.
* String literals are defined in section 3.10.5 of the Java Language Specification.
*
* Annotator's note: a string is placed in the pool only when it is written as a literal or after
* intern has been called on it. Merely creating a string with new does not put it in the pool.
*
* Annotator's note: for str = "a" + "bc" the compiler folds the concatenation and treats str as a
* literal, so "abc" is in the pool. For str = "a" + new String("bc") or str = a + b the compiler
* does not fold it; str is treated as a new String variable and "abc" is not in the pool.
*
*
* @return a string that has the same contents as this string, but is
* guaranteed to be from a pool of unique strings.
*/
public native String intern();
package test.t05new;
/**
 * Demo comparing strings obtained in different ways by reference
 * ({@code ==}) and by content ({@code equals}).
 */
public class Test1 {
    /** Line printed between consecutive comparison groups. */
    private static final String SEPARATOR = "-------------------";

    /**
     * Builds six pairs of strings that all read "123" and prints, for each
     * pair, the result of {@code ==} followed by the result of {@code equals}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[][] pairs = {
            // literal vs literal
            {"123", "123"},
            // literal vs new String
            {"123", new String("123")},
            // new String vs new String
            {new String("123"), new String("123")},
            // String.valueOf(int) vs literal
            {String.valueOf(123), "123"},
            // interned literal vs literal
            {"123".intern(), "123"},
            // interned new String vs literal
            {new String("123").intern(), "123"},
        };
        for (int i = 0; i < pairs.length; i++) {
            if (i > 0) {
                System.out.println(SEPARATOR);
            }
            final String left = pairs[i][0];
            final String right = pairs[i][1];
            System.out.println(left == right);
            System.out.println(left.equals(right));
        }
    }
}
true
true
-------------------
false
true
-------------------
false
true
-------------------
false
true
-------------------
true
true
-------------------
true
true
可以看到new String和valueOf都是新建的String对象,"xxx"和String.intern的结果都是从string缓存区中得到的。
/**
* 返回字符串的长度,长度与字符串中Unicode的代码单元的长度相同。
*
* @return the length of the sequence of characters represented by this
* object.
*/
public int length() {
    // The length is the number of chars in the backing array.
    final int count = value.length;
    return count;
}
/**
* 当且仅当,length()返回0时,返回true
*
* @return {@code true} if {@link #length()} is {@code 0}, otherwise
* {@code false}
*
* @since 1.6
*/
public boolean isEmpty() {
    // Empty means the backing array holds no chars.
    final int count = value.length;
    return count == 0;
}
/**
* 返回指定位置的char值。位置的范围从0到length()-1.
* 第一个char值的索引为0,下一个为1,以此类推。
* 如果指定的char值是一个代理(高代理或低代理),返回这个代理值。
*
* @param index the index of the {@code char} value.
* @return the {@code char} value at the specified index of this string.
* The first {@code char} value is at index {@code 0}.
* @exception IndexOutOfBoundsException if the {@code index}
* argument is negative or not less than the length of this
* string.
*/
public char charAt(int index) {
    // Valid positions are 0 .. value.length - 1.
    final boolean inRange = index >= 0 && index < value.length;
    if (inRange) {
        return value[index];
    }
    throw new StringIndexOutOfBoundsException(index);
}
/**
* 返回一个是这个字符串的子字符串的字符串。
* 子字符串,以指定的beginIndex(包含)开始,到字符串的末尾(包含)结束。
* 因此子字符串的length为length()-beginIndex
*
* Examples:
*
* "unhappy".substring(2) returns "happy"
* "Harbison".substring(3) returns "bison"
* "emptiness".substring(9) returns "" (an empty string)
*
*
* @param beginIndex the beginning index, inclusive.
* @return the specified substring.
* @exception IndexOutOfBoundsException if
* {@code beginIndex} is negative or larger than the
* length of this {@code String} object.
*/
public String substring(int beginIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    final int remaining = value.length - beginIndex;
    if (remaining < 0) {
        // beginIndex lies past the end; the (negative) remaining length is reported.
        throw new StringIndexOutOfBoundsException(remaining);
    }
    // Starting at 0 covers the whole string, so this instance is returned as-is.
    if (beginIndex == 0) {
        return this;
    }
    // Otherwise copy the tail of the array into a new String.
    return new String(value, beginIndex, remaining);
}
/**
* 返回一个是这个字符串的子字符串的字符串。
* 子字符串,以指定的beginIndex(包含)开始,到endIndex - 1(包含)结束。
* 因此子字符串的length为endIndex-beginIndex
*
* 例子
*
* "hamburger".substring(4, 8) returns "urge"
* "smiles".substring(1, 5) returns "mile"
*
*
* @param beginIndex the beginning index, inclusive.
* @param endIndex the ending index, exclusive.
* @return the specified substring.
* @exception IndexOutOfBoundsException if the
* {@code beginIndex} is negative, or
* {@code endIndex} is larger than the length of
* this {@code String} object, or
* {@code beginIndex} is larger than
* {@code endIndex}.
*/
public String substring(int beginIndex, int endIndex) {
    // Lower bound: beginIndex may not be negative.
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    // Upper bound: endIndex may not exceed value.length.
    if (endIndex > value.length) {
        throw new StringIndexOutOfBoundsException(endIndex);
    }
    // endIndex must not be before beginIndex.
    final int span = endIndex - beginIndex;
    if (span < 0) {
        throw new StringIndexOutOfBoundsException(span);
    }
    // A range covering the whole string returns this instance.
    final boolean wholeString = beginIndex == 0 && endIndex == value.length;
    if (wholeString) {
        return this;
    }
    // Otherwise copy the requested part of the array into a new String.
    return new String(value, beginIndex, span);
}
/**
* Returns a character sequence that is a subsequence of this sequence.
*
* An invocation of this method of the form
*
*
* str.subSequence(begin, end)
*
* behaves in exactly the same way as the invocation
*
*
* str.substring(begin, end)
*
* @apiNote
* This method is defined so that the {@code String} class can implement
* the {@link CharSequence} interface.
*
* @param beginIndex the begin index, inclusive.
* @param endIndex the end index, exclusive.
* @return the specified subsequence.
*
* @throws IndexOutOfBoundsException
* if {@code beginIndex} or {@code endIndex} is negative,
* if {@code endIndex} is greater than {@code length()},
* or if {@code beginIndex} is greater than {@code endIndex}
*
* @since 1.4
* @spec JSR-51
*/
public CharSequence subSequence(int beginIndex, int endIndex) {
    // Same result as substring(beginIndex, endIndex), returned under the CharSequence type.
    final String sub = substring(beginIndex, endIndex);
    return sub;
}
/**
* 以字典序,比较两个字符串。
* 比较基于字符串每个字符的Unicode值。
* 如果这个字符串在字典上,在参数字符串之前,返回一个负数。
* 如果在参数之后,返回一个正数。
* 当字符串相同时,返回0。
* 当且仅当equals(Object)方法返回true时,compareTo才返回0。
*
*
* 下面是字典排序的定义。如果两个字符串不同,那么它们要么在一些位置上有不同的字符,或者它们的长度不同。
* 如果它们在一个或多个位置上的字符不同,让k是这种位置的最小index,然后哪个字符串在位置k上有更小的值,
* 这个根据小于号<决定,在字典上优先于另一个字符串。
* 这种情况下,compareTo返回两个字符串在位置k上的char值之差,即
*
*
* this.charAt(k)-anotherString.charAt(k)
*
*
* 如果它们没有不同的地方,则短的字符串在字典上优先于长的字符串。
* 这种情况下,compareTo返回两个字符串的长度差
*
* this.length()-anotherString.length()
*
*
* @param anotherString the {@code String} to be compared.
* @return the value {@code 0} if the argument string is equal to
* this string; a value less than {@code 0} if this string
* is lexicographically less than the string argument; and a
* value greater than {@code 0} if this string is
* lexicographically greater than the string argument.
*/
public int compareTo(String anotherString) {
    final char[] mine = value;
    final char[] theirs = anotherString.value;
    final int shared = Math.min(mine.length, theirs.length);
    for (int pos = 0; pos < shared; pos++) {
        // At the first differing position the result is this.charAt(k) - anotherString.charAt(k).
        final int diff = mine[pos] - theirs[pos];
        if (diff != 0) {
            return diff;
        }
    }
    // No difference within the shared prefix: the result is the length difference.
    // A result of 0 therefore means same chars and same length, i.e. equals() holds.
    return mine.length - theirs.length;
}
/**
* 无视大小写的comparator。这个comparator是可序列化的。
* 注意:这个comparator不能考虑地区因素,会导致在特定地区不满意的排序。
* java.text包提供了Collators来允许地区敏感的排序
*
* @see java.text.Collator#compare(String, String)
* @since 1.2
*/
public static final Comparator<String> CASE_INSENSITIVE_ORDER
        = new CaseInsensitiveComparator();

/**
 * Comparator that orders strings char by char while ignoring case
 * differences. The type argument {@code <String>} is required: with a raw
 * {@code Comparator} the {@code compare(String, String)} method below would
 * not implement the interface method.
 */
private static class CaseInsensitiveComparator
        implements Comparator<String>, java.io.Serializable {
    // use serialVersionUID from JDK 1.2.2 for interoperability
    private static final long serialVersionUID = 8575799808933029326L;

    /**
     * Compares two strings ignoring case.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return the difference of the first pair of chars that still differ
     *         after upper-casing and then lower-casing, or the length
     *         difference when no such pair exists in the shared prefix
     */
    @Override
    public int compare(String s1, String s2) {
        final int n1 = s1.length();
        final int n2 = s2.length();
        final int min = Math.min(n1, n2);
        for (int i = 0; i < min; i++) {
            char c1 = s1.charAt(i);
            char c2 = s2.charAt(i);
            if (c1 == c2) {
                continue;
            }
            // Try upper case first.
            c1 = Character.toUpperCase(c1);
            c2 = Character.toUpperCase(c2);
            if (c1 == c2) {
                continue;
            }
            // Still different: lower-case the upper-cased chars and compare again.
            c1 = Character.toLowerCase(c1);
            c2 = Character.toLowerCase(c2);
            if (c1 != c2) {
                // No overflow because of numeric promotion
                return c1 - c2;
            }
        }
        // Shared prefix matches ignoring case: order by length.
        return n1 - n2;
    }

    /** Replaces the de-serialized object. */
    private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
}
/**
* 比较两个字符串,以字典序,无视大小写差异。
* 其中通过Character.toLowerCase(Character.toUpperCase(character))来消除大小写差异。
*
*
* 注意:这个comparator不能考虑地区因素,会导致在特定地区不满意的排序。
* java.text包提供了Collators来允许地区敏感的排序
*
* @param str the {@code String} to be compared.
* @return a negative integer, zero, or a positive integer as the
* specified String is greater than, equal to, or less
* than this String, ignoring case considerations.
* @see java.text.Collator#compare(String, String)
* @since 1.2
*/
public int compareToIgnoreCase(String str) {
    // The shared case-insensitive comparator does the work, with this string as the left operand.
    final int result = CASE_INSENSITIVE_ORDER.compare(this, str);
    return result;
}
/**
* 返回自己
*
* @return the string itself.
*/
public String toString() {
    // A String is already its own string form; no copy is made.
    final String self = this;
    return self;
}
/**
* 与指定的对象比较。当且仅当参数不为null而且是一个string,而且代表着与这个对象相同的字符序列,才返回true
*
* @param anObject
* The object to compare this {@code String} against
*
* @return {@code true} if the given object represents a {@code String}
* equivalent to this string, {@code false} otherwise
*
* @see #compareTo(String)
* @see #equalsIgnoreCase(String)
*/
public boolean equals(Object anObject) {
    // Same reference: equal without looking at the contents.
    if (this == anObject) {
        return true;
    }
    // Anything that is not a String (including null) is never equal.
    if (!(anObject instanceof String)) {
        return false;
    }
    final char[] mine = value;
    final char[] theirs = ((String) anObject).value;
    // Different lengths cannot be equal.
    if (mine.length != theirs.length) {
        return false;
    }
    // Same length: a single differing char means not equal.
    for (int i = 0; i < mine.length; i++) {
        if (mine[i] != theirs[i]) {
            return false;
        }
    }
    return true;
}
/**
* 返回字符串的hashcode。字符串的hashcode以下面方式计算
*
* s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
*
* s[i]是字符串第i个字符,n是字符串的长度,^代表取幂。
* (空字符串的hashcode为0)
*
* @return a hash code value for this object.
*/
public int hashCode() {
    int h = hash;
    // A non-zero cached value, or an empty string, needs no computation.
    if (h != 0 || value.length == 0) {
        return h;
    }
    // Each step multiplies the running hash by 31 and adds the next char.
    for (char c : value) {
        h = 31 * h + c;
    }
    // Store the result in the cache field.
    hash = h;
    return h;
}
/**
* 返回指定index的代码点(character)。
* index从0到length()-1。
*
*
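* 如果指定index的char值在高代理范围内,
* 而且后面的index<length(),而且后面index的char值在低代理范围内,
* 那么返回这个代理对对应的补充代码点。否则,返回指定index的char值。
*
* @param index the index to the {@code char} values
* @return the code point value of the character at the {@code index}
* @exception IndexOutOfBoundsException if the {@code index}
* argument is negative or not less than the length of this string.
* @since 1.5
*/
public int codePointAt(int index) {
if ((index < 0) || (index >= value.length)) {
//0<=index<length
throw new StringIndexOutOfBoundsException(index);
}
return Character.codePointAtImpl(value, index, value.length);
}
/**
* 返回指定index之前的代码点。index范围为1到length()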
*
* 如果在index-1位置的char值在低代理范围,index-2是非负的,而且index-2的char值在高代理范围,
* 那么返回代理对对应的补充代码点。
* 如果index-1位置的char值是不成对的低代理或高代理,返回代理值。
*
* @param index the index following the code point that should be returned
* @return the Unicode code point value before the given index.
* @exception IndexOutOfBoundsException if the {@code index}
* argument is less than 1 or greater than the length
* of this string.
* @since 1.5
*/
public int codePointBefore(int index) {
    // The char examined is the one just before index, so index must satisfy 1 <= index <= length.
    final int previous = index - 1;
    if (previous < 0 || previous >= value.length) {
        throw new StringIndexOutOfBoundsException(index);
    }
    // For reference, Character.codePointBeforeImpl(a, index, start) works as follows:
    //   - read c2 = a[index - 1];
    //   - if c2 is a low surrogate and there is a char before it (index - 1 > start),
    //     read c1 = a[index - 2]; if c1 is a high surrogate, return toCodePoint(c1, c2);
    //   - otherwise return c2 itself.
    return Character.codePointBeforeImpl(value, index, 0);
}
/**
* 返回字符串,指定范围text内的代码点的数量。
* text从beginIndex开始(包含),endIndex-1结束(包含)。
* 因此text的长度为endIndex-beginIndex。
* text范围内的不成对的代理,计算为1个代码点(即成对的代理,2个char,计算为1个代码点)
*
* @param beginIndex the index to the first {@code char} of
* the text range.
* @param endIndex the index after the last {@code char} of
* the text range.
* @return the number of Unicode code points in the specified text
* range
* @exception IndexOutOfBoundsException if the
* {@code beginIndex} is negative, or {@code endIndex}
* is larger than the length of this {@code String}, or
* {@code beginIndex} is larger than {@code endIndex}.
* @since 1.5
*/
public int codePointCount(int beginIndex, int endIndex) {
    final boolean validRange =
            beginIndex >= 0 && endIndex <= value.length && beginIndex <= endIndex;
    if (!validRange) {
        throw new IndexOutOfBoundsException();
    }
    // For reference, Character.codePointCountImpl(a, offset, count) works as follows:
    //   - start with n = count (one code point per char);
    //   - walk the range; whenever a high surrogate is immediately followed, still
    //     inside the range, by a low surrogate, decrement n and skip the low surrogate;
    //   - return n.
    final int charCount = endIndex - beginIndex;
    return Character.codePointCountImpl(value, beginIndex, charCount);
}
/**
* 返回此String中,从给定的index偏移codePointOffset个代码点之后的索引。
* 不成对的代理,计算为1个代码点
*
*
* @param index the index to be offset
* @param codePointOffset the offset in code points
* @return the index within this {@code String}
* @exception IndexOutOfBoundsException if {@code index}
* is negative or larger then the length of this
* {@code String}, or if {@code codePointOffset} is positive
* and the substring starting with {@code index} has fewer
* than {@code codePointOffset} code points,
* or if {@code codePointOffset} is negative and the substring
* before {@code index} has fewer than the absolute value
* of {@code codePointOffset} code points.
* @since 1.5
*/
public int offsetByCodePoints(int index, int codePointOffset) {
    // index may equal value.length (one past the last char) but not exceed it.
    final boolean validIndex = index >= 0 && index <= value.length;
    if (!validIndex) {
        throw new IndexOutOfBoundsException();
    }
    // The whole backing array (start 0, count value.length) is the text range.
    final int textLength = value.length;
    return Character.offsetByCodePointsImpl(value, 0, textLength, index, codePointOffset);
}
/**
* Encodes this {@code String} into a sequence of bytes using the named
* charset, storing the result into a new byte array.
*
* The behavior of this method when this string cannot be encoded in
* the given charset is unspecified. The {@link
* java.nio.charset.CharsetEncoder} class should be used when more control
* over the encoding process is required.
*
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @return The resultant byte array
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @since JDK1.1
*/
public byte[] getBytes(String charsetName)
        throws UnsupportedEncodingException {
    if (charsetName == null) {
        throw new NullPointerException();
    }
    // Encode the entire backing array with the named charset.
    final byte[] encoded = StringCoding.encode(charsetName, value, 0, value.length);
    return encoded;
}
/**
* Encodes this {@code String} into a sequence of bytes using the given
* {@linkplain java.nio.charset.Charset charset}, storing the result into a
* new byte array.
*
*
This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement byte array. The
* {@link java.nio.charset.CharsetEncoder} class should be used when more
* control over the encoding process is required.
*
* @param charset
* The {@linkplain java.nio.charset.Charset} to be used to encode
* the {@code String}
*
* @return The resultant byte array
*
* @since 1.6
*/
public byte[] getBytes(Charset charset) {
    if (charset == null) {
        throw new NullPointerException();
    }
    // Encode the entire backing array with the given charset.
    final byte[] encoded = StringCoding.encode(charset, value, 0, value.length);
    return encoded;
}
/**
* 使用平台默认的charset将字符串编码为byte序列,并将结果存入一个新的byte数组。
* 当这个字符串不能用默认charset编码时,此方法的行为是未指定的。
* 当需要更多的控制编码过程,应该使用CharsetEncoder类。
*
* @return The resultant byte array
*
* @since JDK1.1
*/
public byte[] getBytes() {
    // No charset is passed; StringCoding.encode chooses it.
    final byte[] encoded = StringCoding.encode(value, 0, value.length);
    return encoded;
}