关注Java细节 -- String类 (一)






1. public String();
2. public String(String original);
3. public String(char value[]);
4. public String(char value[], int offset, int count);
5. public String(int[] codePoints, int offset, int count);
6. public String(byte ascii[], int hibyte, int offset, int count); @Deprecated
7. public String(byte ascii[], int hibyte); @Deprecated
8. public String(byte bytes[], int offset, int length, String charsetName) throws UnsupportedEncodingException;
9. public String(byte bytes[], String charsetName) throws UnsupportedEncodingException;
10. public String(byte bytes[], int offset, int length);
11. public String(byte bytes[]);
12. public String(StringBuffer buffer);
13. public String(StringBuilder builder);
14. String(int offset, int count, char value[]);







/** The value is used for character storage. */
private final char value[];

/** The offset is the first index of the storage that is used. */
private final int offset;

/** The count is the number of characters in the String. */
private final int count;

/** Cache the hash code for the string. Defaults to 0. */
private int hash;

/** use serialVersionUID from JDK 1.0.2 for interoperability */
private static final long serialVersionUID = -6849794470754667710L;

/**
 * Class String is special cased within the Serialization Stream Protocol.
 *
 * A String instance is written initially into an ObjectOutputStream in the
 * following format:
 * <pre>
 *      <code>TC_STRING</code> (utf String)
 * </pre>
 * The String is written by method <code>DataOutput.writeUTF</code>.
 * A new handle is generated to refer to all future references to the
 * string instance within the stream.
 */
private static final ObjectStreamField[] serialPersistentFields =
    new ObjectStreamField[0];

/**
 * A Comparator that orders <code>String</code> objects as by
 * <code>compareToIgnoreCase</code>. This comparator is serializable.
 * <p>
 * Note that this Comparator does <em>not</em> take locale into account,
 * and will result in an unsatisfactory ordering for certain locales.
 * The java.text package provides <em>Collators</em> to allow
 * locale-sensitive ordering.
 *
 * @see     java.text.Collator#compare(String, String)
 * @since   1.2
 */
public static final Comparator<String> CASE_INSENSITIVE_ORDER =
    new CaseInsensitiveComparator();








private static class CaseInsensitiveComparator implements Comparator<String>, java.io.Serializable { // use serialVersionUID from JDK 1.2.2 for interoperability private static final long serialVersionUID = 8575799808933029326L; public int compare(String s1, String s2) { int n1=s1.length(), n2=s2.length(); for (int i1=0, i2=0; i1<n1 && i2<n2; i1++, i2++) { char c1 = s1.charAt(i1); char c2 = s2.charAt(i2); if (c1 != c2) { c1 = Character.toUpperCase(c1); c2 = Character.toUpperCase(c2); if (c1 != c2) { c1 = Character.toLowerCase(c1); c2 = Character.toLowerCase(c2); if (c1 != c2) { return c1 - c2; } } } } return n1 - n2; } }


1. public String()

/**
 * Initializes a newly created {@code String} that represents an empty
 * character sequence: offset 0, count 0, and a zero-length backing array.
 */
public String() {
    this.offset = 0;
    this.count = 0;
    this.value = new char[0];
}


用JDK中的原话来说这个构造函数就是:Note that use of this constructor is unnecessary since Strings are immutable。


2. public String(String original)

/**
 * Initializes a newly created {@code String} with the same characters as
 * {@code original}.
 * <p>
 * If the backing array of {@code original} is longer than its character
 * count, only the used characters are copied into a fresh array; otherwise
 * the backing array is shared with {@code original}.
 *
 * @param original the string whose characters the new string represents
 * @throws NullPointerException if {@code original} is {@code null}
 */
public String(String original) {
    int size = original.count;
    char[] originalValue = original.value;
    char[] v;
    if (originalValue.length > size) {
        // The array representing the String is bigger than the new
        // String itself.  Perhaps this constructor is being called
        // in order to trim the baggage, so make a copy of the array.
        v = new char[size];
        System.arraycopy(originalValue, original.offset, v, 0, size);
    } else {
        // The array representing the String is the same
        // size as the String, so no point in making a copy.
        v = originalValue;
    }
    this.offset = 0;
    this.count = size;
    this.value = v;
}



这里我们看到,在拷贝的过程中,用到了 System.arraycopy(originalValue, original.offset, v, 0, size);


/**
 * Copies {@code length} array elements from {@code src}, beginning at
 * {@code srcPos}, into {@code dest}, beginning at {@code destPos}.
 * The method is declared {@code native}, so its implementation is not
 * visible here.
 *
 * @param      src      the source array.
 * @param      srcPos   starting position in the source array.
 * @param      dest     the destination array.
 * @param      destPos  starting position in the destination data.
 * @param      length   the number of array elements to be copied.
 * @exception  IndexOutOfBoundsException  if copying would cause
 *               access of data outside array bounds.
 * @exception  ArrayStoreException  if an element in the <code>src</code>
 *               array could not be stored into the <code>dest</code> array
 *               because of a type mismatch.
 * @exception  NullPointerException if either <code>src</code> or
 *               <code>dest</code> is <code>null</code>.
 */
public static native void arraycopy(Object src, int srcPos, Object dest, int destPos, int length);


3. public String(char value[])

/**
 * Initializes a newly created {@code String} holding a private copy of all
 * characters in {@code value}; later changes to the argument array do not
 * affect the new string.
 *
 * @param value the initial characters of the string
 * @throws NullPointerException if {@code value} is {@code null}
 */
public String(char value[]) {
    final char[] copy = new char[value.length];
    System.arraycopy(value, 0, copy, 0, copy.length);
    this.value = copy;
    this.count = copy.length;
    this.offset = 0;
}



4. public String(char value[], int offset, int count)

/**
 * Initializes a newly created {@code String} holding a private copy of
 * {@code count} characters of {@code value}, starting at {@code offset}.
 *
 * @param value  the array that is the source of characters
 * @param offset the index of the first character to copy
 * @param count  the number of characters to copy
 * @throws StringIndexOutOfBoundsException if {@code offset} or
 *         {@code count} is negative, or if {@code offset} is greater than
 *         {@code value.length - count}
 */
public String(char value[], int offset, int count) {
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    if (count < 0) {
        throw new StringIndexOutOfBoundsException(count);
    }
    // Note: offset or count might be near -1>>>1.
    // The check is written as a subtraction so that it cannot overflow
    // the way offset + count could.
    if (offset > value.length - count) {
        throw new StringIndexOutOfBoundsException(offset + count);
    }
    char[] v = new char[count];
    System.arraycopy(value, offset, v, 0, count);
    this.offset = 0;
    this.count = count;
    this.value = v;
}



5. public String(int[] codePoints, int offset, int count) 

public String(int[] codePoints, int offset, int count) { if (offset < 0) { throw new StringIndexOutOfBoundsException(offset); } if (count < 0) { throw new StringIndexOutOfBoundsException(count); } // Note: offset or count might be near -1>>>1. if (offset > codePoints.length - count) { throw new StringIndexOutOfBoundsException(offset + count); } int expansion = 0; int margin = 1; char[] v = new char[count + margin]; int x = offset; int j = 0; for (int i = 0; i < count; i++) { int c = codePoints[x++]; if (c < 0) { throw new IllegalArgumentException(); } if (margin <= 0 && (j+1) >= v.length) { if (expansion == 0) { expansion = (((-margin + 1) * count) << 10) / i; expansion >>= 10; if (expansion <= 0) { expansion = 1; } } else { expansion *= 2; } char[] tmp = new char[Math.min(v.length+expansion, count*2)]; margin = (tmp.length - v.length) - (count - i); System.arraycopy(v, 0, tmp, 0, j); v = tmp; } if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) { v[j++] = (char) c; } else if (c <= Character.MAX_CODE_POINT) { Character.toSurrogates(c, v, j); j += 2; margin--; } else { throw new IllegalArgumentException(); } } this.offset = 0; this.value = v; this.count = j; }


Unicode 是一个勇敢的成就。它把在这个星球上的每一个合理的文字系统整合成了一个单一的字符集。
很多人还存在这样的误解: Unicode 仅仅是 16 位的这么简单,每个字符占 16 位,所以一共有 65536 个可能的字符。

实际上,Unicode 理解字符的方式是截然不同的,而这是我们必须了解的。
到目前为止,我们都曾经认为:一个字符对应到一些在磁盘上或内存中储存的位(bits). 如: A -> 0100 0001
而在 Unicode 中, 一个字符实际上对应一种叫做 code point 的东西。
比如 A 这个字符,是抽象的(原文:platonic,柏拉图式的,理想的)一个概念。
无论是 Times New Roman 或者 Helvetica 或者其他的什么字体中,都代表同一个字符。但是它和小写的字母 a 不同。
但是在其他的语言,比如希伯来语(Hebrew)、德语(German)或阿拉伯语(Arabic)中,同一个字母的不同字形是否代表同一个字符,则存在争议。

每一个字母表中的每一个抽象的字母,都被赋予了一个数字,比如 U+0645. 这个叫做 code point.
U+ 表示: Unicode, 数字是 16 进制的。
在 Windows 2000/XP 中,你可以通过 charmap 命令来查看所有这些编码,或者访问 Unicode 的网站(http://www.unicode.org)。
Unicode 中 code point 的取值范围是 U+0000 到 U+10FFFF,早就超过了 65535,所以不是每个字符都能存储在两个字节中。
那么,一个字符串 "Hello", 在 Unicode 中会表示成 5 个 code points :
  U+0048 U+0065 U+006C U+006C U+006F


8. public String(byte bytes[], int offset, int length, String charsetName)
 throws UnsupportedEncodingException

/**
 * Initializes a newly created {@code String} by decoding a sub-array of
 * bytes with the named charset. The length of the new string is the length
 * of the decoded character array.
 *
 * @param bytes       the bytes to decode
 * @param offset      the index of the first byte to decode
 * @param length      the number of bytes to decode
 * @param charsetName the name of the charset used for decoding
 * @throws UnsupportedEncodingException declared by this constructor;
 *         presumably raised by the decoder for an unknown charset name
 * @throws NullPointerException if {@code charsetName} is {@code null}
 * @throws StringIndexOutOfBoundsException if {@code offset} and
 *         {@code length} do not describe a valid range of {@code bytes}
 */
public String(byte bytes[], int offset, int length, String charsetName)
        throws UnsupportedEncodingException {
    if (charsetName == null) {
        throw new NullPointerException("charsetName");
    }
    checkBounds(bytes, offset, length);
    final char[] decoded = StringCoding.decode(charsetName, bytes, offset, length);
    this.value = decoded;
    this.count = decoded.length;
    this.offset = 0;
}




另外,在这段函数中我们看到了一个函数: checkBounds(bytes, offset, length);



/**
 * Verifies that {@code offset} and {@code length} describe a valid range
 * inside {@code bytes}.
 *
 * @param bytes  the array the range must lie within
 * @param offset the index of the first byte of the range
 * @param length the number of bytes in the range
 * @throws StringIndexOutOfBoundsException if {@code length} or
 *         {@code offset} is negative, or if {@code offset} is greater than
 *         {@code bytes.length - length}
 */
private static void checkBounds(byte[] bytes, int offset, int length) {
    if (length < 0) {
        throw new StringIndexOutOfBoundsException(length);
    }
    if (offset < 0) {
        throw new StringIndexOutOfBoundsException(offset);
    }
    // Largest start index that still leaves room for length bytes.
    final int lastValidOffset = bytes.length - length;
    if (offset > lastValidOffset) {
        throw new StringIndexOutOfBoundsException(offset + length);
    }
}


9. public String(byte bytes[], String charsetName)  throws UnsupportedEncodingException


/**
 * Initializes a newly created {@code String} by decoding the whole byte
 * array with the named charset. Delegates to the four-argument constructor
 * with offset 0 and length {@code bytes.length}.
 *
 * @param bytes       the bytes to decode
 * @param charsetName the name of the charset used for decoding
 * @throws UnsupportedEncodingException declared by the constructor this
 *         one delegates to
 * @throws NullPointerException if {@code bytes} is {@code null}
 */
public String(byte bytes[], String charsetName)
        throws UnsupportedEncodingException {
    this(bytes, 0, bytes.length, charsetName);
}




10. public String(byte bytes[], int offset, int length)


 public String(byte bytes[], int offset, int length){ checkBounds(bytes, offset, length); char[] v = StringCoding.decode(bytes, offset, length); this.offset = 0; this.count = v.length; this.value = v; }





11. public String(byte bytes[])


 public String(byte bytes[]){ this(bytes, 0, bytes.length); }




12. public String(StringBuffer buffer)


public String(StringBuffer buffer){ String result = buffer.toString(); this.value = result.value; this.count = result.count; this.offset = result.offset; }




13. public String(StringBuilder builder)


 public String(StringBuilder builder){ String result = builder.toString(); this.value = result.value; this.count = result.count; this.offset = result.offset; }






14. String(int offset, int count, char value[])


/**
 * Package-private constructor that stores the given array reference
 * directly, without copying and without any bounds checks.
 *
 * @param offset the index of the first used character in {@code value}
 * @param count  the number of characters in the string
 * @param value  the backing array, which is stored as-is
 */
String(int offset, int count, char value[]) {
    this.offset = offset;
    this.count = count;
    this.value = value;
}












作者:  Derek Jiang

email:  [email protected]



