(3)String name = "ab"; name = name + "c";两条语句总共创建了多少个字符串对象?
// s1 is a variable, not a compile-time constant, so s1 + "b" is evaluated at
// run time and produces a newly created String object.
String s1 = "a";
String s2 = s1 + "b";
// The literal "ab" refers to the interned instance in the string constant pool.
String s3 = "ab";
// s2 and s3 hold equal contents but are different objects, so == is false.
System.out.println(s2 == s3);//false
下面来看一下JDK 7中String类的两个重要的变量(存储字符的value和缓存哈希值的hash,下面的代码只列出了value):
public final class String implements java.io.Serializable, Comparable<String>, CharSequence { /** The value is used for character storage. */ private final char value[]; }在JDK 6中其实还有另外的两个变量offset和count:
/** Excerpt of the JDK 6 String class showing only its three storage fields. */
public final class String implements java.io.Serializable, Comparable<String>, CharSequence{
    private final char value[]; // character array that stores the string's characters
    private final int offset; // index in the character array of the string's first character
    private final int count; // number of characters of the array that belong to the string
}
public String(String original) { this.value = original.value; this.hash = original.hash; }如上的构造函数直接复用original.value和original.hash来创建一个新的字符串对象,但是通常不建议这样做:例如new String("abc")可能会创建两个字符串对象(常量池中的"abc"和堆中新建的对象),消耗太大。继续来看其他的一些常用构造函数。
public String(char value[]) { this.value = Arrays.copyOf(value, value.length); } public String(char value[], int offset, int count) { if (offset < 0) { throw new StringIndexOutOfBoundsException(offset); } if (count < 0) { throw new StringIndexOutOfBoundsException(count); } // Note: offset or count might be near -1>>>1. if (offset > value.length - count) { throw new StringIndexOutOfBoundsException(offset + count); } this.value = Arrays.copyOfRange(value, offset, offset+count); } public String(int[] codePoints, int offset, int count) { if (offset < 0) { throw new StringIndexOutOfBoundsException(offset); } if (count < 0) { throw new StringIndexOutOfBoundsException(count); } // Note: offset or count might be near -1>>>1. if (offset > codePoints.length - count) { throw new StringIndexOutOfBoundsException(offset + count); } final int end = offset + count; // Pass 1: Compute precise size of char[] int n = count; for (int i = offset; i < end; i++) { int c = codePoints[i]; if (Character.isBmpCodePoint(c)) continue; else if (Character.isValidCodePoint(c)) n++; else throw new IllegalArgumentException(Integer.toString(c)); } // Pass 2: Allocate and fill in char[] final char[] v = new char[n]; for (int i = offset, j = 0; i < end; i++, j++) { int c = codePoints[i]; if (Character.isBmpCodePoint(c)) v[j] = (char)c; else Character.toSurrogates(c, v, j++); } this.value = v; }编写一个测试程序,如下:
char data[] = {'a', 'b', 'c'}; String str1 = new String(data); String str2 = new String(data,0,2); char data1[] = {0x4E2D, 0x56FD}; String str3 = new String(data1); System.out.println(str1);//abc System.out.println(str2);//ab System.out.println(str3);//中国还有以byte[]数组为参数创建字符串的构造函数,其中最主要的几个如下:
public String(byte bytes[], int offset, int length) { checkBounds(bytes, offset, length); char[] v = StringCoding.decode(bytes, offset, length); this.offset = 0; this.count = v.length; this.value = v; } public String(byte bytes[], int offset, int length, String charsetName) throws UnsupportedEncodingException { if (charsetName == null) throw new NullPointerException("charsetName"); checkBounds(bytes, offset, length); char[] v = StringCoding.decode(charsetName, bytes, offset, length); this.offset = 0; this.count = v.length; this.value = v; } public String(byte bytes[], int offset, int length, Charset charset) { if (charset == null) throw new NullPointerException("charset"); checkBounds(bytes, offset, length); char[] v = StringCoding.decode(charset, bytes, offset, length); this.offset = 0; this.count = v.length; this.value = v; }(注意:上面的代码中仍然对offset和count赋值,说明它摘自保留了这两个变量的JDK版本(如JDK 6);在去掉offset和count之后的JDK 7版本中,这些构造函数只需要为value赋值。)编写测试程序如下:
byte[] ascBytes = {(byte)0x61, (byte)0x62, (byte)0x63}; // ASCII的'a','b','c'字符 System.out.println(new String(ascBytes,0,2)); // ab System.out.println(new String(ascBytes,0,2,Charset.forName("ISO-8859-1")));// ab System.out.println(new String(ascBytes,0,2,"ISO-8859-1")); // ab其他的一些使用byte数组创建字符串的构造函数其实最终都是调用如上几个构造函数。另外还提供了StringBuffer和StringBuilder转String字符串的构造函数,如下:
/**
 * Creates a string from the current contents of a StringBuffer.
 * The first buffer.length() characters of the buffer's internal array are
 * copied, and the copy is made while holding the buffer's monitor.
 *
 * @param buffer the StringBuffer whose characters are copied
 */
public String(StringBuffer buffer) {
    synchronized(buffer) {
        this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
    }
}

/**
 * Creates a string from the current contents of a StringBuilder.
 * The first builder.length() characters of the builder's internal array are
 * copied; unlike the StringBuffer variant, no lock is taken.
 *
 * @param builder the StringBuilder whose characters are copied
 */
public String(StringBuilder builder) {
    this.value = Arrays.copyOf(builder.getValue(), builder.length());
}
/** * Returns the length of this string. * The length is equal to the number of <a href="Character.html#unicode">Unicode * code units</a> in the string. */ public int length() { return value.length; }如上的方法是返回字符串的长度,但是需要注意的是:这个长度是char(UTF-16代码单元)的个数,而不是Unicode代码点的个数;一个增补字符会占用两个char,要统计代码点的个数需要使用codePointCount()方法:
/**
 * Returns the number of Unicode code points in the range
 * [beginIndex, endIndex) of this string's character array.
 *
 * @param beginIndex index of the first char of the range
 * @param endIndex index after the last char of the range
 * @return the code point count computed by Character.codePointCountImpl
 * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
 *         larger than the array length, or beginIndex is larger than endIndex
 */
public int codePointCount(int beginIndex, int endIndex) {
    if (beginIndex < 0 || endIndex > value.length || beginIndex > endIndex) {
        throw new IndexOutOfBoundsException();
    }
    // The helper takes (start, length) rather than (start, end).
    return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex);
}
/**
 * Counts the code points in a[offset .. offset + count).
 * Starts from the number of chars and subtracts one for every high surrogate
 * that is immediately followed by a low surrogate inside the range, because
 * such a pair encodes a single code point.
 *
 * @param a the char array to inspect
 * @param offset index of the first char to inspect
 * @param count number of chars to inspect
 * @return the number of code points in the range
 */
static int codePointCountImpl(char[] a, int offset, int count) {
    int endIndex = offset + count;
    int n = count;
    for (int i = offset; i < endIndex; ) {
        // i is advanced past the current char before the low-surrogate check,
        // so a[i] below is the char that follows the high surrogate.
        if (isHighSurrogate(a[i++]) && i < endIndex && isLowSurrogate(a[i])) {
            n--;
            i++; // skip the low surrogate that was just paired
        }
    }
    return n;
}
来看一下JDK 6中的字符串截取的方法substring():
/**
 * JDK 6 implementation: returns the substring [beginIndex, endIndex).
 * When the range covers the whole string, this string itself is returned;
 * otherwise a new String is built from this string's own value array with an
 * adjusted offset and count.
 *
 * @param beginIndex the beginning index, inclusive
 * @param endIndex the ending index, exclusive
 * @return the requested substring
 * @throws StringIndexOutOfBoundsException if beginIndex is negative, endIndex
 *         is larger than count, or beginIndex is larger than endIndex
 */
public String substring(int beginIndex, int endIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    if (endIndex > count) {
        throw new StringIndexOutOfBoundsException(endIndex);
    }
    if (beginIndex > endIndex) {
        throw new StringIndexOutOfBoundsException(endIndex - beginIndex);
    }
    // The value array is handed to the new String as-is; only offset and count differ.
    return ((beginIndex == 0) && (endIndex == count)) ? this :new String(offset + beginIndex, endIndex - beginIndex, value);
}
// Package private constructor which shares value array for speed. String(int offset, int count, char value[]) { this.value = value; this.offset = offset; this.count = count; }char[] value 数组被共享了。而截取子串的操作是通过偏移量offset和长度count来实现的。这样就会造成内存泄漏问题:只要子串还被引用,原来的整个char[]数组就无法被回收,例如需要多次截取大字符串中的很小一部分时。
public class LeakTest { public static void main(String...args) { List<String> handler = new ArrayList<String>(); for(int i = 0; i < 100000; i++) { // HugeStr h=new HugeStr(); // 保存了对每个HugeStr类中str的强引用,所以一直无法释放 ImprovedHugeStr h = new ImprovedHugeStr(); handler.add(h.getSubString(1, 5)); } } } class HugeStr{ private String str=new String(new char[10000]); public String getSubString(int begin,int end){ return str.substring(begin,end); } } class ImprovedHugeStr{ private String str=new String(new char[10000]); public String getSubString(int begin,int end){ return new String(str.substring(begin,end)); } }如果使用被注释掉的HugeStr,那么handler中保存的每个子串都会与原来的str大字符串共享同一个char[]数组并对其保持强引用,导致这些大数组无法被回收,造成了内存的巨大浪费(ImprovedHugeStr则用new String()包装了子串,在JDK 6中只会复制子串实际用到的字符,因此不会继续引用大数组),最终使用HugeStr的程序运行会抛出异常:
Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
在JDK 7中进行了改进,如下:
/**
 * JDK 7 implementation: returns the substring [beginIndex, endIndex).
 * When the range covers the whole string, this string itself is returned;
 * otherwise a new String is created through the String(char[], int, int)
 * constructor.
 *
 * @param beginIndex the beginning index, inclusive
 * @param endIndex the ending index, exclusive
 * @return the requested substring
 * @throws StringIndexOutOfBoundsException if beginIndex is negative, endIndex
 *         is larger than the array length, or beginIndex is larger than endIndex
 */
public String substring(int beginIndex, int endIndex) {
    if (beginIndex < 0) {
        throw new StringIndexOutOfBoundsException(beginIndex);
    }
    if (endIndex > value.length) {
        throw new StringIndexOutOfBoundsException(endIndex);
    }
    int subLen = endIndex - beginIndex;// length of the requested substring
    if (subLen < 0) {
        throw new StringIndexOutOfBoundsException(subLen);
    }
    return ((beginIndex == 0) && (endIndex == value.length)) ? this
            : new String(value, beginIndex, subLen);
}
new String(value, beginIndex, subLen)
public String(char value[], int offset, int count) { if (offset < 0) { throw new StringIndexOutOfBoundsException(offset); } if (count < 0) { throw new StringIndexOutOfBoundsException(count); } // Note: offset or count might be near -1>>>1. if (offset > value.length - count) { throw new StringIndexOutOfBoundsException(offset + count); } this.value = Arrays.copyOfRange(value, offset, offset+count); }使用Arrays.copyOfRange()方法复制出一个新的字符数组,并以它作为新字符串的value。新串中的内容与原串中要截取的子字符串相同,但不再与原串共享字符数组。
/**
 * Splits this string around matches of the given regular expression.
 * A hand-written fast path is used when the delimiter is effectively a single
 * literal character; every other regex is delegated to
 * Pattern.compile(regex).split(this, limit).
 *
 * @param regex the delimiting regular expression
 * @param limit if positive, the result has at most limit elements and the last
 *        one holds the unsplit remainder; if zero, trailing empty strings are
 *        dropped; otherwise the fast path applies no cap and keeps trailing
 *        empty strings
 * @return the array of pieces, or an array containing only this string when
 *         the fast path finds no delimiter
 */
public String[] split(String regex, int limit) {
    /* fastpath if the regex is a
       (1)one-char String and this character is not one of the
          RegEx's meta characters ".$|()[{^?*+\\", or
       (2)two-char String and the first char is the backslash and
          the second is not the ascii digit or ascii letter.
    */
    char ch = 0;
    // ((ch-lo)|(hi-ch)) < 0 holds exactly when ch lies outside [lo, hi]: one of
    // the two differences is then negative, which sets the sign bit of the OR.
    // The final clause rejects surrogate chars as delimiters.
    if (((regex.value.length == 1 &&
         ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
         (regex.length() == 2 &&
          regex.charAt(0) == '\\' &&
          (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
          ((ch-'a')|('z'-ch)) < 0 &&
          ((ch-'A')|('Z'-ch)) < 0)) &&
        (ch < Character.MIN_HIGH_SURROGATE ||
         ch > Character.MAX_LOW_SURROGATE))
    {
        int off = 0;
        int next = 0;
        boolean limited = limit > 0;
        ArrayList<String> list = new ArrayList<>();
        while ((next = indexOf(ch, off)) != -1) {
            if (!limited || list.size() < limit - 1) {
                list.add(substring(off, next));
                off = next + 1;
            } else {    // last one
                //assert (list.size() == limit - 1);
                list.add(substring(off, value.length));
                off = value.length;
                break;
            }
        }
        // If no match was found, return this
        if (off == 0)
            return new String[]{this};

        // Add remaining segment
        if (!limited || list.size() < limit)
            list.add(substring(off, value.length));

        // Construct result
        int resultSize = list.size();
        // With limit == 0, trailing empty strings are excluded from the result.
        if (limit == 0)
            while (resultSize > 0 && list.get(resultSize - 1).length() == 0)
                resultSize--;
        String[] result = new String[resultSize];
        return list.subList(0, resultSize).toArray(result);
    }
    return Pattern.compile(regex).split(this, limit);
}
// Tokenize str on ';' and walk through every token; each token is discarded.
StringTokenizer strtk=new StringTokenizer(str,";");
while(strtk.hasMoreTokens()){
    strtk.nextToken();
}
// Position of the first ';' in str.
int j=str.indexOf(';');
// Extract the piece before the ';'; the result is discarded here.
str.substring(0,j);
// Continue with the part after the ';'.
// NOTE(review): presumably one step of a loop that repeats until no ';' is left - confirm.
str=str.substring(j+1);
public int hashCode() { int h = hash; if (h == 0 && value.length > 0) { char val[] = value; for (int i = 0; i < value.length; i++) { h = 31 * h + val[i]; } hash = h; } return h; }字符串的hashCode()计算的方法为:
s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1],其中s[i]为字符串的第i个字符,n为字符串的长度,^表示求幂;空字符串的hash值为0。
public boolean equals(Object anObject) { if (this == anObject) { return true; } if (anObject instanceof String) { String anotherString = (String) anObject; int n = value.length; if (n == anotherString.value.length) { char v1[] = value; char v2[] = anotherString.value; int i = 0; while (n-- != 0) { if (v1[i] != v2[i]) return false; i++; } return true; } } return false; }比较两个字符串中字符的序列组成是否相等,也就是比较字符串中的内容。如果不覆写这个方法,默认继承的是Object中的equals()方法,代码如下:
public boolean equals(Object obj) { return (this == obj); }使用“==”符号来比较两个引用类型的值,则比较的是引用地址,不准确,所以一定要进行覆写。
public native String intern();这是一个本地的方法,当调用 intern 方法时,如果缓存池已经包含一个等于此 String 对象的字符串,则返回池中的字符串。否则,将此 String 对象添加到池中,并返回此 String 对象的引用。
String param = "abc"; String newStr = new String("abc"); String param2 = new String("abc"); newStr.intern(); param2 = param2.intern(); // param2指向intern返回的常量池中的引用 System.out.println(param == newStr); // false System.out.println(param == param2); // true可以看到,使用intern()方法后,字符串变量的值如果不存在缓冲区中将会被缓存