平时工作中少不了字符串拼接,了解这些关于字符串拼接的小知识,有助于你在编程过程中有意识地写出高性能的代码。
目前 Go 语言已知的字符串拼接方式有很多,比较常见的有 +、fmt.Sprintf、strings.Builder、bytes.Buffer、strings.Join、[]byte。
下面从两种场景分析各种拼接方式的性能情况
对于这种场景,我写了以下几个拼接方法:
// getDescSprintf builds the request description with fmt.Sprintf.
// The trailing err segment is emitted only when errStr is non-empty.
func getDescSprintf(uid, clientIP, url, errStr string, status int) string {
	if errStr == "" {
		return fmt.Sprintf("uid:[%s] status:[%d] clientIP:[%s] url:[%s]", uid, status, clientIP, url)
	}
	return fmt.Sprintf("uid:[%s] status:[%d] clientIP:[%s] url:[%s] err:[%s]", uid, status, clientIP, url, errStr)
}
// getDescPlus builds the request description with the + operator.
// The trailing err segment is emitted only when errStr is non-empty.
func getDescPlus(uid, clientIP, url, errStr string, status int) string {
	code := strconv.Itoa(status)
	if errStr == "" {
		return "uid:[" + uid + "] status:[" + code + "] clientIP:[" + clientIP + "] url:[" + url + "]"
	}
	return "uid:[" + uid + "] status:[" + code + "] clientIP:[" + clientIP + "] url:[" + url + "] err:[" + errStr + "]"
}
// getDescByte builds the request description by appending each piece to a
// byte slice and converting the slice to a string at the end.
// No capacity is reserved up front; the slice grows through append.
// The err segment is emitted only when errStr is non-empty.
func getDescByte(uid, clientIP, url, errStr string, status int) string {
	code := strconv.Itoa(status)

	buf := []byte{}
	buf = append(append(buf, "uid:["...), uid...)
	buf = append(append(buf, "] status:["...), code...)
	buf = append(append(buf, "] clientIP:["...), clientIP...)
	buf = append(append(buf, "] url:["...), url...)
	if errStr != "" {
		buf = append(append(buf, "] err:["...), errStr...)
	}
	buf = append(buf, ']')

	return string(buf)
}
// getDescJion builds the request description by joining the pieces with
// strings.Join and an empty separator.
// The trailing err segment is emitted only when errStr is non-empty.
func getDescJion(uid, clientIP, url, errStr string, status int) string {
	code := strconv.Itoa(status)
	if errStr == "" {
		return strings.Join([]string{"uid:[", uid, "] status:[", code, "] clientIP:[", clientIP, "] url:[", url, "]"}, "")
	}
	return strings.Join([]string{"uid:[", uid, "] status:[", code, "] clientIP:[", clientIP, "] url:[", url, "] err:[", errStr, "]"}, "")
}
// getDescBuffer builds the request description by writing each piece into a
// bytes.Buffer that starts empty (no Grow call).
// The err segment is emitted only when errStr is non-empty.
func getDescBuffer(uid, clientIP, url, errStr string, status int) string {
	var buf bytes.Buffer

	buf.WriteString("uid:[")
	buf.WriteString(uid)
	buf.WriteString("] status:[")
	buf.WriteString(strconv.Itoa(status))
	buf.WriteString("] clientIP:[")
	buf.WriteString(clientIP)
	buf.WriteString("] url:[")
	buf.WriteString(url)
	if len(errStr) > 0 {
		buf.WriteString("] err:[")
		buf.WriteString(errStr)
	}
	buf.WriteString("]")

	return buf.String()
}
// getDescBuilder builds the request description by writing each piece into a
// strings.Builder that starts empty (no Grow call).
// The err segment is emitted only when errStr is non-empty.
func getDescBuilder(uid, clientIP, url, errStr string, status int) string {
	var sb strings.Builder

	sb.WriteString("uid:[")
	sb.WriteString(uid)
	sb.WriteString("] status:[")
	sb.WriteString(strconv.Itoa(status))
	sb.WriteString("] clientIP:[")
	sb.WriteString(clientIP)
	sb.WriteString("] url:[")
	sb.WriteString(url)
	if len(errStr) > 0 {
		sb.WriteString("] err:[")
		sb.WriteString(errStr)
	}
	sb.WriteString("]")

	return sb.String()
}
结果如图所示,综合可以看出拼接速率(> 表示优于):+ >= strings.Join > fmt.Sprintf >= strings.Builder > bytes.Buffer >= []byte
在内存的消耗上也有明显差距。
可以看到,这里 + 和 strings.Join 之所以那么快,跟它们的内存分配次数只有两次有关:其中一次是 strconv.Itoa(status),另一次则是拼接时所分配的,而且它们每次拼接所分配的内存也是最小的。
如果是和分配有关,那我们知道在go中切片在make时不仅可以分配len还可以分配cap。那么在这种场景下显然是可以预先分配空间从而减少多次分配带来的性能消耗的。
于是我又添加了下面三个接口:
// getDescByteWithCap builds the request description by appending to a byte
// slice whose capacity is set to the exact output length before any append.
// The err segment is emitted only when errStr is non-empty.
func getDescByteWithCap(uid, clientIP, url, errStr string, status int) string {
	code := strconv.Itoa(status)

	// Exact output size: the fixed literal text plus every variable part.
	size := len("uid:[] status:[] clientIP:[] url:[]") + len(uid) + len(code) + len(clientIP) + len(url)
	if errStr != "" {
		size += len(" err:[]") + len(errStr)
	}

	buf := make([]byte, 0, size)
	buf = append(append(buf, "uid:["...), uid...)
	buf = append(append(buf, "] status:["...), code...)
	buf = append(append(buf, "] clientIP:["...), clientIP...)
	buf = append(append(buf, "] url:["...), url...)
	if errStr != "" {
		buf = append(append(buf, "] err:["...), errStr...)
	}
	buf = append(buf, ']')

	return string(buf)
}
// getDescBufferWithGrow builds the request description in a bytes.Buffer that
// is grown to the exact output length before the first write.
// The err segment is emitted only when errStr is non-empty.
func getDescBufferWithGrow(uid, clientIP, url, errStr string, status int) string {
	code := strconv.Itoa(status)

	// Exact output size: the fixed literal text plus every variable part.
	size := len("uid:[] status:[] clientIP:[] url:[]") + len(uid) + len(code) + len(clientIP) + len(url)
	if errStr != "" {
		size += len(" err:[]") + len(errStr)
	}

	var buf bytes.Buffer
	buf.Grow(size)
	buf.WriteString("uid:[")
	buf.WriteString(uid)
	buf.WriteString("] status:[")
	buf.WriteString(code)
	buf.WriteString("] clientIP:[")
	buf.WriteString(clientIP)
	buf.WriteString("] url:[")
	buf.WriteString(url)
	if errStr != "" {
		buf.WriteString("] err:[")
		buf.WriteString(errStr)
	}
	buf.WriteString("]")

	return buf.String()
}
// getDescBuilderWithGrow builds the request description in a strings.Builder
// that is grown to the exact output length before the first write.
// The err segment is emitted only when errStr is non-empty.
func getDescBuilderWithGrow(uid, clientIP, url, errStr string, status int) string {
	code := strconv.Itoa(status)

	// Exact output size: the fixed literal text plus every variable part.
	size := len("uid:[] status:[] clientIP:[] url:[]") + len(uid) + len(code) + len(clientIP) + len(url)
	if errStr != "" {
		size += len(" err:[]") + len(errStr)
	}

	var sb strings.Builder
	sb.Grow(size)
	sb.WriteString("uid:[")
	sb.WriteString(uid)
	sb.WriteString("] status:[")
	sb.WriteString(code)
	sb.WriteString("] clientIP:[")
	sb.WriteString(clientIP)
	sb.WriteString("] url:[")
	sb.WriteString(url)
	if errStr != "" {
		sb.WriteString("] err:[")
		sb.WriteString(errStr)
	}
	sb.WriteString("]")

	return sb.String()
}
发现初始化了 cap 之后,性能确实提升了不少,分配次数也大幅减少,strings.Builder 的表现更是直接超越了 + 和 strings.Join,成为了最佳。
看 strings.Join 的源码,我发现其底层也是用初始化了 cap 的 strings.Builder 做的,所以才能表现得这么优秀。
但是由于前面的兼容性处理影响了性能,所以表现没有我写的针对性方法在这个场景下表现得好。
strings.Builder(grow) > + >= strings.Join > []byte(cap) >= bytes.Buffer(grow) > fmt.Sprintf > strings.Builder > []byte > bytes.Buffer
对于这种场景,我写了以下几个拼接方法:
// keySep is the separator placed between elements when building a union key.
var keySep = ">>"
// GetUnionKeyPlus joins keyElements into one key using the + operator,
// placing keySep between adjacent elements. Zero elements yield "" and a
// single element is returned unchanged (via quickReturn).
func GetUnionKeyPlus(keyElements ...string) string {
	if s, ok := quickReturn(keyElements...); ok {
		return s
	}
	st := keyElements[0]
	for _, e := range keyElements[1:] {
		// The separator goes before each subsequent element so the result
		// matches strings.Join(keyElements, keySep).
		st = st + keySep + e
	}
	return st
}
// GetUnionKeySprintf joins keyElements into one key by repeatedly formatting
// the accumulated key, keySep and the next element with fmt.Sprintf.
// Zero elements yield "" and a single element is returned unchanged.
func GetUnionKeySprintf(keyElements ...string) string {
	if quick, done := quickReturn(keyElements...); done {
		return quick
	}
	key := keyElements[0]
	for i := 1; i < len(keyElements); i++ {
		key = fmt.Sprintf("%s%s%s", key, keySep, keyElements[i])
	}
	return key
}
// GetUnionKeyJion joins keyElements into one key with strings.Join, using
// keySep as the separator between adjacent elements.
func GetUnionKeyJion(keyElements ...string) string {
return strings.Join(keyElements, keySep)
}
// GetUnionKeyBuffer joins keyElements into one key by writing them into a
// bytes.Buffer (no Grow call), with keySep between adjacent elements.
// Zero elements yield "" and a single element is returned unchanged.
func GetUnionKeyBuffer(keyElements ...string) string {
	if quick, done := quickReturn(keyElements...); done {
		return quick
	}
	var buf bytes.Buffer
	for i, elem := range keyElements {
		if i > 0 {
			buf.WriteString(keySep)
		}
		buf.WriteString(elem)
	}
	return buf.String()
}
// GetUnionKeyBufferWithGrow joins keyElements into one key in a bytes.Buffer
// that is grown to the exact output length before the first write.
// Zero elements yield "" and a single element is returned unchanged.
func GetUnionKeyBufferWithGrow(keyElements ...string) string {
	if quick, done := quickReturn(keyElements...); done {
		return quick
	}

	// Exact output size: every element plus one separator per gap.
	size := len(keySep) * (len(keyElements) - 1)
	for i := range keyElements {
		size += len(keyElements[i])
	}

	var buf bytes.Buffer
	buf.Grow(size)
	for i, elem := range keyElements {
		if i > 0 {
			buf.WriteString(keySep)
		}
		buf.WriteString(elem)
	}
	return buf.String()
}
// GetUnionKeyByte joins keyElements into one key by appending to a byte slice
// (no capacity hint), with keySep between adjacent elements.
// Zero elements yield "" and a single element is returned unchanged.
func GetUnionKeyByte(keyElements ...string) string {
	if s, ok := quickReturn(keyElements...); ok {
		return s
	}
	b := make([]byte, 0)
	b = append(b, keyElements[0]...)
	// Start from the second element: the first one is already in b, and
	// ranging over the whole slice would emit it twice.
	for _, s := range keyElements[1:] {
		b = append(b, keySep...)
		b = append(b, s...)
	}
	return string(b)
}
// GetUnionKeyByteWithCap joins keyElements into one key by appending to a
// byte slice whose capacity is set to the exact output length up front, with
// keySep between adjacent elements.
// Zero elements yield "" and a single element is returned unchanged.
func GetUnionKeyByteWithCap(keyElements ...string) string {
	if s, ok := quickReturn(keyElements...); ok {
		return s
	}
	// Exact output size: every element plus one separator per gap.
	l := (len(keyElements) - 1) * len(keySep)
	for _, s := range keyElements {
		l += len(s)
	}
	b := make([]byte, 0, l)
	b = append(b, keyElements[0]...)
	// Start from the second element: the first one is already in b, and
	// ranging over the whole slice would emit it twice and overrun the
	// precomputed capacity.
	for _, s := range keyElements[1:] {
		b = append(b, keySep...)
		b = append(b, s...)
	}
	return string(b)
}
// GetUnionKeyBuilder joins keyElements into one key by writing them into a
// strings.Builder (no Grow call), with keySep between adjacent elements.
// Zero elements yield "" and a single element is returned unchanged.
func GetUnionKeyBuilder(keyElements ...string) string {
	if quick, done := quickReturn(keyElements...); done {
		return quick
	}
	var sb strings.Builder
	for i, elem := range keyElements {
		if i > 0 {
			sb.WriteString(keySep)
		}
		sb.WriteString(elem)
	}
	return sb.String()
}
// GetUnionKeyBuilderWithGrow joins keyElements into one key in a
// strings.Builder that is grown to the exact output length before the first
// write, with keySep between adjacent elements.
// Zero elements yield "" and a single element is returned unchanged.
func GetUnionKeyBuilderWithGrow(keyElements ...string) string {
	if quick, done := quickReturn(keyElements...); done {
		return quick
	}

	// Exact output size: every element plus one separator per gap.
	size := len(keySep) * (len(keyElements) - 1)
	for i := range keyElements {
		size += len(keyElements[i])
	}

	var sb strings.Builder
	sb.Grow(size)
	for i, elem := range keyElements {
		if i > 0 {
			sb.WriteString(keySep)
		}
		sb.WriteString(elem)
	}
	return sb.String()
}
// quickReturn handles the inputs that need no concatenation.
// It returns ("", true) for zero elements, (keyElements[0], true) for exactly
// one element, and ("", false) when two or more elements must be joined.
func quickReturn(keyElements ...string) (string, bool) {
	if len(keyElements) == 1 {
		return keyElements[0], true
	}
	// With the single-element case handled, only an empty input is "done".
	return "", len(keyElements) == 0
}
有了场景一的经验,这次我直接加上了初始化cap的方法一同对比测试,并且在每一个接口前加了参数校验,用于平衡当传入字符串数量为0或1时能够快速返回,不需要拼接,结果如下:
可以看到strings.Builder依然优秀,而之前性能很好的+和还可以的fmt.Sprintf这次都纷纷垫底了,因为他们内存分配的次数太多了。
strings.Builder(grow) >= strings.Join > bytes.Buffer(grow) >= []byte(cap) > strings.Builder > bytes.Buffer > []byte > + > fmt.Sprintf
字符串在 Go 语言中是不可变类型,占用内存大小是固定的,当使用 + 拼接 2 个字符串时,生成一个新的字符串,那么就需要开辟一段新的空间,新空间的大小是原来两个字符串的大小之和。拼接第三个字符串时,再开辟一段新空间,新空间大小是三个字符串大小之和,以此类推。
strings.Builder 和 bytes.Buffer 底层都是 []byte 数组,但 strings.Builder 性能比 bytes.Buffer 快了很多。一个比较重要的区别在于,bytes.Buffer 转化为字符串时重新申请了一块空间,存放生成的字符串变量,而 strings.Builder 直接将底层的 []byte 转换成了字符串类型返回了回来
而加了前面的一系列处理之后,strings.Builder 方法基本就等于实现了一个 strings.Join,所以它们的性能基本不相上下。
1.如果是像场景一,字符串数量固定,可以直接拼接的
strings.Builder(grow) > + >= strings.Join > []byte(cap) >= bytes.Buffer(grow) > fmt.Sprintf > strings.Builder > []byte > bytes.Buffer。
为了方便和易用,选择+即可,如果需要格式化,建议对于一些简单变量优先使用strconv进行转换,fmt.Sprintf中有大量的反射非常影响性能。如果并不太关注性能或需要格式化一些复杂的结构为字符串,可以选择使用fmt.Sprintf。
2.如果是像场景二,不太确定数量,尤其需要在循环中进行拼接的,
strings.Builder(grow) >= strings.Join > bytes.Buffer(grow) >= []byte(cap) > strings.Builder > bytes.Buffer > []byte > + > fmt.Sprintf
如果没有特殊需求,可以选择使用 strings.Join;如果有针对性的需求,可以用 strings.Builder(grow) 的方式来实现。