缘起
最近阅读<<我的第一本算法书>>(【日】石田保辉;宫崎修一)
本系列笔记拟采用golang练习之
归并排序
归并排序算法会把序列分成长度相同的两个子序列,
当无法继续往下分时(也就是每个子序列中只有一个数据时),
就对子序列进行归并。
归并指的是把两个排好序的子序列合并成一个有序序列。
该操作会一直重复执行,直到所有子序列都归并为一个整体为止。
总的运行时间为O(nlogn),这与前面讲到的堆排序相同。
摘自 <<我的第一本算法书>> 【日】石田保辉;宫崎修一
流程
- 给定待排序数组data[N]
- 创建缓冲区buffer[N]
- 设定初始的归并步长(已排序的子序列长度), span=1, 也就是将每个元素视作已排序的一个子序列
根据当前步长, 将所有已排序的子序列两两合并到缓冲区
- 设定q1为子序列1的头部下标, q2为子序列2的头部下标, p指向缓冲区的对应区域
- 比较q1与q2位置的值大小
- 如果q1的值小, 则取q1的值, 放入p, 然后q1和p都加1
- 如果q2的值小, 则取q2的值, 放入p, 然后q2和p都加1
- 如果子序列已经取完, 则复制尚未取完的子序列到p
- 交换data与buffer的指针, 将data视为缓冲区, buffer视为待归并数据
- span = span*2
- 重复步骤3-6, 直到span>N
设计
- ISorter: 定义排序器接口. 定义值比较函数以兼容任意数值类型, 通过调整比较函数实现倒序排序
- tMergeSort: 归并排序器, 实现ISorter接口.
单元测试
merge_sort_test.go. 归并排序是比较快的, 因此设定测试规模为10万元素.
package sorting
import (
"fmt"
"learning/gooop/sorting"
"learning/gooop/sorting/merge_sort"
"math/rand"
"testing"
"time"
)
func Test_MergeSort(t *testing.T) {
fnAssertTrue := func(b bool, msg string) {
if !b {
t.Fatal(msg)
}
}
reversed := false
fnCompare := func(a interface{}, b interface{}) sorting.CompareResult {
i1 := a.(int)
i2 := b.(int)
if i1 < i2 {
if reversed {
return sorting.GREATER
} else {
return sorting.LESS
}
} else if i1 == i2 {
return sorting.EQUAL
} else {
if reversed {
return sorting.LESS
} else {
return sorting.GREATER
}
}
}
fnTestSorter := func(sorter sorting.ISorter) {
reversed = false
// test simple array
samples := []interface{} { 2,3,1,5,4,7,6 }
samples = sorter.Sort(samples, fnCompare)
fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 2 3 4 5 6 7]", "expecting 1,2,3,4,5,6,7")
t.Log("pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]")
// test 10000 items sorting
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
for plus := 0;plus < 3;plus++ {
sampleCount := 100 * 1000 + plus
t.Logf("prepare large array with %v items", sampleCount)
samples = make([]interface{}, sampleCount)
for i := 0; i < sampleCount; i++ {
samples[i] = rnd.Intn(sampleCount * 10)
}
t.Logf("sorting large array with %v items", sampleCount)
t0 := time.Now().UnixNano()
samples = sorter.Sort(samples, fnCompare)
cost := time.Now().UnixNano() - t0
for i := 1; i < sampleCount; i++ {
fnAssertTrue(fnCompare(samples[i-1], samples[i]) != sorting.GREATER, "expecting <=")
}
t.Logf("end sorting large array, cost = %v ms", cost/1000000)
}
// test 0-20
sampleCount := 20
t.Log("sorting 0-20")
samples = make([]interface{}, sampleCount)
for i := 0;i < sampleCount;i++ {
for {
p := rnd.Intn(sampleCount)
if samples[p] == nil {
samples[p] = i
break
}
}
}
t.Logf("unsort = %v", samples)
samples = sorter.Sort(samples, fnCompare)
t.Logf("sorted = %v", samples)
fnAssertTrue(fmt.Sprintf("%v", samples) == "[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]", "expecting 0-20")
t.Log("pass sorting 0-20")
// test special
samples = []interface{} {}
samples = sorter.Sort(samples, fnCompare)
t.Log("pass sorting []")
samples = []interface{} { 1 }
samples = sorter.Sort(samples, fnCompare)
t.Log("pass sorting [1]")
samples = []interface{} { 3,1 }
samples = sorter.Sort(samples, fnCompare)
fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 3]", "expecting 1,3")
t.Log("pass sorting [1 3]")
reversed = true
samples = []interface{} { 2, 3,1 }
samples = sorter.Sort(samples, fnCompare)
fnAssertTrue(fmt.Sprintf("%v", samples) == "[3 2 1]", "expecting 3,2,1")
t.Log("pass sorting [3 2 1]")
}
t.Log("\ntesting MergeSort")
fnTestSorter(merge_sort.MergeSort)
}
测试输出
- 归并排序相当的快, 比堆排序还快1倍左右
- 比冒泡,选择,插入等有指数级的提升, 符合理论分析
- 代价与堆排序一致, 需要额外分配大小为N的空间, 用做归并缓冲区
- 比堆排序快的主要原因, 推测是因为堆排序初始化时, 批量push操作, 可能引发多次数组扩容.
$ go test -v merge_sort_test.go
=== RUN Test_MergeSort
merge_sort_test.go:111:
testing MergeSort
merge_sort_test.go:48: pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]
merge_sort_test.go:54: prepare large array with 100000 items
merge_sort_test.go:60: sorting large array with 100000 items
merge_sort_test.go:67: end sorting large array, cost = 35 ms
merge_sort_test.go:54: prepare large array with 100001 items
merge_sort_test.go:60: sorting large array with 100001 items
merge_sort_test.go:67: end sorting large array, cost = 36 ms
merge_sort_test.go:54: prepare large array with 100002 items
merge_sort_test.go:60: sorting large array with 100002 items
merge_sort_test.go:67: end sorting large array, cost = 33 ms
merge_sort_test.go:72: sorting 0-20
merge_sort_test.go:83: unsort = [6 10 8 9 14 1 12 4 19 7 11 16 15 17 0 2 18 3 5 13]
merge_sort_test.go:86: sorted = [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
merge_sort_test.go:88: pass sorting 0-20
merge_sort_test.go:93: pass sorting []
merge_sort_test.go:97: pass sorting [1]
merge_sort_test.go:102: pass sorting [1 3]
merge_sort_test.go:108: pass sorting [3 2 1]
--- PASS: Test_MergeSort (0.12s)
PASS
ok command-line-arguments 0.127s
ISorter.go
定义排序器接口. 定义值比较函数以兼容任意数值类型, 通过调整比较函数实现倒序排序
package sorting
type ISorter interface {
Sort(data []interface{}, comparator CompareFunction) []interface{}
}
type CompareFunction func(a interface{}, b interface{}) CompareResult
type CompareResult int
const LESS CompareResult = -1
const EQUAL CompareResult = 0
const GREATER CompareResult = 1
tMergeSort.go
归并排序器, 实现ISorter接口.
package merge_sort
import (
"learning/gooop/sorting"
)
type tMergeSort struct {}
func newMergeSort() sorting.ISorter {
return &tMergeSort{}
}
func (me *tMergeSort) Sort(data []interface{}, comparator sorting.CompareFunction) []interface{} {
if data == nil {
return nil
}
size := len(data)
if size <= 1 {
return data
}
var result []interface{} = nil
buffer := make([]interface{}, size)
for span := 1; span <= size;span *= 2 {
for i := 0;i < size;i += span * 2 {
merge(size, data, i, i + span, span, buffer, i, comparator)
}
result = buffer
data, buffer = buffer, data
}
if result == nil {
result = data
}
return result
}
// 合并data数组中的两个子序列: [q1:q1+span), [q2:q2+span), 到目标数组result的offset位置
func merge(size int, data []interface{}, q1 int, q2 int, span int, result []interface{}, offset int, comparator sorting.CompareFunction) {
e1 := min(q1 + span, size)
e2 := min(q2 + span, size)
j := -1
k := -1
for i := 0;i < span*2;i++ {
if q1 >= e1 {
j = q2
k = e2
} else if q2 >= e2 {
j = q1
k = e1
}
if j >= 0 {
for p := j;p < k;p++ {
result[offset] = data[p]
offset++
}
break
}
v1 := data[q1]
v2 := data[q2]
if lessEqual(v1, v2, comparator) {
result[offset] = v1
q1++
} else {
result[offset] = v2
q2++
}
offset++
}
}
func lessEqual(v1 interface{}, v2 interface{}, comparator sorting.CompareFunction) bool {
return comparator(v1, v2) != sorting.GREATER
}
func min(a,b int) int {
if a <= b {
return a
} else {
return b
}
}
var MergeSort = newMergeSort()
(end)