本人手写或借阅资料,仅供参考,有错误欢迎指正。
本章需调用第三章课后作业部分内容
import random
from homework.homework3 import UnorderedList
import timeit
#5.1 进行随机实验,测试顺序搜索算法与二分搜索算法在处理整数列表时的差异
#顺序搜索
def sequentialsearch(ml, v):
pos = 0
found = False
while pos < len(ml) and not found:
if ml[pos] == v:
found = True
else:
pos += 1
return found
#二分搜索
def binarysearch(ml, v):
l, r = 0, len(ml) - 1
found = False
while l <= r and not found:
mid = (l + r) // 2
if ml[mid] == v:
found = True
else:
if ml[mid] > v:
r = mid - 1
else:
l = mid + 1
return found
#性能比较
mllen = 100000
ml = list(range(mllen))
def searchperformance():
t = timeit.Timer("sequentialsearch(ml, %d)" % random.randrange(mllen), "from homework.homework5 import random, sequentialsearch, ml")
seqtime = t.timeit(number = 100)
t = timeit.Timer("binarysearch(ml, %d)" % random.randrange(mllen), "from homework.homework5 import random, binarysearch, ml")
bintime = t.timeit(number = 100)
print("%10.3f, %10.3f" % (seqtime, bintime))
#5.2 随机生成一个有序的整数列表。通过基准测试分析文中给出的二分搜索函数(递归版本
#与循环版本)。请解释你得到的结果。
#递归版本的二分搜索
def binarysearchrecursion(ml, v):
if len(ml) == 0:
return
else:
mid = len(ml) // 2
if ml[mid] == v:
return True
else:
if ml[mid] > v:
return binarysearchrecursion(ml[:mid], v)
else:
return binarysearchrecursion(ml[mid + 1:], v)
#性能比较
def binsearchperform():
t = timeit.Timer("binarysearchrecursion(ml, %d)" % random.randrange(mllen), "from homework.homework5 import random, \
binarysearchrecursion, ml")
bin1time = t.timeit(number = 100)
t = timeit.Timer("binarysearch(ml, %d)" % random.randrange(mllen), "from homework.homework5 import random, binarysearch, ml")
bin2time = t.timeit(number = 100)
print("%10.3f, %10.3f" % (bin1time, bin2time))
#5.3 不用切片运算符,实现递归版本的二分搜索算法。别忘了传入头元素和尾元素的下标。
#随机生成一个有序的整数列表,并进行基准测试。
def binarysearchrecursion2(ml, v, l, r):
if l > r:
return
mid = (l + r) // 2
if ml[mid] == v:
return True
else:
if ml[mid] > v:
return binarysearchrecursion2(ml, v, l, mid - 1)
else:
return binarysearchrecursion2(ml, v, mid + 1, r)
#性能比较
def binsearchrecurperform():
t = timeit.Timer("binarysearchrecursion(ml, %d)" % random.randrange(mllen), "from homework.homework5 import random, \
binarysearchrecursion, ml")
bin1time = t.timeit(number = 100)
t = timeit.Timer("binarysearchrecursion2(ml, %d, 0, mllen - 1)" % random.randrange(mllen), "from homework.homework5 import random,\
binarysearchrecursion2, ml, mllen")
bin2time = t.timeit(number = 100)
print("%10.3f, %10.3f" % (bin1time, bin2time))
#5.4 为散列表实现len 方法(__len__)
#5.5 为散列表实现in 方法(__contains__)。
#5.7 在本章中,散列表的大小为11。如果表满了,就需要增大。请重新实现put 方法,使
#得散列表可以在载荷因子达到一个预设值时自动调整大小(可以根据载荷对性能的影
#响,自己决定预设值)。
#5.8 实现平方探测这一再散列技巧
class HashTable:
def __init__(self, hsize):
self.size = hsize
self.num = 0
self.rehashsquarenum = 0
self.loadrate = 0.5
self.slots = [None] * self.size
self.data = [None] * self.size
def hashfunction(self, key, size):
return key % size
def rehashlinear(self, oldhash, size):
return (oldhash + 1) % size
def rehashsquare(self, oldhash, size):
self.rehashsquarenum += 1
return (oldhash + self.rehashsquarenum ** 2) % size
def put(self, key, data):
if self.num / self.size > self.loadrate:
print(self.num, self.size, '扩容中')
self.num = 0
oldslot = self.slots[:]
olddata = self.data[:]
self.slots = [None] * self.size * 2
self.data = [None] * self.size * 2
self.size *= 2
for (s, d) in zip(oldslot, olddata):
if s != None:
self.put(s, d)
hashvalue = self.hashfunction(key, self.size)
if self.slots[hashvalue] == None:
self.num += 1
self.slots[hashvalue] = key
self.data[hashvalue] = data
else:
if self.slots[hashvalue] == key:
self.data[hashvalue] = data
else:
nextslot = self.rehashlinear(hashvalue, self.size)
while self.slots[nextslot] != None and self.slots[nextslot] != key:
nextslot = self.rehashlinear(nextslot, self.size)
if self.slots[nextslot] == None:
self.num += 1
self.slots[nextslot] = key
self.data[nextslot] = data
else:
self.data[nextslot] = data
def get(self, key):
startslot = self.hashfunction(key, self.size)
data = None
stop = False
found = False
position = startslot
while self.slots[position] != None and not found and not stop:
if self.slots[position] == key:
found = True
data = self.data[position]
else:
position = self.rehash(position, self.size)
if position == startslot:
stop = True
return data
def __getitem__(self, key):
return self.get(key)
def __setitem__(self, key, value):
return self.put(key, value)
def __len__(self):
return self.num
def __contains__(self, key):
return self.get(key) != None
def hashdel(self, key):
hashvalue = self.hashfunction(key, self.size)
if self.slots[hashvalue] == None:
return
else:
self.slots[hashvalue] = None
self.data[hashvalue] = None
def HashTabletest():
ht = HashTable(2)
ht.put(1, 1)
ht.put(2, 2)
ht.put(3, 3)
ht.put(5, 5)
ht.put(7, 7)
print(ht[7])
#5.6 采用链接法处理冲突时,如何从散列表中删除元素?如果是采用开放定址法,又如何做
#呢?有什么必须处理的特殊情况?请为HashTable 类实现del 方法。
class LinkedHashTable(HashTable):
def __init__(self, hsize):
super().__init__(hsize)
self.slots = [UnorderedList()] * self.size
def put(self, key, data):
hashvalue = self.hashfunction(key, self.size)
self.num += 1
self.slots[hashvalue].add(data)
def get(self, key):
getslot = self.hashfunction(key, self.size)
return str(self.slots[getslot])
def hashdel(self, key):
hashvalue = self.hashfunction(key, self.size)
self.slots[hashvalue] = UnorderedList()
def LinkedHashTabletest():
lh = LinkedHashTable(11)
lh.put(11, 100)
lh.put(22, 200)
print(lh.get(0))
lh.hashdel(0)
print(lh.get(0))
#5.9 使用随机数生成器创建一个含500 个整数的列表。通过基准测试分析本章中的排序算
#法。它们在执行速度上有什么差别?
#5.10 利用同时赋值特性实现冒泡排序。
#5.12 利用同时赋值特性实现选择排序。
def bubblesort(ml):
for passnum in range(len(ml) - 1, 0, -1):
exchange = False
for i in range(passnum):
if ml[i] > ml[i + 1]:
ml[i], ml[i + 1] = ml[i + 1], ml[i]
exchange = True
if passnum and not exchange:
break
def selectionsort(ml):
for fillslot in range(len(ml) - 1, 0, -1):
positionofmax = ml.index(max(ml[0:fillslot + 1]))
ml[positionofmax], ml[fillslot] = ml[fillslot], ml[positionofmax]
def insertionsort(ml):
for idx in range(1, len(ml)):
cur = ml[idx]
pos = idx
while pos > 0 and ml[pos - 1] > cur :
ml[pos] = ml[pos - 1]
pos -= 1
ml[pos] = cur
def shellsort(ml):
sublistcount = len(ml) // 2
while sublistcount > 0:
for sp in range(sublistcount):
gapinsertionsort(ml, sp, sublistcount)
sublistcount //= 2
def gapinsertionsort(ml, start, gap):
for i in range(start + gap, len(ml), gap):
cur = ml[i]
pos = i
while pos >= gap and ml[pos - gap] > cur:
ml[pos] = ml[pos - gap]
pos -= gap
ml[pos] = cur
def mergesort(ml):
if len(ml) > 1:
mid = len(ml) // 2
l = ml[:mid]
r = ml[mid:]
mergesort(l)
mergesort(r)
i, j ,k = 0, 0, 0
while i < len(l) and j < len(r):
if l[i] < r[j]:
ml[k] = l[i]
i += 1
else:
ml[k] = r[j]
j += 1
k += 1
while i < len(l):
ml[k] = l[i]
i += 1
k += 1
while j < len(r):
ml[k] = r[j]
j += 1
k += 1
def quicksort(ml):
quicksorthelper(ml, 0, len(ml) - 1)
def quicksorthelper(ml, l, r):
if l < r:
pivotvalue = ml[l]
leftmark = l + 1
rightmark = r
done = False
while not done:
while leftmark <= rightmark and ml[leftmark] <= pivotvalue:
leftmark += 1
while leftmark <= rightmark and ml[rightmark] >= pivotvalue:
rightmark -= 1
if rightmark < leftmark:
done = True
else:
ml[leftmark], ml[rightmark] = ml[rightmark], ml[leftmark]
ml[l] = ml[rightmark]
ml[rightmark] = pivotvalue
quicksorthelper(ml, l, rightmark - 1)
quicksorthelper(ml, rightmark + 1, r)
sml = [random.randint(0,1000) for _ in range(500)]
def sortperformance():
times = 100
t = timeit.Timer("bubblesort(sml[:])", "from homework.homework5 import bubblesort, sml")
bubbletime = t.timeit(number = times)
t = timeit.Timer("selectionsort(sml[:])", "from homework.homework5 import selectionsort, sml")
selectiontime = t.timeit(number = times)
t = timeit.Timer("insertionsort(sml[:])", "from homework.homework5 import insertionsort, sml")
insertiontime = t.timeit(number = times)
t = timeit.Timer("shellsort(sml[:])", "from homework.homework5 import shellsort, sml")
shelltime = t.timeit(number = times)
t = timeit.Timer("mergesort(sml[:])", "from homework.homework5 import mergesort, sml")
mergetime = t.timeit(number = times)
t = timeit.Timer("quicksort(sml[:])", "from homework.homework5 import quicksort, sml")
quicktime = t.timeit(number = times)
print(bubbletime, selectiontime, insertiontime, shelltime, mergetime, quicktime)
#5.11 可以将冒泡排序算法修改为向两个方向“冒泡”。第一轮沿着列表“向上”遍历,第二
#轮沿着列表“向下”遍历。继续这一模式,直到无须遍历为止。实现这种排序算法,并
#描述它的适用情形。
#适用情景:个别处于逆序位置的数列,如9 2345678 1
def bubbleresort(ml):
l, r = 0, len(ml) - 1
exchange = True
while l < r and exchange:
exchange = False
for i in range(l, r):
if ml[i] > ml[i + 1]:
exchange = True
ml[i], ml[i + 1] = ml[i + 1], ml[i]
r -= 1
for i in range(r, l, -1):
if ml[i] < ml[i - 1]:
exchange = True
ml[i], ml[i - 1] = ml[i - 1], ml[i]
l += 1
#5.13 针对同一个列表使用不同的增量集,为希尔排序进行基准测试。
#书中示例已采用n/2,n/4......(2倍增量)
#略过
# 5.16 修改quickSort 函数,在选取基准值时采用三数取中法。通过实验对比两种技巧的性
#能差异。
# 这方面接触不到,做了没意义,学会快排就行了
#5.14 不使用切片运算符,实现mergeSort 函数。
def mergesortwithpointer(ml, tmp, left, right):
if left < right:
mid = (left + right) // 2
mergesortwithpointer(ml, tmp, left, mid)
mergesortwithpointer(ml, tmp, mid + 1, right)
i, j ,k = left, mid + 1, left
while i <= mid and j <= right:
if ml[i] < ml[j]:
tmp[k] = ml[i]
i += 1
else:
tmp[k] = ml[j]
j += 1
k += 1
while i <= mid:
tmp[k] = ml[i]
i += 1
k += 1
while j <= right:
tmp[k] = ml[j]
j += 1
k += 1
for a in range(left, right + 1):
ml[a] = tmp[a]
sml = [random.randint(0,1000) for _ in range(10)]
def mergesortwithpointertest():
smllen = len(sml)
tmp = [0]*smllen
mergesortwithpointer(sml, tmp, 0, smllen - 1)
print(sml)
#5.15 有一种改进快速排序的办法,那就是在列表长度小于某个值时采用插入排序(这个值被
#称为“分区限制”)。这是什么道理?
# 列表越是有序,快排性能越低
#重新实现快速排序算法,并给一个随机整数列表排序。采用不同的分区限制进行性能分析。
qrate = 0
sml = [random.randint(0,1000) for _ in range(5000)]
def quicksortwithinsert(ml):
quicksortwithinserthelper(ml, 0, len(ml) - 1)
def quickinsertionsort(ml, l, r):
for idx in range(l, r + 1):
cur = ml[idx]
pos = idx
while pos > 0 and ml[pos - 1] > cur :
ml[pos] = ml[pos - 1]
pos -= 1
ml[pos] = cur
def quicksortwithinserthelper(ml, l, r):
if l < r:
if (r - l) / len(ml) < qrate:
quickinsertionsort(ml, l, r)
return
pivotvalue = ml[l]
leftmark = l + 1
rightmark = r
done = False
while not done:
while leftmark <= rightmark and ml[leftmark] <= pivotvalue:
leftmark += 1
while leftmark <= rightmark and ml[rightmark] >= pivotvalue:
rightmark -= 1
if rightmark < leftmark:
done = True
else:
ml[leftmark], ml[rightmark] = ml[rightmark], ml[leftmark]
ml[l] = ml[rightmark]
ml[rightmark] = pivotvalue
quicksortwithinserthelper(ml, l, rightmark - 1)
quicksortwithinserthelper(ml, rightmark + 1, r)
# 5000的千分之二:10 子数组长度为10时插入排序要比快排好
def quicksortwithinsertperformance():
global qrate
for i in range(1, 5):
qrate = i / 1000
t = timeit.Timer("quicksortwithinsert(sml[:])", "from homework.homework5 \
import quicksortwithinsert, sml")
st = t.timeit(number = 10)
print(qrate, st)
#完全快排
t = timeit.Timer("quicksort(sml[:])", "from homework.homework5 \
import quicksort, sml")
st = t.timeit(number = 10)
print('0', st)