容器数据类型抽象基类,collections.abc
为Python标准库
容器数据类型,collections
为Python标准库
from collections import namedtuple
Card = namedtuple('card', ['suits', 'number'])
c1 = Card('红桃', 2)
print(c1, c1.number, c1.suits)
from random import choice, shuffle
from collections import namedtuple
_Card = namedtuple('Card', ['rank', 'suit'])
class Poker:
ranks = [str(n) for n in range(2, 11)] + list('JQKA')
suits = ['红心', '方片', '梅花', '黑桃']
def __init__(self):
self._cards = [_Card(rank, suit) for rank in Poker.ranks for suit in Poker.suits]
def __len__(self):
return self._cards.__len__()
def __getitem__(self, item):
return self._cards[item]
def __setitem__(self, key, value):
self._cards[key] = value
cards = Poker()
print(cards[3])
print(len(cards))
shuffle(cards) # 会改变每个索引对应的值,需要__setitem__
print(cards[3])
print(cards[:3])
print(choice(cards))
print(choice(cards))
from collections import deque
dq = deque()
dq.append(1)
dq.append(2)
print(dq.popleft()) # 1
print(dq.pop()) # 2
可哈希
from collections import Counter
items = ['red', 'blue', 'red', 'green', 'blue', 'blue']
print(Counter(items)) # Counter({'blue': 3, 'red': 2, 'green': 1})
from collections import defaultdict
s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
d = defaultdict(list)
for k, v in s:
d[k].append(v) # 会有一个None到[]的自动变换,否则会报错,处理空值
# default_factory为可调用对象,用于产生一个默认值
print(sorted(d.items())) # [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
dd = defaultdict(lambda: 0)
print(dd['key']) # 0
有序字典中元素排列顺序与插入顺序保持一致,Python3.6及以前dict无序,Python3.7及以后有序
from collections import OrderedDict
d = dict([('a', 1), ('b', 2), ('c', 3)])
print(d)
od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
print(od)
倒序迭代
from collections import ChainMap
baseline = {'music': 'bach', 'art': 'rembrandt'}
adjustments = {'art': 'van gogh', 'opera': 'carmen'}
cm = ChainMap(adjustments, baseline)
for i in cm:
print(i, end=" ") # music art opera
print(list(cm)) # ['music', 'art', 'opera']
print(cm.get('art')) # van gogh
当需要对内置数据类型str、list、dict
进行拓展,并且想要覆写其某些标准功能时,应该使用UserDict
, UserList
, 和UserString
,而不是直接继承str、list、dict
from collections import UserString, UserList, UserDict
us = UserString("abc")
print(us)
print(us.data)
ul = UserList([1, 2, 3])
print(ul)
print(ul.data)
ud = UserDict([(1, 'a'), (2, 'b'), (3, 'c')])
print(ud)
print(ud.data)
实现一个自动转大写key的字段,初始化、[]
设置属性__setitem__
、update分别需要重写,若使用UserDict只需重写__setitem__
class UpperDict(dict):
def __init__(self, data):
data = {k.upper(): v for k, v in data.items()}
super().__init__(data)
def __setitem__(self, key, value):
key = key.upper()
super().__setitem__(key, value)
def update(self, data): # 方法 'UpperDict.update()' 的签名与类 'MutableMapping' 中基方法的签名不匹配
data = {k.upper(): v for k, v in data.items()}
super().update(data)
d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)
class UpperDict(UserDict):
def __setitem__(self, key, value):
key = key.upper()
super().__setitem__(key, value)
d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)
queue
为Python标准库,同步队列,线程安全
import queue
q = queue.Queue()
print(q.put("1")) # None
print(q.qsize()) # 1
print(q.get()) # 1
print(q.put_nowait("2")) # None
print(q.get_nowait()) # 2
# 后进先出队列
lifo_queue = queue.LifoQueue()
print(lifo_queue.put(1)) # None
print(lifo_queue.put(2)) # None
print(lifo_queue.get()) # 2
# 优先队列,最小的先出
pq = queue.PriorityQueue()
pq.put(3)
pq.put(1)
print(pq.get()) # 1
pq.put(4)
pq.put(0)
print(pq.get()) # 0
enum
为Python标准库,枚举
from enum import Enum
class Color(Enum):
RED = 1
GREEN = 2
BLUE = 3
print(Color.RED.name, Color.RED.value) # RED 1
array
为Python标准库,数字数组,纯数字操作比list效率高
from array import array
a = array('l', [1, 2, 3, 4, 5])
print(a, type(a))
heapq
为Python标准库,堆队列算法(优先队列算法),堆使用小根堆,基于数组实现
import heapq
import operator
l = [5, 3, 4, 2, 1]
# 堆化
heapq.heapify(l)
print(l) # [1, 2, 4, 5, 3]
print(l[0]) # 1 第一个元素为小根堆最小值
# 入堆
heapq.heappush(l, 6)
print(l) # [1, 2, 4, 5, 3, 6]
# 出堆
print(heapq.heappop(l)) # 1
# 先入后出,比heappush+heappop高效
print(heapq.heappushpop(l, 0)) # 0
print(l) # [2, 3, 4, 5, 6]
# 先出后入,比heappop+heappush高效
print(heapq.heapreplace(l, 0)) # 2
print(l) # [0, 3, 4, 5, 6]
# 获取迭代器最大的n个元素
print(heapq.nlargest(10, range(100, 200, 5))) # [195, 190, 185, 180, 175, 170, 165, 160, 155, 150]
# 获取迭代器最小的n个元素
print(heapq.nsmallest(10, range(100, 200, 5))) # [100, 105, 110, 115, 120, 125, 130, 135, 140, 145]
# 若干有序迭代器元素合并,合并结果为一个生成器,日志合并示例
log_file1 = ["2023-10-23 10:12:13 [INFO] ...", "2023-10-23 10:12:14 [INFO] ...", "2023-10-23 10:12:15 [INFO] ..."]
log_file2 = ["2023-10-23 10:12:12 [INFO] ...", "2023-10-23 10:12:14 [DEBUG] ...", "2023-10-23 10:12:14 [INFO] ..."]
log_file3 = ["2023-10-23 10:12:11 [WARN] ...", "2023-10-23 10:12:18 [INFO] ...", "2023-10-23 10:12:20 [ERROR] ..."]
for line in heapq.merge(log_file1, log_file2, log_file3, key=operator.itemgetter(slice(0, 20))):
print(line)
# 2023-10-23 10:12:11 [WARN] ...
# 2023-10-23 10:12:12 [INFO] ...
# 2023-10-23 10:12:13 [INFO] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:14 [DEBUG] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:15 [INFO] ...
# 2023-10-23 10:12:18 [INFO] ...
# 2023-10-23 10:12:20 [ERROR] ...
bisect
为Python标准库,基于二分查找算法定位插入点,模块中函数基于插入点设计,并非使用__eq__
查找特定值,而是使用__lt__
查找插入点。
公共参数说明
a
,列表x
,列表元素lo
,低位下标hi
,高位下标key
,比较键函数说明
bisect.bisect_left(a, x, lo=0, hi=len(a), *, key=None)
,返回的插入点ip
将数组a
分为两个切片使得对于左侧切片 all(elem < x for elem in a[lo : ip])
为真值而对于右侧切片 all(elem >= x for elem in a[ip : hi])
为真值bisect.bisect_right(a, x, lo=0, hi=len(a), *, key=None)
,返回的插入点ip
将数组a
分为两个切片使得对于左侧切片 all(elem <= x for elem in a[lo : ip])
为真值而对于右则切片 all(elem > x for elem in a[ip : hi])
为真值bisect.insort_right(a, x, lo=0, hi=len(a), *, key=None)
,基于bisect_right
函数定位插入点然后插入元素,插入算法时间复杂度为O(n)
bisect.insort_left(a, x, lo=0, hi=len(a), *, key=None)
,基于bisect_left
函数定位插入点然后插入元素,插入算法时间复杂度为O(n)
bisect.bisect
与bisect.bisect_right
等价bisect.insort
与bisect.insort_right
等价import bisect
import operator
from collections import namedtuple
l = [1, 3, 5, 7, 9]
print(bisect.bisect_right(l, 5)) # 3
print(bisect.bisect_left(l, 5)) # 2
print(bisect.bisect_right(l, 4)) # 2
print(bisect.bisect_left(l, 4)) # 2
print(bisect.bisect_right(l, 9)) # 5
print(bisect.bisect_left(l, 9)) # 4
print(bisect.bisect_right(l, 1)) # 1
print(bisect.bisect_left(l, 1)) # 0
print(bisect.bisect_right(l, 0)) # 0
print(bisect.bisect_left(l, 0)) # 0
print(bisect.bisect_right(l, 10)) # 5
print(bisect.bisect_left(l, 10)) # 5
print(bisect.bisect_left(l, 5)) # 2
bisect.insort_left(l, 5)
print(l) # [1, 3, 5, 5, 7, 9] 插入的是第一个5 left会定位所有 满足小于5 的元素的下一个位置
print(bisect.bisect_right(l, 5)) # 4
bisect.insort_right(l, 5)
print(l) # [1, 3, 5, 5, 5, 7, 9] 插入的是第三个5 right会定位所有 满足大于5 的元素的上一个位置
# 若数组无序则不会正常工作
a = [1, 5, 3, 6]
print(bisect.bisect_left(a, 5)) # 3
bisect.bisect_left(a, 5) # [1, 5, 3, 6]
print(a)
基于定位插入点特性便于分段
import bisect
import operator
from collections import namedtuple
for score in [33, 99, 77, 70, 89, 90, 100]:
print('FDCBA'[bisect.bisect([60, 70, 80, 90], score)])
# F A C C B A A
基于定位插入点特性不便于查找特定元素,通常需要封装一层函数
import bisect
import operator
from collections import namedtuple
Movie = namedtuple('Movie', ('id', 'name', 'released', 'director'))
movies = [
Movie(1, 'Jaws', 1975, 'Spielberg'),
Movie(3, 'Titanic', 1997, 'Cameron'),
Movie(5, 'The Birds', 1963, 'Hitchcock'),
Movie(7, 'Aliens', 1986, 'Cameron')
]
print(bisect.bisect_left(movies, 2, key=operator.attrgetter('id'))) # 1
print(bisect.bisect_left(movies, 3, key=operator.attrgetter('id'))) # 1
print(bisect.bisect_right(movies, 3, key=operator.attrgetter('id'))) # 2
def index(a, x, key=None):
i = bisect.bisect_left(a, x, key=key)
if i != len(a) and (key(a[i]) if key else a[i]) == x:
return i
raise ValueError
def find_lt(a, x, key=None):
i = bisect.bisect_left(a, x, key=key)
if i:
return a[i - 1]
raise ValueError
def find_le(a, x, key=None):
i = bisect.bisect_right(a, x, key=key)
if i:
return a[i - 1]
raise ValueError
def find_gt(a, x, key=None):
i = bisect.bisect_right(a, x, key=key)
if i != len(a):
return a[i]
raise ValueError
def find_ge(a, x, key=None):
i = bisect.bisect_left(a, x, key=key)
if i != len(a):
return a[i]
raise ValueError
print(index(movies, 5, key=operator.attrgetter('id'))) # 2
print(index(movies, 2, key=operator.attrgetter('id'))) # ValueError
三方有序容器库,https://grantjenks.com/docs/sortedcollections/
import copy
l = [1, 2, [3]]
copy_copy = copy.copy(l)
copy_copy[0] = '1a'
copy_copy[2][0] = '3a'
print(l, copy_copy) # [1, 2, ['3a']] ['1a', 2, ['3a']]
copy_deepcopy = copy.deepcopy(copy_copy)
copy_deepcopy[0] = 1
copy_deepcopy[2][0] = 3
print(copy_copy, copy_deepcopy) # ['1a', 2, ['3a']] [1, 2, [3]]