功能基础篇7——Python基础数据结构与算法标准库

基础数据结构与算法

collections.abc

容器数据类型抽象基类,collections.abc为Python标准库

collections

容器数据类型,collections为Python标准库

命名元组

from collections import namedtuple

Card = namedtuple('card', ['suits', 'number'])
c1 = Card('红桃', 2)
print(c1, c1.number, c1.suits)
from random import choice, shuffle
from collections import namedtuple

_Card = namedtuple('Card', ['rank', 'suit'])


class Poker:
    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = ['红心', '方片', '梅花', '黑桃']

    def __init__(self):
        self._cards = [_Card(rank, suit) for rank in Poker.ranks for suit in Poker.suits]

    def __len__(self):
        return self._cards.__len__()

    def __getitem__(self, item):
        return self._cards[item]

    def __setitem__(self, key, value):
        self._cards[key] = value


cards = Poker()
print(cards[3])
print(len(cards))
shuffle(cards)  # 会改变每个索引对应的值,需要__setitem__
print(cards[3])
print(cards[:3])
print(choice(cards))
print(choice(cards))

双端队列

from collections import deque

dq = deque()
dq.append(1)
dq.append(2)
print(dq.popleft())  # 1
print(dq.pop())  # 2

计数器

可哈希

from collections import Counter

items = ['red', 'blue', 'red', 'green', 'blue', 'blue']
print(Counter(items))  # Counter({'blue': 3, 'red': 2, 'green': 1})

带默认工厂字典

from collections import defaultdict

s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
d = defaultdict(list)
for k, v in s:
    d[k].append(v)  # 会有一个None到[]的自动变换,否则会报错,处理空值
# default_factory为可调用对象,用于产生一个默认值
print(sorted(d.items()))  # [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]

dd = defaultdict(lambda: 0)
print(dd['key'])  # 0

有序字典

有序字典中元素排列顺序与插入顺序保持一致,Python3.6及以前dict无序,Python3.7及以后有序

from collections import OrderedDict

d = dict([('a', 1), ('b', 2), ('c', 3)])
print(d)

od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
print(od)

ChainMap

倒序迭代

from collections import ChainMap

baseline = {'music': 'bach', 'art': 'rembrandt'}
adjustments = {'art': 'van gogh', 'opera': 'carmen'}
cm = ChainMap(adjustments, baseline)
for i in cm:
    print(i, end=" ")  # music art opera 
print(list(cm))  # ['music', 'art', 'opera']
print(cm.get('art'))  # van gogh

UserString、UserList、UserDict

当需要对内置数据类型str、list、dict进行拓展,并且想要覆写其某些标准功能时,应该使用UserDict, UserList, 和UserString,而不是直接继承str、list、dict

from collections import UserString, UserList, UserDict

us = UserString("abc")
print(us)
print(us.data)

ul = UserList([1, 2, 3])
print(ul)
print(ul.data)

ud = UserDict([(1, 'a'), (2, 'b'), (3, 'c')])
print(ud)
print(ud.data)

实现一个自动转大写key的字段,初始化、[]设置属性__setitem__、update分别需要重写,若使用UserDict只需重写__setitem__

class UpperDict(dict):

    def __init__(self, data):
        data = {k.upper(): v for k, v in data.items()}
        super().__init__(data)

    def __setitem__(self, key, value):
        key = key.upper()
        super().__setitem__(key, value)

    def update(self, data):  # 方法 'UpperDict.update()' 的签名与类 'MutableMapping' 中基方法的签名不匹配
        data = {k.upper(): v for k, v in data.items()}
        super().update(data)


d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)
class UpperDict(UserDict):
    def __setitem__(self, key, value):
        key = key.upper()
        super().__setitem__(key, value)


d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)

queue

queue为Python标准库,同步队列,线程安全

import queue

q = queue.Queue()
print(q.put("1"))  # None
print(q.qsize())  # 1
print(q.get())  # 1
print(q.put_nowait("2"))  # None
print(q.get_nowait())  # 2

# 后进先出队列
lifo_queue = queue.LifoQueue()
print(lifo_queue.put(1))  # None
print(lifo_queue.put(2))  # None
print(lifo_queue.get())  # 2

# 优先队列,最小的先出
pq = queue.PriorityQueue()
pq.put(3)
pq.put(1)
print(pq.get())  # 1
pq.put(4)
pq.put(0)
print(pq.get())  # 0

enum

enum为Python标准库,枚举

from enum import Enum


class Color(Enum):
    RED = 1
    GREEN = 2
    BLUE = 3


print(Color.RED.name, Color.RED.value)  # RED 1

array

array为Python标准库,数字数组,纯数字操作比list效率高

from array import array

a = array('l', [1, 2, 3, 4, 5])
print(a, type(a))

heapq

heapq为Python标准库,堆队列算法(优先队列算法),堆使用小根堆,基于数组实现

import heapq
import operator

l = [5, 3, 4, 2, 1]

# 堆化
heapq.heapify(l)
print(l)  # [1, 2, 4, 5, 3]
print(l[0])  # 1 第一个元素为小根堆最小值

# 入堆
heapq.heappush(l, 6)
print(l)  # [1, 2, 4, 5, 3, 6]

# 出堆
print(heapq.heappop(l))  # 1

# 先入后出,比heappush+heappop高效
print(heapq.heappushpop(l, 0))  # 0
print(l)  # [2, 3, 4, 5, 6]

# 先出后入,比heappop+heappush高效
print(heapq.heapreplace(l, 0))  # 2
print(l)  # [0, 3, 4, 5, 6]

# 获取迭代器最大的n个元素
print(heapq.nlargest(10, range(100, 200, 5)))  # [195, 190, 185, 180, 175, 170, 165, 160, 155, 150]

# 获取迭代器最小的n个元素
print(heapq.nsmallest(10, range(100, 200, 5)))  # [100, 105, 110, 115, 120, 125, 130, 135, 140, 145]


# 若干有序迭代器元素合并,合并结果为一个生成器,日志合并示例
log_file1 = ["2023-10-23 10:12:13 [INFO] ...", "2023-10-23 10:12:14 [INFO] ...", "2023-10-23 10:12:15 [INFO] ..."]
log_file2 = ["2023-10-23 10:12:12 [INFO] ...", "2023-10-23 10:12:14 [DEBUG] ...", "2023-10-23 10:12:14 [INFO] ..."]
log_file3 = ["2023-10-23 10:12:11 [WARN] ...", "2023-10-23 10:12:18 [INFO] ...", "2023-10-23 10:12:20 [ERROR] ..."]

for line in heapq.merge(log_file1, log_file2, log_file3, key=operator.itemgetter(slice(0, 20))):
    print(line)
# 2023-10-23 10:12:11 [WARN] ...
# 2023-10-23 10:12:12 [INFO] ...
# 2023-10-23 10:12:13 [INFO] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:14 [DEBUG] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:15 [INFO] ...
# 2023-10-23 10:12:18 [INFO] ...
# 2023-10-23 10:12:20 [ERROR] ...

bisect

bisect为Python标准库,基于二分查找算法定位插入点,模块中函数基于插入点设计,并非使用__eq__查找特定值,而是使用__lt__查找插入点。

公共参数说明

  • a,列表
  • x,列表元素
  • lo,低位下标
  • hi,高位下标
  • key,比较键

函数说明

  • bisect.bisect_left(a, x, lo=0, hi=len(a), *, key=None),返回的插入点ip将数组a分为两个切片使得对于左侧切片 all(elem < x for elem in a[lo : ip]) 为真值而对于右侧切片 all(elem >= x for elem in a[ip : hi]) 为真值
  • bisect.bisect_right(a, x, lo=0, hi=len(a), *, key=None),返回的插入点ip将数组a分为两个切片使得对于左侧切片 all(elem <= x for elem in a[lo : ip]) 为真值而对于右则切片 all(elem > x for elem in a[ip : hi]) 为真值
  • bisect.insort_right(a, x, lo=0, hi=len(a), *, key=None),基于bisect_right函数定位插入点然后插入元素,插入算法时间复杂度为O(n)
  • bisect.insort_left(a, x, lo=0, hi=len(a), *, key=None),基于bisect_left函数定位插入点然后插入元素,插入算法时间复杂度为O(n)
  • bisect.bisectbisect.bisect_right等价
  • bisect.insortbisect.insort_right等价
import bisect
import operator
from collections import namedtuple

l = [1, 3, 5, 7, 9]

print(bisect.bisect_right(l, 5))  # 3
print(bisect.bisect_left(l, 5))  # 2
print(bisect.bisect_right(l, 4))  # 2
print(bisect.bisect_left(l, 4))  # 2
print(bisect.bisect_right(l, 9))  # 5
print(bisect.bisect_left(l, 9))  # 4
print(bisect.bisect_right(l, 1))  # 1
print(bisect.bisect_left(l, 1))  # 0
print(bisect.bisect_right(l, 0))  # 0
print(bisect.bisect_left(l, 0))  # 0
print(bisect.bisect_right(l, 10))  # 5
print(bisect.bisect_left(l, 10))  # 5

print(bisect.bisect_left(l, 5))  # 2
bisect.insort_left(l, 5)
print(l)  # [1, 3, 5, 5, 7, 9] 插入的是第一个5 left会定位所有 满足小于5 的元素的下一个位置  

print(bisect.bisect_right(l, 5))  # 4
bisect.insort_right(l, 5)
print(l)  # [1, 3, 5, 5, 5, 7, 9] 插入的是第三个5 right会定位所有 满足大于5 的元素的上一个位置

# 若数组无序则不会正常工作
a = [1, 5, 3, 6]
print(bisect.bisect_left(a, 5))  # 3
bisect.bisect_left(a, 5)  # [1, 5, 3, 6]
print(a)

基于定位插入点特性便于分段

import bisect
import operator
from collections import namedtuple

for score in [33, 99, 77, 70, 89, 90, 100]:
    print('FDCBA'[bisect.bisect([60, 70, 80, 90], score)])
# F A C C B A A

基于定位插入点特性不便于查找特定元素,通常需要封装一层函数

import bisect
import operator
from collections import namedtuple

Movie = namedtuple('Movie', ('id', 'name', 'released', 'director'))

movies = [
    Movie(1, 'Jaws', 1975, 'Spielberg'),
    Movie(3, 'Titanic', 1997, 'Cameron'),
    Movie(5, 'The Birds', 1963, 'Hitchcock'),
    Movie(7, 'Aliens', 1986, 'Cameron')
]

print(bisect.bisect_left(movies, 2, key=operator.attrgetter('id')))  # 1
print(bisect.bisect_left(movies, 3, key=operator.attrgetter('id')))  # 1
print(bisect.bisect_right(movies, 3, key=operator.attrgetter('id')))  # 2


def index(a, x, key=None):
    i = bisect.bisect_left(a, x, key=key)
    if i != len(a) and (key(a[i]) if key else a[i]) == x:
        return i
    raise ValueError


def find_lt(a, x, key=None):
    i = bisect.bisect_left(a, x, key=key)
    if i:
        return a[i - 1]
    raise ValueError


def find_le(a, x, key=None):
    i = bisect.bisect_right(a, x, key=key)
    if i:
        return a[i - 1]
    raise ValueError


def find_gt(a, x, key=None):
    i = bisect.bisect_right(a, x, key=key)
    if i != len(a):
        return a[i]
    raise ValueError


def find_ge(a, x, key=None):
    i = bisect.bisect_left(a, x, key=key)
    if i != len(a):
        return a[i]
    raise ValueError


print(index(movies, 5, key=operator.attrgetter('id')))  # 2
print(index(movies, 2, key=operator.attrgetter('id')))  # ValueError

三方有序容器库,https://grantjenks.com/docs/sortedcollections/

浅拷贝与深拷贝,shallow copy and deep copy

import copy

l = [1, 2, [3]]
copy_copy = copy.copy(l)
copy_copy[0] = '1a'
copy_copy[2][0] = '3a'
print(l, copy_copy)  # [1, 2, ['3a']] ['1a', 2, ['3a']]

copy_deepcopy = copy.deepcopy(copy_copy)
copy_deepcopy[0] = 1
copy_deepcopy[2][0] = 3
print(copy_copy, copy_deepcopy)  # ['1a', 2, ['3a']] [1, 2, [3]]

你可能感兴趣的:(Python高楼平地起,python,开发语言,数据结构,算法)