数据结构示例(基于Python)

链表

每个元素都是一个对象,称为一个节点。节点有两个属性:自身的值,以及指向下一个节点的指针 next。各节点通过 next 依次相连,所以要想访问某一个节点的值,必须从头节点开始逐个向后查找;而插入和删除时只需修改该节点及其相邻节点的指针即可。其特点概括起来就是:查找慢,但插入、删除快

简单例子
class Node(object):
    """A single cell of a singly linked list."""

    def __init__(self, item):
        # Store the payload; a freshly created node has no successor yet.
        self.item, self.next = item, None

# Create three nodes and chain them: a -> b -> c.
a, b, c = Node(10), Node(20), Node(30)
a.next = b  # a's successor is b
b.next = c  # b's successor is c

# Reach every value by following the links from a.
first = a
second = first.next
third = second.next
print(first.item)   # 10
print(second.item)  # 20
print(third.item)   # 30
遍历链表
class Node(object):
    """Linked-list node: a value plus a reference to the following node."""

    def __init__(self, item):
        self.next = None  # no successor until the node is linked
        self.item = item  # value carried by this node

def traversal(head):
    """Print the ``item`` of every node, starting at ``head``.

    Follows ``next`` links until ``None`` is reached; prints nothing when
    ``head`` itself is ``None``.
    """
    def _nodes(start):
        # Yield each node in link order.
        while start is not None:
            yield start
            start = start.next

    for node in _nodes(head):
        print(node.item)

# Build 10 -> 20 -> 30 and print it front to back.
head = Node(10)
second = Node(20)
head.next = second
second.next = Node(30)
traversal(head)
建立链表
  • 头插法
class Node(object):
    """Element of a singly linked list."""

    def __init__(self, item):
        # ``item`` is the stored value; ``next`` stays None until linked.
        self.item = item
        self.next = None

def createLinkListF(li):
    """Build a linked list from ``li`` by head insertion.

    A dummy head node with item 0 is created first. Every value is inserted
    directly behind that head, so the values end up in reverse order.
    Returns the dummy head.
    """
    head = Node(0)
    for value in li:
        fresh = Node(value)
        # Splice the new node in between the head and the old first node.
        fresh.next, head.next = head.next, fresh
    return head

def traversal(head):
    """Walk the chain from ``head`` and print each node's ``item``."""
    node = head
    while True:
        if node is None:
            # Fell off the end of the list.
            break
        print(node.item)
        node = node.next

# Head insertion reverses the input order behind the dummy head.
li = list(range(1, 7))
l = createLinkListF(li)
traversal(l)    # 0,6,5,4,3,2,1
  • 尾插法
class Node(object):
    """Node of a singly linked list (value + link to the next node)."""

    def __init__(self, item):
        self.item = item
        # Unlinked on creation.
        self.next = None

def createLinkListR(li):
    """Build a linked list from ``li`` by tail insertion.

    A dummy head node with item 0 is created first. Every value is appended
    behind the current last node, so the input order is preserved.
    Returns the dummy head.
    """
    head = Node(0)
    tail = head
    for value in li:
        # Append behind the current tail, then advance the tail marker.
        tail.next = Node(value)
        tail = tail.next
    return head

def traversal(head):
    """Print each node's ``item`` in link order, beginning with ``head``."""
    cursor = head
    while cursor is not None:
        value, cursor = cursor.item, cursor.next  # read, then step forward
        print(value)

# Tail insertion keeps the input order behind the dummy head.
li = list(range(1, 7))
l = createLinkListR(li)
traversal(l)    # 0,1,2,3,4,5,6
基于单链表实现的数组
class List:
    '''Array-like container implemented on top of a singly linked list.

    A sentinel head node (``first``) precedes the real elements, so the
    element with 0-based index ``i`` is reached after ``i + 1`` hops from
    ``first``. ``size`` is the number of stored elements.
    '''

    class Node:
        '''One cell of the chain: a value plus a link to the next cell.'''

        def __init__(self, element, next):
            self.element = element
            self.next = next

    def __init__(self):
        # Sentinel head node and element count.
        self.first = self.Node(None, None)
        self.size = 0

    def _get_node(self, index):
        '''Return the node ``index`` hops behind the sentinel.

        ``index`` 0 yields the sentinel itself and ``index`` ``size`` yields
        the last element. Raises IndexError outside ``0..size``.
        '''
        if not 0 <= index <= self.size:
            raise IndexError("index out of list")
        node = self.first
        for _ in range(index):
            node = node.next
        return node

    def _check_index(self, index):
        '''Raise IndexError unless ``index`` addresses an existing element.'''
        if not 0 <= index < self.size:
            raise IndexError("index out of list")

    def _elements(self):
        '''Yield the stored values from front to back.'''
        node = self.first.next
        while node is not None:
            yield node.element
            node = node.next

    def index_of(self, index):
        '''Return the value stored at ``index`` (0-based).

        Raises IndexError for an index outside ``0..size-1``; negative
        indexes are rejected instead of exposing the sentinel node.
        '''
        self._check_index(index)
        return self._get_node(index + 1).element

    def add(self, element):
        '''Append ``element`` at the tail.'''
        tail = self._get_node(self.size)
        tail.next = self.Node(element, None)
        self.size += 1

    def insert(self, index, element):
        '''Insert ``element`` so that it ends up at position ``index``.

        ``index`` may be anything in ``0..size``; otherwise IndexError.
        '''
        before = self._get_node(index)
        before.next = self.Node(element, before.next)
        self.size += 1

    def delete(self, index):
        '''Remove the element at ``index``; IndexError if it does not exist.'''
        self._check_index(index)
        before = self._get_node(index)
        before.next = before.next.next
        self.size -= 1

    def update(self, index, element):
        '''Overwrite the value at ``index``; IndexError if it does not exist.'''
        self._check_index(index)
        self._get_node(index + 1).element = element

    def get_size(self):
        '''Return the number of stored elements.'''
        return self.size

    def pop(self):
        '''Remove and return the last element.

        Fetches the second-to-last node, cuts its link and shrinks the size.
        Raises IndexError when the list is empty.
        '''
        if self.size == 0:
            raise IndexError("index out of list")
        before = self._get_node(self.size - 1)
        element = before.next.element
        before.next = None
        self.size -= 1
        return element

    def clear(self):
        '''Drop every element by cutting the chain behind the sentinel.'''
        self.first.next = None
        self.size = 0

    def reverse(self):
        '''Reverse the list in place.

        Done iteratively by flipping each ``next`` link, so an empty list is
        a no-op and long lists cannot hit the recursion limit.
        '''
        previous = None
        node = self.first.next
        while node is not None:
            following = node.next
            node.next = previous
            previous = node
            node = following
        self.first.next = previous

    def sort(self):
        '''Bubble sort in ascending order.

        Values are swapped between nodes instead of relinking the nodes,
        which would be considerably more fiddly (see the commented-out
        variant below).
        '''
        for _ in range(self.size):
            swapped = False
            node = self.first.next
            while node is not None and node.next is not None:
                if node.element > node.next.element:
                    node.element, node.next.element = node.next.element, node.element
                    swapped = True
                node = node.next
            if not swapped:
                # A pass without swaps means the list is already sorted.
                break

    # def sort(self):
    #   '''Bubble sort that swaps nodes; the first node and inner nodes need separate handling'''
    #   for i in range(self.size):
    #       node = self.first.next
    #       if node.next:
    #           if node.element > node.next.element:
    #               head = node.next
    #               node.next = node.next.next
    #               head.next = node
    #               self.first.next = head
    #       while node.next:
    #           if node.next.next:
    #               if node.next.element > node.next.next.element:
    #                   head = node.next.next
    #                   node.next.next = head.next
    #                   head.next = node.next
    #                   node.next = head
    #           node = node.next

    def __str__(self):
        '''Render like a Python list literal, e.g. ``[1, 2, 3]``.'''
        return "[" + ", ".join(str(element) for element in self._elements()) + "]"

if __name__ == '__main__':
    # Demo: exercise every public operation and show the list after each.
    l = List()
    for value in (1, 2, 3, 4):
        l.add(value)
    l.insert(1, 5)
    l.insert(0, 6)
    print(l.index_of(5))
    print(l, l.size)

    l.delete(5)
    print(l, l.size)

    l.update(2, 100)
    print(l, l.size)

    print(l.pop())
    print(l, l.size)

    # The remaining operations take no arguments and are each followed by a dump.
    for operation in (l.reverse, l.sort, l.clear):
        operation()
        print(l, l.size)

双链表

和单链表相比,多了个指向前一个节点的指针,如下:

class Node(object):
    """Node of a doubly linked list: a value plus links in both directions."""

    def __init__(self, item):
        self.item = item
        # Both neighbours are unset until the node is linked into a list.
        self.next = self.prior = None

栈:后进先出(LIFO)的数据结构,实现起来很简单

简单示例
class Stack:
    """LIFO stack that delegates storage to a built-in list."""

    def __init__(self):
        # The end of the list is the top of the stack.
        self.stack = list()

    def push(self, data):
        """Place ``data`` on top of the stack."""
        self.stack.append(data)

    def pop(self):
        """Remove and return the top item (IndexError when empty)."""
        top = self.stack.pop()
        return top

    def get(self):
        """Return the top item without removing it (IndexError when empty)."""
        top = self.stack[-1]
        return top

if __name__ == '__main__':
    # Push 1, 2, 3, pop twice, then peek at what is left.
    s = Stack()
    for value in (1, 2, 3):
        s.push(value)
    print(s.pop())
    print(s.pop())
    print(s.get())
更加原始的实现示例

前面的实现是基于列表自带的api实现的,如果希望更加接近原始的实现方式,则看如下示例:

class Stack:
    """LIFO stack on a manually managed, fixed-capacity array.

    ``size`` is the current capacity of the backing array, ``index`` is the
    number of stored items (the slot the next push writes to).
    """

    def __init__(self):
        self.size = 8
        self.stack = [None] * self.size
        self.index = 0

    def push(self, v):
        """Put ``v`` on top of the stack, growing the array when it is full."""
        self._size_check()
        self.stack[self.index] = v
        self.index += 1

    def pop(self):
        """Remove and return the top item; raises Exception when empty."""
        self._empty_check()
        self.index -= 1
        v = self.stack[self.index]
        # Clear the slot so the array does not keep the popped object alive.
        self.stack[self.index] = None
        return v

    def get(self):
        """Return the top item without removing it; raises Exception when empty.

        Without the emptiness check an empty stack would read ``stack[-1]``,
        i.e. an unrelated slot at the end of the backing array.
        """
        self._empty_check()
        return self.stack[self.index - 1]

    def _size_check(self):
        """Make sure there is room for one more item (capacity grows ~1.5x)."""
        if self.index < self.size:
            return
        # max(...) guarantees progress even if the capacity were ever 1.
        new_size = max(self.size + 1, int(1.5 * self.size))
        grown = [None] * new_size
        grown[:self.index] = self.stack[:self.index]
        self.size = new_size
        self.stack = grown

    def _empty_check(self):
        """Raise if the stack holds no items."""
        if self.index < 1:
            raise Exception("当前栈为空!")

    def __str__(self):
        """Render the stored items, bottom first."""
        return str(self.stack[:self.index])

if __name__ == '__main__':
    # Fill, peek, drain completely, then push once more and dump the stack.
    s = Stack()
    for value in (5, 3, 2):
        s.push(value)
    print(s.get())
    for _ in range(3):
        s.pop()
    s.push(1)
    print(s)

队列

先进先出的数据结构,和栈差不多

简单示例
class Queue:
    """FIFO queue that delegates storage to a built-in list."""

    def __init__(self):
        # Index 0 is the front of the queue, the end of the list is the back.
        self.queue = list()

    def put(self, data):
        """Append ``data`` at the back of the queue."""
        self.queue.append(data)

    def poll(self):
        """Remove and return the front item (IndexError when empty)."""
        front = self.queue.pop(0)
        return front

    def get(self):
        """Return the front item without removing it (IndexError when empty)."""
        front = self.queue[0]
        return front

if __name__ == "__main__":
    # Enqueue 1, 2, 3, dequeue twice, then peek at the front.
    q = Queue()
    for value in (1, 2, 3):
        q.put(value)
    print(q.poll())
    print(q.poll())
    print(q.get())

上面的是基于动态数组实现的,每当删除第一个元素时,都需要将后面的元素往前挪,效率偏低,而使用链表实现则能够很好地提升效率,示例如下:

class Queue:
    """FIFO queue on a linked list with a sentinel head node.

    ``head`` is the sentinel, ``tail`` is the last node (the sentinel itself
    while the queue is empty) and ``size`` counts the stored values.
    """

    class Node:
        """Queue cell holding one value."""

        def __init__(self, v):
            self.v = v
            self.prev = None
            self.next = None

    def __init__(self):
        sentinel = self.Node(None)
        self.head = sentinel
        self.tail = sentinel
        self.size = 0

    def add(self, v):
        """Append ``v`` behind the current tail."""
        fresh = self.Node(v)
        self.tail.next = fresh
        self.tail = fresh
        self.size += 1

    def pop(self):
        """Remove and return the front value, or return None when empty."""
        if self.size > 0:
            first = self.head.next
            self.head.next = first.next
            if first is self.tail:
                # The last value is leaving: the sentinel becomes the tail again.
                self.tail = self.head
            self.size -= 1
            return first.v
        return None

    def get_size(self):
        """Return the number of stored values."""
        return self.size

二叉搜索树

左子树中所有节点的值都比当前节点的小,右子树中所有节点的值都比当前节点的大

简单实现
class BST:
    """Binary search tree holding distinct int/float values.

    The tree always owns a root node; an empty tree is represented by a root
    whose ``val`` is ``None``. Every other node stores a number. ``size`` is
    the number of stored values.
    """

    class Node:
        """Tree node with links to its parent and both children."""

        def __init__(self, val, parent):
            self.val = val
            self.parent = parent
            self.left = None
            self.right = None

        def __repr__(self):
            return f"{self.val}(left:({self.left})right:({self.right}))"

    def __init__(self, tree=None):
        """
        @description: Build a tree and insert the values of ``tree`` in order.
        """
        self.root = self.Node(None, None)
        self.size = 0
        # Iterate instead of pop(0) so the caller's sequence stays intact; the
        # None default avoids one list object shared between all calls.
        for val in (tree if tree is not None else ()):
            self.add(val)

    def add(self, val):
        """
        @description: Insert ``val`` and return it. An empty tree stores it in
        the root; an already present value is left alone; otherwise it
        becomes the left (smaller) or right (larger) child of the node found
        by ``_find_parent``. Raises TypeError for non int/float values.
        """
        self._val_check(val)
        parent = self._find_parent(val)
        if parent is None:
            self.root.val = val
        elif val == parent.val:
            return val
        elif val > parent.val:
            parent.right = self.Node(val, parent)
        else:
            parent.left = self.Node(val, parent)
        self.size += 1
        return val

    def _find_parent(self, val):
        """
        @description: Return the node already holding ``val`` if there is
        one, else the node ``val`` would be attached to, else ``None`` for an
        empty tree.
        """
        node = self.root
        # Compare against None explicitly: 0 is a perfectly valid value.
        if node.val is None:
            return None
        parent = None
        while node is not None:
            if val == node.val:
                return node
            parent = node
            node = node.right if val > node.val else node.left
        return parent

    def remove(self, val):
        """
        @description: Remove ``val`` if present; do nothing otherwise.
        """
        self._val_check(val)
        node = self._find_parent(val)
        if node is None or node.val != val:
            return
        # Two children: overwrite with the in-order predecessor's value and
        # remove that predecessor instead; it has at most one child.
        if self._has_two_child(node):
            prev = self._find_prev(node)
            node.val = prev.val
            node = prev
        # node now has zero or one child; that child (or None) takes its place.
        child = node.left if node.left is not None else node.right
        self._replace_node(node, child)
        self.size -= 1

    def _replace_node(self, node, new_node):
        """
        @description: Put ``new_node`` (may be ``None``) where ``node`` is
        and keep the parent links consistent. Replacing the root either
        promotes ``new_node`` to root or, with no replacement, resets the
        root to the empty state.
        """
        parent = node.parent
        if new_node is not None:
            new_node.parent = parent
        if parent is None:
            if new_node is None:
                node.val = None
            else:
                self.root = new_node
        elif parent.left is node:
            parent.left = new_node
        elif parent.right is node:
            parent.right = new_node

    def _find_prev(self, node):
        """
        @description: Return the in-order predecessor of ``node`` or ``None``.
        """
        if node.left is not None:
            # Right-most node of the left subtree.
            prev = node.left
            while prev.right is not None:
                prev = prev.right
            return prev
        # Otherwise climb until we arrive from a right child.
        while node.parent is not None and node.parent.left is node:
            node = node.parent
        return node.parent

    def get_height(self):
        """
        @description: Height of the tree (0 for an empty tree).
        """
        return self._get_node_height(self._top())

    def _get_node_height(self, node):
        """
        @description: Height of ``node`` (tallest child plus one, 0 for None).
        """
        if node is None:
            return 0
        return max(self._get_node_height(node.left), self._get_node_height(node.right)) + 1

    def constains(self, val):
        """
        @description: Return True if ``val`` is stored in the tree. The
        misspelt name is kept for existing callers; ``contains`` is an alias.
        """
        self._val_check(val)
        node = self._find_parent(val)
        return node is not None and node.val == val

    contains = constains

    def is_empty(self):
        """
        @description: Return True if the tree stores no values.
        """
        return self.size == 0

    def get_size(self):
        """
        @description: Return the number of stored values.
        """
        return self.size

    def clear(self):
        """
        @description: Remove every value.
        """
        self.root.val = None
        self.root.left = None
        self.root.right = None
        self.size = 0

    def _top(self):
        """
        @description: Root node, or ``None`` when the tree is empty.
        """
        return None if self.root.val is None else self.root

    def _emit(self, res, output):
        """
        @description: Optionally print the values of ``res``, then return it.
        """
        if output:
            print([node.val for node in res])
        return res

    def pre_order(self, output=True):
        """
        @description: Pre-order traversal; returns the nodes, prints their
        values when ``output`` is true.
        """
        res = []

        def travel(node):
            if node is None:
                return
            res.append(node)
            travel(node.left)
            travel(node.right)

        travel(self._top())
        return self._emit(res, output)

    def mid_order(self, output=True):
        """
        @description: In-order traversal; returns the nodes, prints their
        values when ``output`` is true.
        """
        res = []

        def travel(node):
            if node is None:
                return
            travel(node.left)
            res.append(node)
            travel(node.right)

        travel(self._top())
        return self._emit(res, output)

    def next_order(self, output=True):
        """
        @description: Post-order traversal; returns the nodes, prints their
        values when ``output`` is true.
        """
        res = []

        def travel(node):
            if node is None:
                return
            travel(node.left)
            travel(node.right)
            res.append(node)

        travel(self._top())
        return self._emit(res, output)

    def layer_order(self, output=True):
        """
        @description: Level-order traversal; returns the nodes, prints their
        values when ``output`` is true.
        """
        top = self._top()
        res = [] if top is None else [top]
        # The result list doubles as the queue; ``cursor`` is the read position.
        cursor = 0
        while cursor < len(res):
            cur = res[cursor]
            cursor += 1
            if cur.left is not None:
                res.append(cur.left)
            if cur.right is not None:
                res.append(cur.right)
        return self._emit(res, output)

    def _val_check(self, val):
        """
        @description: Raise TypeError unless ``val`` is exactly int or float.
        """
        if type(val) not in (int, float):
            raise TypeError(f"传入类型必须为int/float型,你传入的是:{type(val)}")

    def _is_root(self, node):
        """
        @description: Return True if ``node`` is the root node.
        """
        return node is self.root

    def _has_one_child(self, node):
        """
        @description: Return True if ``node`` has exactly one child.
        """
        return self._has_child(node) and not self._has_two_child(node)

    def _has_two_child(self, node):
        """
        @description: Return True if ``node`` has both children.
        """
        return self._has_left_child(node) and self._has_right_child(node)

    def _has_child(self, node):
        """
        @description: Return True if ``node`` has at least one child.
        """
        return self._has_left_child(node) or self._has_right_child(node)

    def _has_left_child(self, node):
        """
        @description: Return True if ``node`` has a left child.
        """
        return node is not None and node.left is not None

    def _has_right_child(self, node):
        """
        @description: Return True if ``node`` has a right child.
        """
        return node is not None and node.right is not None

    def __repr__(self):
        return f"{self.root}"

if __name__ == '__main__':
    # Build a tree, inspect it, remove the root value, then reuse it after clear().
    t = BST([12,16,1,4,8,11,14,21,17,16,15,18])
    print(t)
    print(t.get_height())
    print(t.get_size())
    for probe in (2, 23, 12, 15, 19):
        print(t.constains(probe))
    for walk in (t.layer_order, t.pre_order, t.mid_order, t.next_order):
        walk()
    t.remove(12)
    t.layer_order()
    t.clear()
    print(t.is_empty())
    for value in (5, 9, 3, 10, 6):
        t.add(value)
    t.layer_order()

哈希表

一般是将数组和红黑树(或者链表之类的)进行结合

  • 添加元素时,首先通过哈希函数进行计算后,将键和值添加到对应地址的红黑树(链表)上,假如发生hash冲突时,则在对应地址的红黑树(链表)上再添加一个新的节点
  • 查询时查找对应hash值地址上的红黑树(链表)节点,找到指定值时返回
  • 删除时查找对应hash值地址上的红黑树(链表)节点,将该节点删除
简单实现

这里基于动态数组+双向链表实现一个简单的hashmap:

class HashMap:
    """
    @description: Simple hash map built from a dynamic array of buckets, each
    bucket being a doubly linked chain behind a sentinel root node.
    ``size`` counts the buckets in use (not the entries) and drives resizing.
    """
    class Node:
        """
        @description: Chain node holding key, value and both neighbour links.
        """
        def __init__(self, k, v, prev):
            self.k = k
            self.v = v
            self.prev = prev
            self.next = None

        def has_next(self):
            return self.next is not None

    def __init__(self):
        """
        @description: Start with 16 buckets, each holding only its sentinel.
        """
        self.max_size = 2 << 3
        self.size = 0
        self.map = [self.Node(None, None, None) for _ in range(self.max_size)]

    def put(self, k, v):
        """
        @description: Store ``v`` under ``k``, overwriting an existing entry.
        """
        self._max_size_check()
        node = self._find(k, add=True)
        node.k = k
        node.v = v

    def get(self, k):
        """
        @description: Return the value stored under ``k``, or None if absent.
        """
        node = self._find(k)
        return None if node is None else node.v

    def remove(self, k):
        """
        @description: Delete the entry for ``k``. Returns True if an entry
        was removed, False if the key was not present.
        """
        node = self._find(k)
        if node is None:
            return False
        # Unlink in both directions; a stale ``prev`` on the successor would
        # make a later removal of that successor patch the wrong node.
        node.prev.next = node.next
        if node.next is not None:
            node.next.prev = node.prev
        self._after_remove(k)
        return True

    def _after_remove(self, k):
        """
        @description: If the bucket of ``k`` is now empty, decrease ``size``
        and check whether the array should shrink.
        """
        root = self.map[self._get_hash(k)]
        if not root.has_next():
            self.size -= 1
            self._min_size_check()

    def _find(self, k, add=False):
        """
        @description: Return the node stored under ``k``.
            - add=False: return None when the key is absent.
            - add=True: append and return an empty node when the key is
              absent; ``size`` grows if the bucket was unused before.
        """
        last = self.map[self._get_hash(k)]
        node = last.next
        if node is None and add:
            # The first entry in this bucket marks the bucket as used.
            self.size += 1
        while node is not None:
            if node.k == k:
                return node
            last, node = node, node.next
        if not add:
            return None
        last.next = self.Node(None, None, last)
        return last.next

    def _get_hash(self, k):
        """
        @description: Map ``k`` to a bucket index.
        """
        return hash(k) % self.max_size

    def _max_size_check(self):
        """
        @description: Double the array and re-insert everything once more
        than 2/3 of the buckets are in use.
        """
        if self.size * 3 <= self.max_size * 2:
            return
        self.max_size <<= 1
        self._copy_map()

    def _min_size_check(self):
        """
        @description: Halve the array and re-insert everything once fewer
        than 1/5 (integer division) of the buckets are in use.
        """
        if self.size >= self.max_size // 5:
            return
        self.max_size >>= 1
        self._copy_map()

    def _copy_map(self):
        """
        @description: Allocate ``max_size`` fresh buckets and re-insert all
        entries of the previous array.
        """
        old_map = self.map
        self.size = 0
        self.map = [self.Node(None, None, None) for _ in range(self.max_size)]
        for root in old_map:
            node = root.next
            while node is not None:
                self.put(node.k, node.v)
                node = node.next

    def __str__(self):
        """
        @description: One line per bucket: zero-padded index followed by its
        ``key:value`` entries, framed by two dashed lines.
        """
        width = len(str(self.max_size))
        border = "-" * 100
        lines = [border]
        for index, root in enumerate(self.map):
            cells = [f"{index:>0{width}}"]
            node = root.next
            while node is not None:
                cells.append(f"{node.k}:{node.v}")
                node = node.next
            lines.append(" -> ".join(cells))
        lines.append(border)
        return "\n".join(lines)

if __name__ == "__main__":
    # Fill the map, look up and remove a key, add more entries, remove again.
    hashmap = HashMap()
    for key, value in (("a", 1), ("ac", 2), ("d", 100), ("aaa", 15)):
        hashmap.put(key, value)
    print(hashmap)
    print(hashmap.get("aaa"))
    print(hashmap.remove("aaa"))
    print(hashmap.get("aaa"))
    print(hashmap.remove("add"))
    for key, value in (
        ("acd", 21),
        ("dcsa", 13),
        ("ddascsa", 53),
        ("dcgfdsa", 34),
        ("dgfdgdcsa", 33),
        ("dgfdgdcsa", 35),
    ):
        hashmap.put(key, value)
    print(hashmap)
    for key in ("acd", "dcsa", "ddascsa", "dcgfdsa"):
        hashmap.remove(key)
    print(hashmap)

堆是一种树形数据结构,其中较为常见的是二叉堆,根据自身特性又分为大顶堆和小顶堆(大顶堆:每个节点的值都不小于其子节点的值;小顶堆:每个节点的值都不大于其子节点的值)。由于堆的这一性质,很适合在需要取最值的场景当中使用(堆顶必然是最大值/最小值)

大顶堆简单实现
class MaxHeap:
    """
    @description: Binary max-heap stored in a manually sized array. Turning
    it into a min-heap only requires flipping the comparison in ``_compare``.
    ``_hsize`` is the array capacity, ``_size`` the number of stored items.
    """
    def __init__(self, eles=None):
        """
        @description: Create an empty heap, or bulk-build one from ``eles``.
        The input is validated and copied, so the caller's sequence is never
        reordered.
        """
        self._hsize = 8
        self._set_size(0)
        self._heap = [None] * self._hsize
        if eles:
            # Bulk build. Adding one by one would also work but is slowest;
            # heapify() below uses bottom-up sift-down instead.
            items = list(eles)
            for ele in items:
                self._is_comparable(ele)
            self._hsize = len(items)
            self._set_size(len(items))
            self._heap = items
            self.heapify()

    def heapify(self):
        """
        @description: Restore the heap property over the whole array.
        """
        # Bottom-up sift-down starting at the last parent. The parentheses
        # matter: ``size >> 1 - 1`` would parse as ``size >> 0``.
        for i in range((self.get_size() >> 1) - 1, -1, -1):
            self._sift_down(i)

    def get(self):
        """
        @description: Return the top (largest) item; raises when empty.
        """
        self._empty_check()
        return self._heap[self._root]

    def add(self, v):
        """
        @description: Insert ``v``.
            + checks: ``v`` must be int/float; the array grows when full
            + steps: place ``v`` behind the last item, then sift it up
        """
        self._is_comparable(v)
        self._size_check()
        last = self.get_size()
        self._heap[last] = v
        self._set_size(last + 1)
        self._sift_up(last)

    def remove(self):
        """
        @description: Remove and return the top item.
            + checks: the heap must not be empty
            + steps: move the last item to the top, shrink, then sift down
        """
        self._empty_check()
        last = self.get_size() - 1
        v = self._heap[self._root]
        self._heap[self._root] = self._heap[last]
        # Clear the vacated slot so no stale item lingers in the array.
        self._heap[last] = None
        self._set_size(last)
        self._sift_down(self._root)
        return v

    def replace(self, v):
        """
        @description: Replace the top item with ``v`` and return the old top.
        Equivalent to remove() followed by add(v), but done with a single
        overwrite and sift-down. ``v`` is validated like in add().
        """
        self._empty_check()
        self._is_comparable(v)
        old = self._heap[self._root]
        self._heap[self._root] = v
        self._sift_down(self._root)
        return old

    def _sift_up(self, cur):
        """
        @description: Move the item at ``cur`` up while its parent is smaller.
        """
        while cur > self._root:
            parent = self._get_parent(cur)
            if self._compare(parent, cur):
                break
            self._swap(parent, cur)
            cur = parent

    def _sift_down(self, cur):
        """
        @description: Move the item at ``cur`` down while its larger child
        is bigger than it.
        """
        while cur < self.get_size():
            left_child = self._get_left(cur)
            if left_child is None:
                break
            max_child = left_child
            right_child = self._get_right(cur)
            if right_child is not None and self._compare(right_child, max_child):
                max_child = right_child
            if self._compare(cur, max_child):
                break
            self._swap(cur, max_child)
            cur = max_child

    def clear(self):
        """
        @description: Remove all items and reset the capacity to 8.
        """
        self._hsize = 8
        self._heap = [None] * self._hsize
        self._set_size(0)

    def is_empty(self):
        """
        @description: Return True if the heap stores no items.
        """
        return self.get_size() == 0

    def _empty_check(self):
        """
        @description: Raise if the heap is empty (needed for top access).
        """
        if self.is_empty():
            raise Exception("当前堆为空!")

    def _size_check(self):
        """
        @description: Emulated dynamic array: make room for one more item,
        growing the capacity by about 1.5x when the array is full.
        """
        size = self.get_size()
        if size < self._hsize:
            return
        # max(...) guarantees progress for tiny capacities (int(1.5 * 1) == 1).
        self._hsize = max(self._hsize + 1, int(1.5 * self._hsize))
        grown = [None] * self._hsize
        grown[:size] = self._heap[:size]
        self._heap = grown

    def _is_comparable(self, v):
        """
        @description: Raise TypeError unless ``v`` is exactly int or float.
        """
        if type(v) not in (int, float):
            raise TypeError(f"传入类型必须为int/float型,你传入的是:{type(v)}")

    def _compare(self, parent, children):
        """
        @description: Max-heap order test on two indexes: True if the item at
        ``parent`` is not smaller than the item at ``children``.
        """
        return self._heap[parent] >= self._heap[children]

    def _swap(self, e1, e2):
        """
        @description: Swap the items at indexes ``e1`` and ``e2``.
        """
        self._heap[e1], self._heap[e2] = self._heap[e2], self._heap[e1]

    def _set_size(self, size):
        """
        @description: Set the number of stored items.
        """
        self._size = size

    def get_size(self):
        """
        @description: Return the number of stored items.
        """
        return self._size

    def get_height(self):
        """
        @description: Return the number of levels of the heap (0 if empty).
        """
        height = 0
        sub = self.get_size()
        while sub > 0:
            sub >>= 1
            height += 1
        return height

    @property
    def _root(self):
        """
        @description: Index of the root item - always 0.
        """
        return 0

    def _get_parent(self, cur):
        """
        @description: Parent index - floor((i-1)/2).
        """
        return (cur - 1) // 2

    def _get_left(self, cur):
        """
        @description: Left child index (2*i+1), or None if there is none.
        """
        left = 2 * cur + 1
        return left if left < self.get_size() else None

    def _get_right(self, cur):
        """
        @description: Right child index (2*i+2), or None if there is none.
        """
        right = 2 * cur + 2
        return right if right < self.get_size() else None

    def __str__(self):
        """
        @description: Render the stored items in array order.
        """
        return str(self._heap[:self.get_size()])

    def graph(self):
        """
        @description: Print the heap as a tree, one level per line, framed by
        two dashed lines of equal width. An empty heap prints nothing.
        """
        size = self.get_size()
        if size == 0:
            return
        height = self.get_height()

        def get_cur_layer(layer):
            # Number of blank cells in front of / behind a level.
            return 2 ** (height - layer) - 1

        live = self._heap[:size]
        # Measure only stored items; every item counts, including 0.
        max_len = max(len(str(ele)) for ele in live)
        gap = " " * max_len
        border = "-" * (get_cur_layer(1) * 5)
        layer = 1
        print(border)
        print(gap * get_cur_layer(layer), end="")
        for i, ele in enumerate(live, start=1):
            print(f"{ele:>{max_len}}", end=gap * ((2 ** (height - layer + 1)) - 1))
            if 2 ** layer - 1 == i:
                # Last item of this level: finish the line, indent the next.
                print(gap * get_cur_layer(layer))
                layer += 1
                if height >= layer:
                    print(gap * get_cur_layer(layer), end="")
        print()
        # Reuse the top border: computing it from the final ``layer`` yields
        # a fractional width once the last level is completely filled.
        print(border)

if __name__ == '__main__':
    # Bulk-build a heap, then draw it after each kind of modification.
    h = MaxHeap([5,7,3,8,6,12,54,3,0,9,32,41])
    print(h)
    h.graph()
    h.remove()
    h.graph()
    h.replace(10)
    h.graph()
    for value in (30, 100):
        h.add(value)
    h.graph()
    h.clear()
    # The emptiness check is printed twice, as in the original demo.
    for _ in range(2):
        print(h.is_empty())
    h.add(1)
    print(h.get_size())
    print(h)

Trie

又称字典树、前缀树,其搜索效率只和字符串的长度有关,因此能够十分高效地实现前缀搜索功能。其每个节点只存放对应长度位置的字符,例如存放单词:["abc", "acc", "bca"],那么就按如下方式存储:

    root
    /  \
   a    b
  / \   |
 b   c  c
 |   |  |
 c   c  a

此时假如搜索单词abc,那么就会先搜索根下是否存在值为a的节点,如果存在则进入a节点,继续寻找是否存在值为b的节点,...,只要期间检索失败,则返回False

简单实现
class Trie:
    """Prefix tree over non-empty strings.

    Each node is stored in its parent's ``childrens`` dict under a single
    character, while the node's own ``value`` is the whole prefix leading to
    it. ``size`` is the number of complete words currently stored.
    """

    class Node:
        def __init__(self, value, parent):
            """
            @description: Basic data of a node:
                - value: the prefix this node stands for
                - parent: the parent node
                - childrens: child nodes keyed by their character
                - word: whether the prefix is a complete stored word
            """
            self.value = value
            self.childrens = {}
            self.parent = parent
            self.word = False

        def get_childrens(self):
            """
            @description: Return the dict of child nodes.
            """
            return self.childrens

        def __repr__(self):
            return f"{self.value}"

    def __init__(self):
        self.size = 0
        self.root = self.Node(None, None)

    def is_empty(self):
        """
        @description: Return True if no word is stored.
        """
        return self.size == 0

    def clear(self):
        """
        @description: Drop everything by emptying the root's children.
        """
        self.size = 0
        self.root.get_childrens().clear()

    def _get_v(self, v):
        """
        @description: Return the ``value`` of the node for prefix ``v``, or
        None when no such node exists.
        """
        node = self._find(v)
        return None if node is None else node.value

    def constains(self, v):
        """
        @description: Return True if ``v`` is stored as a complete word. The
        misspelt name is kept for existing callers; ``contains`` is an alias.
        """
        node = self._find(v)
        return node is not None and node.word

    contains = constains

    def _find(self, v):
        """
        @description: Return the node reached by following ``v`` character
        by character, or None if the path breaks off. Raises for invalid
        input (see ``_v_check``).
        """
        self._v_check(v)
        node = self.root
        for s in v:
            node = node.get_childrens().get(s)
            if node is None:
                return None
        return node

    def _v_check(self, v):
        """
        @description: Raise unless ``v`` is a non-empty string.
        """
        if not (isinstance(v, str) and v != ""):
            raise Exception("传入的内容必须为非空字符串")

    def add(self, v):
        """
        @description: Store the word ``v``: walk its characters, create the
        missing nodes and mark the last node as a word. ``size`` grows only
        when the word was not stored before.
        """
        self._v_check(v)
        node = self.root
        for i, s in enumerate(v):
            children = node.get_childrens()
            child = children.get(s)
            if child is None:
                child = self.Node(v[:i + 1], node)
                children[s] = child
            node = child
        if not node.word:
            node.word = True
            self.size += 1

    def remove(self, v):
        """
        @description: Delete the word ``v`` if it is stored. Its node is
        unmarked; afterwards every node on the way up that has neither
        children nor a word of its own is pruned.
        """
        node = self._find(v)
        if node is None or not node.word:
            return
        node.word = False
        self.size -= 1
        while node is not self.root and not node.word and not node.get_childrens():
            parent = node.parent
            # Children are keyed by one character: the last one of the prefix.
            parent.get_childrens().pop(node.value[-1])
            node = parent

    def start_with(self, v):
        """
        @description: Return True if some stored word starts with ``v``.
        """
        return self._find(v) is not None

    def __str__(self):
        """Render the trie level by level, one line of text per level."""
        pieces = []
        # (depth, node) pairs in breadth-first order; ``cursor`` reads them.
        queue = [(0, self.root)]
        cursor = 0
        layer = 0
        while cursor < len(queue):
            depth, node = queue[cursor]
            cursor += 1
            if depth > layer:
                pieces.append("\n")
                layer = depth
            for c, child in node.get_childrens().items():
                pieces.append(f'{c}(parent: {node} | word:{child.word})\t')
                queue.append((depth + 1, child))
        return "".join(pieces)

if __name__ == '__main__':
    # Store a handful of words, query them, then remove one and query again.
    t = Trie()
    for word in ("a", "gs", "acd", "ab", "cba", "aaaa", "bcd"):
        t.add(word)
    print(t)
    print(t.constains("a"))
    print(t.constains("ab"))
    print(t.start_with("ccc"))
    t.remove("a")
    print(t.start_with("ab"))
    for word in ("acd", "a"):
        print(t.constains(word))

有向图简单实现
class Graph:
    """
    Simple directed, weighted graph.

    ``_vertexs`` maps a vertex value to its ``Vertex`` record and ``_edges``
    is the set of all ``Edge`` objects. Two edges are equal only when source,
    target and weight all match, so edges between the same pair of vertices
    with different weights can coexist.
    """

    class Vertex:
        """A vertex: its value plus the edges entering and leaving it."""

        def __init__(self, v):
            self.val = v
            self.in_edges = []
            self.out_edges = []

        def __str__(self):
            # Rendered as "[source(weight), ...] -> value -> [target(weight), ...]".
            in_e = ["{}({})".format(e.fro, e.w) for e in self.in_edges]
            out_e = ["{}({})".format(e.to, e.w) for e in self.out_edges]
            return f"{in_e} -> {self.val} -> {out_e}".replace("'", "")

    class Edge:
        """A directed edge from ``fro`` to ``to`` carrying weight ``w``."""

        def __init__(self, fro, to, w=0):
            self.fro = fro
            self.to = to
            self.w = w

        def _key(self):
            # Single source of truth for equality and hashing.
            return (self.fro, self.to, self.w)

        def __eq__(self, edge):
            # Let Python fall back to its default comparison for foreign
            # types instead of failing on a missing attribute.
            if not isinstance(edge, Graph.Edge):
                return NotImplemented
            return self._key() == edge._key()

        def __hash__(self):
            return hash(self._key())

    def __init__(self, vertexs=None, edges=None):
        """
        Create a graph, optionally pre-populated.

        vertexs: iterable of vertex values to add (optional).
        edges: iterable of ``(fro, to)`` or ``(fro, to, w)`` tuples (optional).
        """
        self._vertexs = {}
        self._edges = set()
        for vertex in vertexs or ():
            self.add_vertex(vertex)
        for edge in edges or ():
            self.add_edge(*edge)

    @property
    def vertex_size(self):
        """Number of vertices."""
        return len(self._vertexs)

    @property
    def edge_size(self):
        """Number of edges."""
        return len(self._edges)

    def add_vertex(self, v):
        """
        Add vertex ``v`` if it is not present yet.

        Returns the ``Vertex`` record for ``v`` (new or existing).
        """
        if v not in self._vertexs:
            self._vertexs[v] = self.Vertex(v)
        return self._vertexs[v]

    def add_edge(self, fro, to, w=0):
        """
        Add the edge ``fro -> to`` with weight ``w``.

        Missing endpoints are created on the fly. An edge that already
        exists (same endpoints and weight) is not added again.
        """
        edge = self.Edge(fro, to, w)
        if edge in self._edges:
            return
        # add_vertex returns the existing record when the vertex is known.
        fro_vertex = self.add_vertex(fro)
        to_vertex = self.add_vertex(to)
        # Register the edge on both endpoints and in the global edge set.
        fro_vertex.out_edges.append(edge)
        to_vertex.in_edges.append(edge)
        self._edges.add(edge)

    def remove_vertex(self, v):
        """
        Remove vertex ``v`` together with every edge touching it.

        Each outgoing edge is dropped from its target's incoming list, each
        incoming edge from its source's outgoing list, and all of them from
        the edge set. Does nothing when ``v`` is unknown.
        """
        if v not in self._vertexs:
            return
        removed = self._vertexs.pop(v)
        for e in removed.out_edges:
            # A self-loop's other endpoint is the vertex being removed, which
            # is no longer in the vertex table -- nothing to update there.
            if e.to != v:
                self._vertexs[e.to].in_edges.remove(e)
            self._edges.discard(e)
        for e in removed.in_edges:
            if e.fro != v:
                self._vertexs[e.fro].out_edges.remove(e)
            # discard: a self-loop was already dropped by the loop above.
            self._edges.discard(e)

    def remove_edge(self, fro, to, w=0):
        """
        Remove the edge ``fro -> to`` with weight ``w``.

        The edge is dropped from the source's outgoing list, the target's
        incoming list and the edge set. Does nothing when no such edge exists.
        """
        edge = self.Edge(fro, to, w)
        if edge not in self._edges:
            return
        # list.remove compares with Edge.__eq__, so an equal edge is enough.
        self._vertexs[fro].out_edges.remove(edge)
        self._vertexs[to].in_edges.remove(edge)
        self._edges.remove(edge)

    def graph(self):
        """Print every vertex with its edges, framed by two separator lines."""
        print("-"*100)
        for v in self._vertexs.values():
            print(v)
        print("-"*100)

if __name__ == '__main__':
    # Demo: build a graph, then add and remove edges and vertices,
    # printing the graph after each stage.
    vertices = (0, 1, 2, 3, 4, 5)
    edges = ((0, 1, 10), (0, 2, 3), (1, 5), (1, 3), (2, 5, 1))
    g = Graph(vertices, edges)
    g.graph()
    # (0, 1, 10) is already present; (2, 8, 1) refers to a vertex not added so far.
    for spec in ((1, 2, 3), (0, 1, 10), (1, 0, 10), (2, 8, 1)):
        g.add_edge(*spec)
    g.add_vertex(10)
    g.graph()
    # The second removal uses a weight that was never added.
    for spec in ((0, 1, 10), (0, 1, 9)):
        g.remove_edge(*spec)
    g.graph()
    g.remove_vertex(0)
    g.graph()
    print(g.vertex_size, g.edge_size)
广度优先搜索

参考树的层序遍历

深度优先搜索

参考树的前序遍历

并查集

适用于存在多个互不相交的集合,并且需要判断某个元素属于哪个集合(或两个元素是否在同一个集合)的场景

快查型实现

每个元素都直接指向其根节点(相当于树只有一层),所以查询很快(直接返回该元素索引处存放的根节点即可,O(1));合并时需要把根节点等于其中一个集合根节点的所有元素都更新为另一个集合的根节点,所以合并时需要遍历全部元素(O(n))

class UnionFind(object):
    """
    Quick-find disjoint set over the integers ``0..l``.

    ``uset[i]`` directly stores the representative of the set that ``i``
    belongs to, so a lookup is a single index operation while a merge
    rewrites every member of one of the two sets.
    """

    def __init__(self, l):
        # Every element starts out as the representative of its own set.
        self.uset = list(range(l + 1))

    def find(self, e):
        """Return the representative stored for ``e``."""
        return self.uset[e]

    def union(self, e1, e2):
        """Merge the set containing ``e1`` into the set containing ``e2``."""
        old_rep, new_rep = self.find(e1), self.find(e2)
        if old_rep != new_rep:
            # Slice assignment updates the existing list in place.
            self.uset[:] = [
                new_rep if rep == old_rep else rep for rep in self.uset
            ]

    def judge(self, e1, e2):
        """Return True when ``e1`` and ``e2`` are in the same set."""
        return self.find(e1) == self.find(e2)

if __name__ == "__main__":
    # Demo: build three groups, query them, then merge two groups and query again.
    uf = UnionFind(10)
    for left, right in ((1, 2), (1, 3), (1, 5), (6, 7), (6, 8), (4, 9), (4, 10)):
        uf.union(left, right)
    for left, right in ((4, 9), (10, 9), (1, 9)):
        print(uf.judge(left, right))
    uf.union(1, 9)
    for left, right in ((1, 9), (10, 9)):
        print(uf.judge(left, right))
    print(uf.uset)
快合并型

查询时从元素出发沿父节点逐级向上,直到找到根节点(父节点为自身的元素);合并时先找到两个元素的根节点进行判断,如果根节点不同,则仅把其中一个根节点的父节点改为另一个根节点,所以合并的复杂度取决于查询的复杂度

class UnionFind(object):
    """
    Quick-union disjoint set over the integers ``0..l``.

    ``uset[i]`` stores the parent of ``i``; an element whose parent is itself
    is the root of its set. A merge only re-parents one root, so its cost is
    that of the two lookups.
    """

    def __init__(self, l):
        # Every element starts out as its own root.
        self.uset = list(range(l + 1))

    def find(self, e):
        """Follow parent links from ``e`` up to the root and return the root."""
        parent = self.uset[e]
        while parent != e:
            e = parent
            parent = self.uset[e]
        return e

    def union(self, e1, e2):
        """Merge the two sets by hanging the root of ``e2`` under the root of ``e1``."""
        root1 = self.find(e1)
        root2 = self.find(e2)
        if root1 != root2:
            self.uset[root2] = root1

    def judge(self, e1, e2):
        """Return True when ``e1`` and ``e2`` share the same root."""
        return self.find(e1) == self.find(e2)

if __name__ == "__main__":
    # Demo: build three groups, query them, then merge two groups and query again.
    uf = UnionFind(10)
    merges = ((1, 2), (1, 3), (1, 5), (6, 7), (6, 8), (4, 9), (4, 10))
    for first, second in merges:
        uf.union(first, second)
    for first, second in ((4, 9), (10, 9), (1, 9)):
        print(uf.judge(first, second))
    uf.union(1, 9)
    for first, second in ((1, 9), (10, 9)):
        print(uf.judge(first, second))
    print(uf.uset)

布隆过滤器

一个能够提供高效存储和查询的数据结构,其能够告诉你某个数据一定不存在或者可能存在

优缺点
优点
  • 优化了空间上的利用率
  • 能够保持高效的查询和添加效率
缺点
  • 不能保证结果一定准确(返回True未必存在,但是返回False一定不存在)
适合场景

适用于海量数据,并且允许有一定误判率的场景,例如大批量分布式爬虫。如果使用hash表进行存储,虽然时间复杂度为O(1)级别,但是hash表为了减少hash冲突,必然会有存储稀疏的特点,此时如果数据量过大,则可能导致内存不足的问题。而布隆过滤器则可以在一定程度上优化空间问题,通过设计一个更加紧凑(没那么稀疏)的数据结构来存储数据,并且保证了查询和添加的效率不会太低(取决于hash函数的数量以及hash函数的复杂度)

原理
存储方式

设计一个很长的二进制位数组(bit array),并假设有k个hash函数,那么对每个添加的数据,都会用这k个hash函数分别算出一个值,然后把位数组上这些值对应的位置置为1

校验方式

检查是否存在时,就会检查这个数据的几个hash函数值对应的位置是否都为1,都为1则认为可能存在,否则一定不存在。由此可以得出:只要有一个位置不为1,就肯定不存在;但是全部为1也未必存在,因为这些位置可能是被其他数据的hash函数值置为1的,即hash冲突造成的

简单实现
import math
import random
from bitarray import bitarray
# 二进制数据类

class BloomFilter:
    """
    Bloom filter backed by a ``bitarray``.

    ``contain`` answering False means the value was never added; answering
    True only means it may have been added.

    NOTE: the hash functions build on the built-in ``hash`` of a string,
    which Python salts per interpreter process by default, so the bit
    positions are only meaningful inside the process that filled the filter.
    """

    def __init__(self, n, p):
        """
        Size the filter for an expected data volume and error rate.

        n: expected number of items (must be > 0).
        p: acceptable false-positive rate (strictly between 0 and 1).

        Raises ValueError when ``n`` or ``p`` is out of range.
        """
        if n <= 0 or not (0 < p < 1):
            raise ValueError("请保证数据规模大于0,且误判率在0~1之间")
        # Number of bits and number of hash functions.
        self.size, self.hash_size = self._optimal_params(n, p)
        # Bit storage, initially all zero.
        self.bit = bitarray(self.size)
        self.bit.setall(0)
        # The hash functions applied to every value.
        self.hash_funs = self.get_hash_funs(self.hash_size)
        print(f"初始化完成!(数据规模:{n},容错率:{p*100}%,尺寸:{self.size},hash函数个数:{self.hash_size})")

    @staticmethod
    def _optimal_params(n, p):
        """
        Return ``(bit_count, hash_count)`` for ``n`` items at error rate ``p``.

        Uses m = -n*ln(p) / ln(2)^2 and k = m/n * ln(2), truncated to
        integers. Both results are clamped to at least 1: truncation would
        otherwise yield zero hash functions for small ``n`` or large ``p``,
        and a filter without hash functions reports every value as present.
        """
        size = max(1, int(-1 * (n * math.log(p)) / (math.log(2) ** 2)))
        hash_count = max(1, int(size / n * math.log(2)))
        return size, hash_count

    def add(self, v):
        """
        Add a value to the filter.

        Returns the list of bit positions that were set for ``v``.
        Raises ValueError when ``v`` is None.
        """
        self.check(v)
        indexs = self.getIndex(v)
        for index in indexs:
            self.setOne(index)
        return indexs

    def contain(self, v):
        """
        Test membership of ``v``.

        Returns True when every bit position of ``v`` is set (the value may
        have been added) and False otherwise (it was definitely not added).
        Raises ValueError when ``v`` is None.
        """
        self.check(v)
        return all(self.bit[index] == 1 for index in self.getIndex(v))

    def get_hash_funs(self, n):
        """Return a list of ``n`` hash functions, one per seed ``0..n-1``."""
        def hash_fun(n):
            # Factory binds the seed per function (avoids late binding).
            def my_hash(value):
                # Mixing the seed into the hashed text makes each function
                # map the same value to a different hash.
                s = str({"n": n, "value": value})
                return hash(s)
            return my_hash
        return [hash_fun(ni) for ni in range(n)]

    def check(self, v):
        """Reject empty input: raises ValueError when ``v`` is None."""
        if v is None:
            raise ValueError("传入数据不能为空")

    def getIndex(self, v):
        """Return the bit position computed by every hash function for ``v``."""
        return [hash_fun(v) % self.size for hash_fun in self.hash_funs]

    def setOne(self, index):
        """Set the bit at ``index`` to 1."""
        self.bit[index] = 1

def count_acc(n, rate):
    """
    Measure the false-positive rate of a BloomFilter empirically.

    Draws ``2 * n`` distinct random integers from ``[0, n * 100]``, adds the
    first ``n`` of them to a filter built for ``n`` items at error rate
    ``rate``, queries the remaining ``n`` (none of which were added) and
    prints the percentage reported as present.
    """
    bloom = BloomFilter(n, rate)
    samples = set()
    wanted = n * 2
    # A set guarantees the inserted and the queried values never overlap.
    while len(samples) < wanted:
        samples.add(random.randint(0, n * 100))
    samples = list(samples)
    inserted, unseen = samples[:n], samples[n:]
    for item in inserted:
        bloom.add(item)
    false_hits = sum(bloom.contain(item) for item in unseen)
    print(f"错误率:{false_hits / n * 100}%")

if __name__ == '__main__':
    # Measure the empirical error rate for 1000 items with a 1% target rate.
    count_acc(1000, 0.01)
    
# Sample output:
# 初始化完成!(数据规模:1000,容错率:1.0%,尺寸:9585,hash函数个数:6)
# 错误率:0.8%

运行后会发现错误率基本在设定的值左右,如果结果偏差较大,可能是因为以下原因:

  • hash函数设计的不够好(要尽可能的达到每个hash函数算出的值都不冲突)
  • ...
其他
删除功能

布隆过滤器一般不提供删除功能,因为如果将指定位置的值置零,可能别的也有hash指向这里,就容易出问题。如果一定要实现,可以通过引用计数方式(不是通过0/1进行存储,而是每当有一个指向,则加1,删除时进行减1即可),但是这样每个位置都变成int型,其也就失去了空间小的优势了

一致性哈希

理论参考:https://www.jianshu.com/p/735a3d4789fc

class HashCircle:
    """
    Consistent-hashing ring.

    Every real node is placed on the ring ``vnode_num`` times as virtual
    nodes. A key is served by the node owning the first virtual node whose
    hash is greater than the key's hash, wrapping around to the smallest
    virtual node when there is none.

    NOTE: ``get_hash`` uses the built-in ``hash``, and Python salts string
    hashes per interpreter process by default, so ring positions are only
    stable inside a single process.
    """

    def __init__(self, vnode_num=3):
        # Number of virtual nodes created per real node.
        self.vnode_num = vnode_num
        # Virtual-node hashes, kept sorted.
        self.sort_vnodes = []
        # Virtual-node hash -> real node.
        self.vnode_map = {}

    def add(self, node):
        """
        Place ``node`` on the ring.

        Adding a node that is already on the ring leaves the ring unchanged.
        Raises AssertionError when ``node`` is not a ``Node``.
        """
        self.check_node(node)
        for i in range(self.vnode_num):
            vnode = self.get_hash(str(node) + str(i))
            # A hash already on the ring must not be listed twice, otherwise
            # a stale copy would survive a later delete().
            if vnode not in self.vnode_map:
                self.sort_vnodes.append(vnode)
            self.vnode_map[vnode] = node
        # Lookups rely on the ring being sorted.
        self.sort_vnodes.sort()

    def delete(self, node):
        """
        Take ``node`` (matched by identity) and its virtual nodes off the ring.

        Raises AssertionError when ``node`` is not a ``Node``.
        """
        self.check_node(node)
        owned = {vn for vn, owner in self.vnode_map.items() if owner is node}
        if not owned:
            return
        for vn in owned:
            del self.vnode_map[vn]
        # Filtering keeps the remaining hashes in sorted order.
        self.sort_vnodes[:] = [vn for vn in self.sort_vnodes if vn not in owned]

    @staticmethod
    def check_node(node):
        """Ensure ``node`` is a ``Node`` (or subclass); raise AssertionError otherwise."""
        # Explicit raise instead of an assert statement so the check is not
        # stripped when Python runs with -O.
        if not isinstance(node, Node):
            raise AssertionError("必须传入Node类或其子类节点!")

    @staticmethod
    def get_hash(key):
        """Hash used to position both virtual nodes and keys on the ring."""
        return hash(key)

    def find_node(self, key):
        """
        Return the node responsible for ``key``, or None when the ring is empty.
        """
        if not self.sort_vnodes:
            return None
        from bisect import bisect_right

        # Index of the first virtual node strictly greater than the key hash.
        position = bisect_right(self.sort_vnodes, self.get_hash(key))
        if position == len(self.sort_vnodes):
            # Past the largest virtual node: wrap around to the smallest.
            position = 0
        return self.vnode_map[self.sort_vnodes[position]]

    def get_key(self, key):
        """
        Look ``key`` up on its responsible node, print the result and return it.

        Returns None when the ring is empty or the node holds no such key.
        """
        knode = self.find_node(key)
        if knode is None:
            return
        value = knode.get(key, None)
        print(f"查询key:{key},所在节点:{knode},value:{value}")
        return value

    def set_key(self, key, value):
        """Store ``value`` under ``key`` on its responsible node; no-op on an empty ring."""
        knode = self.find_node(key)
        if knode is None:
            return
        knode.set(key, value)


class Node:
    """Simulated storage node: an address plus an in-memory key/value map."""

    def __init__(self, ip):
        self.ip = ip
        self.map = {}

    def set(self, key, value):
        """Store ``value`` under ``key``."""
        self.map[key] = value

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` when absent."""
        return self.map.get(key, default)

    def __str__(self):
        # The string form is also what HashCircle hashes to place the node.
        return str(self.ip)

def test():
    """
    Demo of the hash ring: store ten keys across three nodes, read them
    back, then delete one node and read the same keys again.
    """
    hc = HashCircle()
    # Create three nodes and put them on the ring.
    nodes = [Node(f"192.168.0.{suffix}") for suffix in (1, 2, 3)]
    for node in nodes:
        hc.add(node)
    keys = [f"k{i}" for i in range(10)]
    # Write the data to whichever node each key maps to.
    for value, key in enumerate(keys):
        hc.set_key(key, value)
    # Show where every key lives.
    for key in keys:
        hc.get_key(key)
    # Delete the third node and look the same keys up again.
    print("------------------------------------------")
    hc.delete(nodes[2])
    for key in keys:
        hc.get_key(key)

if __name__ == "__main__":
    # Run the hash-ring demo when this snippet is executed as a script.
    test()

你可能感兴趣的:(数据结构示例(基于Python))