"""
    The implementation of the 'heap' data structure (max and min) used as a "priority queue".
    Reference: https://www.liwei.party/2019/01/10/algorithms-and-data-structures/priority-queue/
"""
class MaxHeap:
    """Fixed-capacity binary max-heap stored in a 1-indexed list.

    The heap behaves like a complete binary tree that is "recorded" in a
    list: the root lives at index 1 (index 0 is unused), the parent of the
    node at index ``i`` is ``i // 2`` and its children are ``2 * i`` and
    ``2 * i + 1``.
    """

    def __init__(self, capability):
        """Create an empty heap.

        :param capability: the maximum number of elements the heap can hold
        """
        # how many elements the heap can contain
        self.capability = capability
        # pre-allocated storage; '+ 1' because the first element sits at index 1
        self.data = [None for _ in range(capability + 1)]
        # number of elements currently stored
        self._count = 0

    def get_size(self) -> int:
        """Return the number of elements currently in the heap."""
        return self._count

    def set_size(self, size):
        """Overwrite the element counter (the stored data is not touched).

        :param size: the new value of the element counter
        """
        self._count = size

    def is_empty(self) -> bool:
        """Return True when the heap holds no elements."""
        return self._count == 0

    def insert(self, num):
        """Insert a new element into the heap.

        :param num: the new element
        :raises Exception: if the heap already holds ``capability`` elements
        """
        if self._count == self.capability:
            raise Exception("Heap reaches the limitation.")

        # put the element at the tail first ...
        self._count += 1
        self.data[self._count] = num

        # ... then let it swim up to restore the heap order
        self.shift_up(self._count)

    def shift_up(self, itemPos):
        """'Swim' the element at ``itemPos`` upwards while it is larger than its parent.

        :param itemPos: the (1-based) index of the element to move
        :raises TypeError: if ``itemPos`` is not a valid list index or the
            compared elements do not support ``<``
        """
        try:
            target = self.data[itemPos]

            # parent: itemPos // 2. Parents are shifted down into the hole
            # instead of swapping, so the target is written only once.
            while itemPos > 1 and self.data[itemPos // 2] < target:
                self.data[itemPos] = self.data[itemPos // 2]
                itemPos //= 2
            self.data[itemPos] = target
        except TypeError as err:
            # surface the problem instead of silently leaving the heap unordered
            raise TypeError(
                "shift_up needs an integer position and mutually comparable items"
            ) from err

    def shift_down(self, itemPos):
        """'Sink' the element at ``itemPos`` downwards while it is smaller than a child.

        As long as the current node has at least one child, the larger child
        is compared with the element and moved up if it is bigger.

        :param itemPos: the (1-based) index of the element to move
        :raises TypeError: if ``itemPos`` is not a valid list index or the
            compared elements do not support the ordering operators
        """
        try:
            target = self.data[itemPos]

            # left child: itemPos * 2, right child: itemPos * 2 + 1
            while itemPos * 2 <= self._count:
                child = itemPos * 2
                # pick the larger of the two children (the right one may not exist)
                if child + 1 <= self._count and self.data[child + 1] > self.data[child]:
                    child += 1

                # stop as soon as the larger child is not bigger than the target;
                # this cannot be folded into the loop condition because
                # 'itemPos' keeps changing inside the loop
                if self.data[child] <= target:
                    break

                self.data[itemPos] = self.data[child]
                itemPos = child

            self.data[itemPos] = target
        except TypeError as err:
            # surface the problem instead of silently leaving the heap unordered
            raise TypeError(
                "shift_down needs an integer position and mutually comparable items"
            ) from err

    def extract_max(self):
        """Remove and return the maximum element (the root of the heap).

        :return: the maximum value of the heap
        :raises Exception: if the heap is empty
        """
        if self.is_empty():
            raise Exception("Empty heap.")

        maxVal = self.data[1]

        # move the last element to the root and free the tail slot
        self.data[1], self.data[self._count] = self.data[self._count], self.data[1]
        self.data[self._count] = None
        self._count -= 1

        # the new root is probably too small: sink it to restore the heap order
        self.shift_down(1)

        return maxVal

# similar to MaxHeap
class MinHeap:
    """Fixed-capacity binary min-heap stored in a 1-indexed list.

    Same layout as a max-heap kept in a list: the root lives at index 1
    (index 0 is unused), the parent of index ``i`` is ``i // 2`` and its
    children are ``2 * i`` and ``2 * i + 1``; here the smallest element is
    kept at the root.
    """

    def __init__(self, capability):
        """Create an empty heap.

        :param capability: the maximum number of elements the heap can hold
        """
        self.capability = capability
        # '+ 1' because the first element sits at index 1
        self.data = [None for _ in range(capability + 1)]
        self._count = 0

    def get_size(self):
        """Return the number of elements currently in the heap."""
        return self._count

    def set_size(self, size):
        """Overwrite the element counter (the stored data is not touched).

        :param size: the new value of the element counter
        """
        self._count = size

    def is_empty(self):
        """Return True when the heap holds no elements."""
        return self._count == 0

    def insert(self, item):
        """Insert a new element into the heap.

        :param item: the new element
        :raises IndexError: if the heap already holds ``capability`` elements
        """
        # check before mutating, so a full heap keeps a consistent size
        if self._count >= self.capability:
            raise IndexError("Heap reaches the limitation.")

        # add to the tail
        self._count += 1
        self.data[self._count] = item

        # swim
        self.shift_up(self._count)

    def shift_up(self, itemPos):
        """'Swim' the element at ``itemPos`` upwards while it is smaller than its parent.

        :param itemPos: the (1-based) index of the element to move
        :raises TypeError: if ``itemPos`` is not a valid list index or the
            compared elements do not support ``>``
        """
        try:
            target = self.data[itemPos]

            # parent: itemPos // 2. Parents are shifted down into the hole
            # instead of swapping, so the target is written only once.
            while itemPos > 1 and self.data[itemPos // 2] > target:
                self.data[itemPos] = self.data[itemPos // 2]
                itemPos //= 2
            self.data[itemPos] = target
        except TypeError as err:
            # surface the problem instead of silently leaving the heap unordered
            raise TypeError(
                "shift_up needs an integer position and mutually comparable items"
            ) from err

    def extract_min(self):
        """Remove and return the minimum element (the root of the heap).

        :return: the minimum value of the heap
        :raises Exception: if the heap is empty
        """
        if self.is_empty():
            raise Exception("Empty heap.")

        minVal = self.data[1]

        # replace the root with the tail item and free the tail slot
        self.data[1] = self.data[self._count]
        self.data[self._count] = None
        self._count -= 1

        # sink
        self.shift_down(1)

        return minVal

    def shift_down(self, itemPos):
        """'Sink' the element at ``itemPos`` downwards while it is larger than a child.

        :param itemPos: the (1-based) index of the element to move
        :raises TypeError: if ``itemPos`` is not a valid list index or the
            compared elements do not support the ordering operators
        """
        try:
            target = self.data[itemPos]

            # left child: itemPos * 2, right child: itemPos * 2 + 1
            while itemPos * 2 <= self._count:
                child = itemPos * 2
                # pick the smaller of the two children (the right one may not exist)
                if child + 1 <= self._count and self.data[child + 1] < self.data[child]:
                    child += 1

                # stop as soon as the smaller child is not below the target;
                # this cannot be folded into the loop condition because
                # 'itemPos' keeps changing inside the loop
                if self.data[child] >= target:
                    break

                self.data[itemPos] = self.data[child]
                itemPos = child

            self.data[itemPos] = target
        except TypeError as err:
            # surface the problem instead of silently leaving the heap unordered
            raise TypeError(
                "shift_down needs an integer position and mutually comparable items"
            ) from err


今天的笔记包含基于双堆(Two Heaps)类型下的3个题目,它们在leetcode上的编号和题名分别是:

  • 295 - Find Median from Data Stream
  • 480 - Sliding Window Median
  • 502 - IPO

下面将根据以上顺序分别记录代码和对应心得,使用的编译器为Pycharm (Python3)。

Find Median from Data Stream

Median is the middle value in an ordered integer list. If the size of the list is even, there is no middle value, so the median is the mean of the two middle values.

For example,
[2,3,4], the median is 3
[2,3], the median is (2 + 3) / 2 = 2.5

Design a data structure that supports the following two operations:
1. void addNum(int num) - Add an integer number from the data stream to the data structure.
2. double findMedian() - Return the median of all elements so far.


Example:
addNum(1)
addNum(2)
findMedian() -> 1.5
addNum(3)
findMedian() -> 2




  1. 大顶堆的顶层元素值 <= 小顶堆的顶层元素值;
  2. 大顶堆的元素个数 = 小顶堆元素个数 or(小顶堆元素个数+1)。 



from Data_Structure.Heap import MaxHeap, MinHeap

class MedianFinder:
    """Running median of a data stream, kept with two heaps.

    Solution: two heaps (a max-heap and a min-heap). ``maxHeap`` stores the
    smaller half of the numbers and ``minHeap`` the larger half, with two
    invariants maintained by ``addNum``:

      1. the top of ``maxHeap`` <= the top of ``minHeap``;
      2. ``maxHeap`` holds as many elements as ``minHeap``, or exactly one more.
    """

    def __init__(self):
        """Initialize the two (initially empty) heaps."""
        self.maxHeap = MaxHeap(10)
        self.minHeap = MinHeap(10)

    @staticmethod
    def _ensure_room(heap):
        """Grow ``heap`` when it is full so that the next insert cannot overflow.

        Relies on the heap's public ``capability`` and ``data`` attributes
        (``data`` has ``capability + 1`` slots because index 0 is unused).
        """
        if heap.get_size() >= heap.capability:
            extra = max(1, heap.capability)
            heap.data.extend([None] * extra)
            heap.capability += extra

    def addNum(self, num: int) -> None:
        """Add ``num`` to the stream while keeping both heap invariants."""
        # insert into the max-heap and hand its largest number to the min-heap,
        # so every element of the max-heap stays <= every element of the min-heap
        self._ensure_room(self.maxHeap)
        self.maxHeap.insert(num)
        self._ensure_room(self.minHeap)
        self.minHeap.insert(self.maxHeap.extract_max())

        # check the balance: the max-heap must never be the smaller one
        if self.minHeap.get_size() > self.maxHeap.get_size():
            self.maxHeap.insert(self.minHeap.extract_min())

    def findMedian(self) -> float:
        """Return the median of all numbers added so far.

        At least one number must have been added before calling this.
        """
        # even count: mean of the two tops (read directly from index 1)
        if self.minHeap.get_size() == self.maxHeap.get_size():
            return (self.maxHeap.data[1] + self.minHeap.data[1]) / 2
        # odd count: the max-heap holds the extra, middle element
        return self.maxHeap.data[1]

Sliding Window Median

Median is the middle value in an ordered integer list. If the size of the list is even, there is no middle value, so the median is the mean of the two middle values.

[2,3,4] , the median is 3
[2,3], the median is (2 + 3) / 2 = 2.5

Given an array nums, there is a sliding window of size k which is moving from the very left of the array to the very right. You can only see the k numbers in the window. Each time the sliding window moves right by one position. Your job is to output the median array for each window in the original array.

For example,
Given nums = [1,3,-1,-3,5,3,6,7], and k = 3.

Window position                Median
---------------               -----
[1  3  -1] -3  5  3  6  7       1
 1 [3  -1  -3] 5  3  6  7       -1
 1  3 [-1  -3  5] 3  6  7       -1
 1  3  -1 [-3  5  3] 6  7       3
 1  3  -1  -3 [5  3  6] 7       5
 1  3  -1  -3  5 [3  6  7]      6
Therefore, return the median sliding window as [1,-1,-1,3,5,6].

You may assume k is always valid, ie: k is always smaller than input array's size for non-empty array.
Answers within 10^-5 of the actual value will be accepted as correct.





import heapq
from heapq import *

class Solution:
    """Sliding-window median using two ``heapq`` heaps.

    ``maxHeap`` keeps the smaller half of the window as negated values (so
    that the ``heapq`` min-heap acts as a max-heap) and ``minHeap`` keeps the
    larger half.
    """

    def __init__(self):
        # parameters
        self.ans = []
        self.maxHeap = []
        self.minHeap = []

    def medianSlidingWindow(self, nums: list, k: int) -> list:
        """Return the median of every window of size ``k`` sliding over ``nums``.

        :param nums: the input numbers
        :param k: the window size (assumed valid, i.e. 1 <= k <= len(nums)
            for a non-empty ``nums``)
        :return: the list of window medians, from left to right
        """
        # start from a clean state so the instance can be reused safely
        self.ans = []
        self.maxHeap = []
        self.minHeap = []

        # traverse
        for i, num in enumerate(nums):
            # push the element to maxHeap (smaller half) or minHeap (larger half)
            if not self.maxHeap or -self.maxHeap[0] >= num:
                heappush(self.maxHeap, -num)
            else:
                heappush(self.minHeap, num)

            # balance two heaps
            self.balance()

            # check the window: it is complete once 'start' is a valid index
            start = i + 1 - k
            if start >= 0:
                # get current median
                self.ans.append(self.getMedian())

                # remove the first element of the window (it leaves the window
                # in the next round); remember that maxHeap stores negated values
                removeNum = nums[start]
                # essential: '<=' so that a value equal to the top of maxHeap
                # is removed from maxHeap, where it is guaranteed to exist
                if removeNum <= -self.maxHeap[0]:
                    self.delete(self.maxHeap, -removeNum)
                else:
                    self.delete(self.minHeap, removeNum)

                # balance again because the number of elements has declined
                self.balance()

        return self.ans

    def getMedian(self):
        """Return the median of the numbers currently held in the two heaps."""
        # even: mean of the two middle values (halved separately, as floats)
        if len(self.maxHeap) == len(self.minHeap):
            return -self.maxHeap[0] / 2 + self.minHeap[0] / 2
        # odd: maxHeap holds the extra, middle element
        return -self.maxHeap[0]

    def delete(self, heap, num):
        """Remove one occurrence of ``num`` from ``heap`` and restore the heap order.

        :param heap: a ``heapq``-ordered list
        :param num: the stored value to remove (already negated for maxHeap)
        :raises ValueError: if ``num`` is not in ``heap``
        """
        # overwrite the deleted slot with the last element, then drop the tail
        index = heap.index(num)
        heap[index] = heap[-1]
        del heap[-1]

        # attention: the moved element may be out of place in either direction,
        # so sift it both ways (heapq's private helpers are used for that)
        if index < len(heap):
            heapq._siftup(heap, index)
            heapq._siftdown(heap, 0, index)

    def balance(self):
        """Move one element between the heaps so that their sizes stay balanced.

        Rules kept by the two heaps:
            1. the top element in maxHeap is always smaller or equal to any elements in minHeap
            2. the number of elements in maxHeap is equal or only gets one more element than that in minHeap
        :return: None
        """
        if len(self.minHeap) > len(self.maxHeap):
            # note: 'heappush' and 'heappop' keep the list heap-ordered
            heappush(self.maxHeap, -heappop(self.minHeap))
        elif len(self.maxHeap) > len(self.minHeap) + 1:
            heappush(self.minHeap, -heappop(self.maxHeap))


Suppose LeetCode will start its IPO soon. In order to sell a good price of its shares to Venture Capital, LeetCode would like to work on some projects to increase its capital before the IPO. Since it has limited resources, it can only finish at most k distinct projects before the IPO. Help LeetCode design the best way to maximize its total capital after finishing at most k distinct projects.

You are given several projects. For each project i, it has a pure profit Pi and a minimum capital of Ci is needed to start the corresponding project. Initially, you have W capital. When you finish a project, you will obtain its pure profit and the profit will be added to your total capital.

To sum up, pick a list of at most k distinct projects from given projects to maximize your final capital, and output your final maximized capital.

Example 1:

Input: k=2, W=0, Profits=[1,2,3], Capital=[0,1,1].

Output: 4

Explanation: Since your initial capital is 0, you can only start the project indexed 0.
             After finishing it you will obtain profit 1 and your capital becomes 1.
             With capital 1, you can either start the project indexed 1 or the project indexed 2.
             Since you can choose at most 2 projects, you need to finish the project indexed 2 to get the maximum capital.
             Therefore, output the final maximized capital, which is 0 + 1 + 3 = 4.

You may assume all numbers in the input are non-negative integers.
The length of Profits array and Capital array will not exceed 50,000.
The answer is guaranteed to fit in a 32-bit signed integer.



但有一点得注意,这里的"成本"与"利润"是绑定在一起的,因此在构建堆时一开始得将它们统一带入。在Python中,有一个叫做'zip()'的内置函数,可以绑定两个不同列表,将其元素一一对应结合为元组,然后以列表的形式返回(参考:Python3 zip() 函数)。 

知道了这个,我们就可以一开始将"成本"与"利润"用zip绑定起来,统一放入小顶堆自主排序(元组的排序中,首先比较第一个元素谁小,如果相等就比较第二个元素,以此类推),进入循环;接着,在k次循环里,根据我们现有的资金W判断当前有哪些项目可以做,并把当前能做的所有项目pop出来,将其利润单独push进大顶堆中;然后,根据大顶堆的元素,pop出利润最高的项目(最大值),并与我们最终需要返回的最大资金(final maximal capital)变量累加;最后,一直循环直到我们的精力不够(k=0)或者没项目可做(大顶堆没元素)了为止。

from heapq import *

class Solution:
    """LeetCode 502 (IPO): greedy selection of projects with two heaps."""

    def __init__(self):
        # parameters
        self.maxCapital = 0
        self.minHeap = []
        self.maxHeap = []

    def findMaximizedCapital(self, k: int, W: int, Profits: list, Capital: list) -> int:
        """Return the maximum capital reachable after finishing at most ``k`` projects.

        Solution: two heaps. A min-heap ordered by required capital yields the
        projects that become affordable; a max-heap of their profits yields the
        most profitable affordable project in each round.

        :param k: the maximum number of projects that can be finished
        :param W: the initial capital
        :param Profits: ``Profits[i]`` is the pure profit of project ``i``
        :param Capital: ``Capital[i]`` is the capital needed to start project ``i``
        :return: the final maximized capital
        """
        # bind each capital to its profit and order the pairs in a min-heap;
        # tuples compare by their first element (the capital) first
        self.minHeap = list(zip(Capital, Profits))
        heapify(self.minHeap)
        # profits of affordable projects, negated so heapq acts as a max-heap;
        # reset here so the instance can be reused safely
        self.maxHeap = []
        self.maxCapital = W

        # process projects
        for _ in range(k):
            # unlock every project we can currently afford (several projects
            # may need the same capital but give different profits)
            while self.minHeap and self.minHeap[0][0] <= self.maxCapital:
                _, profit = heappop(self.minHeap)
                heappush(self.maxHeap, -profit)

            # no affordable project is left (k is larger than what can be done)
            if not self.maxHeap:
                break

            # take the biggest profit; the stored value is negated, so subtract
            self.maxCapital -= heappop(self.maxHeap)

        return self.maxCapital

# Demo run: k = 11 projects allowed, starting capital W = 11.
solution = Solution()
print(
    solution.findMaximizedCapital(
        k=11,
        W=11,
        Profits=[1, 2, 3],
        Capital=[11, 12, 13],
    )
)




  1. 这种模式在优先队列计划安排问题(Scheduling)中有奇效;
  2. 如果问题让你找一组数中的最大/最小/中位数;
  3. 有时候,这种模式在涉及到二叉树数据结构时很有用(e.g. 完全二叉树)。

这里多说一句:以上代码中的 "from heapq import *"(通配符导入)其实并不是规范写法,应该使用 "import heapq" 并通过模块名调用,或者显式导入所需的名称。以后的练习中我会注意代码规范的。



