(1)Bootstrapping:训练随机森林,对于每一个树就是采样booststraping方法采样,也算是随机森林的其中一个随机性表现。再比如bagging方法,也是基于该有放回重采样方法。
(2)数据扩充:数据降采样
(3)Focal Loss:对不同样本预测错误的惩罚权重不同
主动学习调研(PS:下次要侧重主动学习在目标检测中的应用,图像分类的太简单了!)
def IOU(x1,y1,X1,Y1, x2,y2,X2,Y2):
xx = max(x1,x2)
XX = min(X1,X2)
yy = max(y1,y2)
YY = min(Y1,Y2)
m = max(0., XX-xx)
n = max(0., YY-yy)
Jiao = m*n
Bing = (X1-x1)*(Y1-y1)+(X2-x2)*(Y2-y2)-Jiao
return Jiao/Bing
知乎:IOU, NMS原理与代码实现
import numpy as np
def py_cpu_nms(dets, thresh):
"""Pure Python NMS baseline."""
x1 = dets[:, 0] # pred bbox top_x
y1 = dets[:, 1] # pred bbox top_y
x2 = dets[:, 2] # pred bbox bottom_x
y2 = dets[:, 3] # pred bbox bottom_y
scores = dets[:, 4] # pred bbox cls score
areas = (x2 - x1 + 1) * (y2 - y1 + 1) # pred bbox areas
order = scores.argsort()[::-1] # 对pred bbox按score做降序排序,对应step-2
keep = [] # NMS后,保留的pred bbox
while order.size > 0:
i = order[0] # top-1 score bbox
keep.append(i) # top-1 score的话,自然就保留了
xx1 = np.maximum(x1[i], x1[order[1:]]) # top-1 bbox(score最大)与order中剩余bbox计算NMS
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter) # 无处不在的IoU计算~~~
inds = np.where(ovr <= thresh)[0] # 这个操作可以对代码断点调试理解下,结合step-3,我们希望剔除所有与当前top-1 bbox IoU > thresh的冗余bbox,那么保留下来的bbox,自然就是ovr <= thresh的非冗余bbox,其inds保留下来,作进一步筛选
order = order[inds + 1] # 保留有效bbox,就是这轮NMS未被抑制掉的幸运儿,为什么 + 1?因为ind = 0就是这轮NMS的top-1,剩余有效bbox在IoU计算中与top-1做的计算,inds对应回原数组,自然要做 +1 的映射,接下来就是step-4的循环
return keep # 最终NMS结果返回
if __name__ == '__main__':
dets = np.array([[100,120,170,200,0.98],
[20,40,80,90,0.99],
[20,38,82,88,0.96],
[200,380,282,488,0.9],
[19,38,75,91, 0.8]])
print(py_cpu_nms(dets, 0.5))
最大连续子数组乘积
二叉树
解释: 知乎:Python实现堆排序及原理详解 + TopK面试题(多图解释)
求第k个最大值:215. 数组中的第K个最大元素,解答: 三种方法解决TopK问题
方法1:快排
#只排序后k个大的数
#获得前n-k小的数O(n),进行快排O(klogk)
#快速排序
#只排序后k个大的数
#获得前n-k小的数O(n),进行快排O(klogk)
def partition(nums, left, right):
pivot = nums[left]#初始化一个待比较数据
i,j = left, right
while(i < j):
while(i<j and nums[j]>=pivot): #从后往前查找,直到找到一个比pivot更小的数
j-=1
nums[i] = nums[j] #将更小的数放入左边
while(i<j and nums[i]<=pivot): #从前往后找,直到找到一个比pivot更大的数
i+=1
nums[j] = nums[i] #将更大的数放入右边
#循环结束,i与j相等
nums[i] = pivot #待比较数据放入最终位置
return i #返回待比较数据最终位置
#快速排序
def quicksort(nums, left, right):
if left < right:
index = partition(nums, left, right)
quicksort(nums, left, index-1)
quicksort(nums, index+1, right)
arr = [1,3,2,2,0]
quicksort(arr, 0, len(arr)-1)
print(arr)
def topk_split(nums, k, left, right):
#寻找到第k个数停止递归,使得nums数组中index左边是前k个小的数,index右边是后面n-k个大的数
if (left<right):
index = partition(nums, left, right)
if index==k:
return
elif index < k:
topk_split(nums, k, index+1, right)
else:
topk_split(nums, k, left, index-1)
def topk_sort_right(nums, k):
topk_split(nums, len(nums)-k, 0, len(nums)-1)
topk = nums[len(nums)-k:]
quicksort(topk, 0, len(topk)-1)
return topk #只排序后k个数字
arr = [0,0,1,3,4,5,0,-7,6,7]
k = 4
l = len(arr)
print(topk_sort_right(arr, k))
方法2:最小堆
# !/usr/bin/env python
# -*- coding:gbk -*-
import sys
import heapq
class TopKHeap(object):
def __init__(self, k):
self.k = k
self.data = []
def push(self, elem):
if len(self.data) < self.k:
heapq.heappush(self.data, elem)
else:
topk_small = self.data[0]
if elem > topk_small:
heapq.heapreplace(self.data, elem)
def topk(self):
return [x for x in reversed([heapq.heappop(self.data) for x in range(len(self.data))])]
def main():
list_num = [1, 7, 3, 5, 4, 6, 9, 8, 2]
th = TopKHeap(5)
for i in list_num:
th.push(i)
print(th.topk())
if __name__ == "__main__":
main()
https://leetcode-cn.com/problems/split-array-largest-sum/
二分+贪心
class Solution:
def splitArray(self, nums: List[int], m: int) -> int:
def check(x: int) -> bool:
total, cnt = 0, 1
for num in nums:
if total + num > x:
cnt += 1
total = num
else:
total += num
return cnt <= m
left = max(nums)
right = sum(nums)
while left < right:
mid = (left + right) // 2
if check(mid):
right = mid
else:
left = mid + 1
return left