蓄水池抽样是一系列的随机算法,其目的在于从包含 $n$ 个项目的集合 $S$ 中选取 $k$ 个样本,其中 $n$ 为一很大或未知的数量,尤其适用于不能把所有 $n$ 个项目都存放到内存的情况。
i | $P(i \mid n=1)$ | $P(i \mid n=2)$ | $P(i \mid n=3)$ | $P(i \mid n)$ |
---|---|---|---|---|
1 | $1$ | $1\times(1-\frac{1}{2})=\frac{1}{2}$ | $\frac{1}{2}\times(1-\frac{1}{3})=\frac{1}{3}$ | $\frac{1}{n-1}\times(1-\frac{1}{n})=\frac{1}{n}$ |
2 | | $\frac{1}{2}$ | $\frac{1}{2}\times(1-\frac{1}{3})=\frac{1}{3}$ | $\frac{1}{n-1}\times(1-\frac{1}{n})=\frac{1}{n}$ |
3 | | | $\frac{1}{3}$ | $\frac{1}{n-1}\times(1-\frac{1}{n})=\frac{1}{n}$ |
n | | | | $\frac{1}{n}$ |
import random


def sample_one(n):
    """Pick one integer from 1..n uniformly in a single pass.

    This is reservoir sampling with a reservoir of size one: the i-th item
    replaces the current choice with probability 1/i, so once all n items
    have been seen each of them is the survivor with probability 1/n.

    Args:
        n: Number of items in the stream; the items are the integers 1..n.

    Returns:
        The selected item, or 0 when n < 1 (there is nothing to choose).
    """
    chosen = 0
    for i in range(1, n + 1):
        # random() lies in [0, 1), so the first item (threshold 1.0) is
        # always taken; 1.0 / i keeps the threshold a float everywhere.
        if random.random() < 1.0 / i:
            chosen = i
    return chosen


n = 10
# Repeat the experiment nine times to show that the pick varies.
for trial in range(1, 10):
    print("第 {} 次:{}".format(trial, sample_one(n)))
数学归纳法
$$P(i=1 \mid n) = \frac{k}{n-1} \times \left(1-\frac{k}{n}\times\frac{1}{k}\right)=\frac{k}{n}$$
import random


def sample_k(n, k):
    """Pick k distinct integers from 1..n uniformly in a single pass.

    The first k items fill the reservoir. Every later item i draws a slot
    number j in 1..i and overwrites reservoir slot j when j <= k, which
    happens with probability k/i.

    Args:
        n: Number of items in the stream; the items are the integers 1..n.
        k: Reservoir size.

    Returns:
        A list holding the sampled items. It has min(n, k) entries (none
        when k < 1) and never contains the same item twice, because each
        item is written into the reservoir at most once.
    """
    reservoir = []
    for i in range(1, n + 1):
        if i <= k:
            reservoir.append(i)
            continue
        j = random.randint(1, i)  # unlike range(), randint includes i
        if j <= k:
            reservoir[j - 1] = i
    return reservoir


n = 10
k = 3
arr = sample_k(n, k)
print(arr)
注意:不可能生成两个相同的数
# Definition for singly-linked list.
# class ListNode:
# def __init__(self, val=0, next=None):
# self.val = val
# self.next = next
class Solution:
    """Return a uniformly random node value from a singly linked list."""

    # The annotation is a string so the class can be defined even when
    # Optional / ListNode are not importable at definition time.
    def __init__(self, head: "Optional[ListNode]"):
        """Remember the list head; the list is walked again on every query.

        Args:
            head: First node of the list. Nodes must expose ``val`` and
                ``next`` attributes; ``None`` denotes an empty list.
        """
        self.head = head

    def getRandom(self) -> int:
        """Return the value of one node, each node being equally likely.

        Uses size-one reservoir sampling so the list length does not need
        to be known in advance: the i-th node replaces the current answer
        with probability 1/i.

        Returns:
            The chosen node's ``val``, or ``None`` if the list is empty.
        """
        ans = None
        node = self.head
        i = 0
        while node is not None:
            i += 1
            # randrange(i) is uniform over 0..i-1, so this fires with
            # probability exactly 1/i without any float comparison.
            if random.randrange(i) == 0:
                ans = node.val
            node = node.next
        return ans
class Solution(object):
    """Return a random index at which a given value occurs in a list."""

    def __init__(self, nums):
        """Keep a reference to the list and cache its length.

        Args:
            nums: Sequence of values to search.
        """
        self.nums = nums
        self.n = len(nums)

    def pick(self, target):
        """Return an index i with nums[i] == target, chosen uniformly.

        Uses size-one reservoir sampling over the matching positions: the
        j-th match replaces the current answer with probability 1/j, so no
        list of matching indices has to be built.

        Args:
            target: Value to look for.

        Returns:
            One matching index, or -1 if target does not occur.
        """
        ans = -1
        matches = 0
        for i, value in enumerate(self.nums):
            if value != target:
                continue
            matches += 1
            # Integer draw: true with probability exactly 1/matches.
            if random.randrange(matches) == 0:
                ans = i
        return ans
错误解法:先等概率地选矩形,再在矩形内选点,没有按面积(整点个数)加权,因此各个点被选中的概率并不相等。
class Solution(object):
    """First attempt at picking a random integer point from rectangles.

    NOTE: every rectangle is chosen with the same probability no matter
    how many integer points it covers, so points inside small rectangles
    are over-represented. The behaviour is kept as-is to illustrate that
    pitfall.
    """

    def __init__(self, rects):
        """Store the rectangles.

        Args:
            rects: Sequence of 4-item rectangles ``[a, b, x, y]``; the
                point's first coordinate is drawn from a..x and its second
                from b..y (both inclusive).
        """
        self.rects = rects
        self.n = len(rects)

    @staticmethod
    def _reservoir_int(lo, hi):
        """Return a uniform integer from lo..hi via size-one reservoir sampling.

        Returns 0 when the range is empty (hi < lo).
        """
        chosen = 0
        for seen, value in enumerate(range(lo, hi + 1), 1):
            # randint(1, seen) == 1 holds with probability 1/seen.
            if random.randint(1, seen) == 1:
                chosen = value
        return chosen

    def pick(self):
        """Return ``[px, py]``, a point inside one of the rectangles.

        The rectangle is chosen uniformly (not weighted by size), then
        each coordinate is chosen uniformly within that rectangle.
        """
        idx = -1
        for i in range(self.n):
            # Rectangle i replaces the current choice with probability
            # 1/(i + 1): a plain, unweighted reservoir.
            if random.randint(1, i + 1) == 1:
                idx = i
        a, b, x, y = self.rects[idx]
        return [self._reservoir_int(a, x), self._reservoir_int(b, y)]
class Solution(object):
    """Pick a uniformly random integer point covered by a set of rectangles."""

    def __init__(self, rects):
        """Store the rectangles.

        Args:
            rects: Sequence of 4-item rectangles ``[a, b, x, y]`` covering
                the integer points with first coordinate in a..x and second
                coordinate in b..y (both inclusive). Assumes a <= x and
                b <= y, and that rectangles do not overlap -- TODO confirm
                against the caller.
        """
        self.rects = rects
        self.n = len(rects)

    def pick(self):
        """Return ``[px, py]``, each covered integer point equally likely.

        A rectangle is selected with probability proportional to the
        number of integer points it contains (weighted reservoir
        sampling), then a point is drawn uniformly inside it.
        """
        idx = -1
        total = 0
        for i, (a, b, x, y) in enumerate(self.rects):
            points = (x - a + 1) * (y - b + 1)
            total += points
            # Replace the current choice with probability points / total,
            # which leaves rectangle i selected with probability
            # points_i / (sum of all points) at the end of the pass.
            if random.randint(1, total) <= points:
                idx = i
        a, b, x, y = self.rects[idx]
        # Both bounds are known here, so draw each coordinate directly.
        return [random.randint(a, x), random.randint(b, y)]
from collections import defaultdict


class Solution(object):
    """Flip random 0-cells of an m x n matrix, never the same cell twice."""

    def __init__(self, m, n):
        """Start with no cell flipped.

        Args:
            m: Number of rows.
            n: Number of columns.
        """
        self.m = m
        self.n = n
        self.used = set()  # (row, col) pairs already flipped
        self.d = defaultdict(int)  # row -> number of flipped cells in it

    def flip(self):
        """Flip one not-yet-flipped cell, each such cell equally likely.

        The row is chosen by weighted reservoir sampling (weight = cells
        still free in that row); the column is then chosen by size-one
        reservoir sampling over that row's free cells.

        Returns:
            ``[row, col]`` of the flipped cell, or ``[-1, -1]`` when every
            cell has already been flipped (state is left untouched).
        """
        ansM = -1
        count = 0
        for i in range(self.m):
            remain = self.n - self.d[i]
            if remain == 0:
                continue
            count += remain
            # Row i takes over with probability remain / count.
            if random.randint(1, count) <= remain:
                ansM = i
        if ansM == -1:
            # No free cell anywhere: do not record a bogus (-1, j) entry.
            return [-1, -1]

        ansN = -1
        cnt = 0
        for j in range(self.n):
            if (ansM, j) in self.used:
                continue
            cnt += 1
            # The cnt-th free column takes over with probability 1 / cnt.
            if random.randint(1, cnt) == 1:
                ansN = j
        self.d[ansM] += 1
        self.used.add((ansM, ansN))
        return [ansM, ansN]

    def reset(self):
        """Mark every cell as not flipped again."""
        self.used.clear()
        self.d.clear()
class Solution(object):
    """Flip random 0-cells of an m x n matrix using an index remap table."""

    def __init__(self, m, n):
        """Start with no cell flipped.

        Args:
            m: Number of rows.
            n: Number of columns.
        """
        self.m = m
        self.n = n
        # Slots 0..total-1 stand for the cells that are still unflipped.
        self.total = self.m * self.n
        # slot -> flattened cell index, stored only for slots whose cell
        # differs from the slot number itself.
        self.Map = {}

    def flip(self):
        """Flip one not-yet-flipped cell, each such cell equally likely.

        Draws a slot among the remaining ones, reads which cell it stands
        for, then moves the cell of the last remaining slot into the drawn
        slot so the shrunken slot range still covers exactly the unflipped
        cells.

        Returns:
            ``[row, col]`` of the flipped cell.

        Raises:
            ValueError: if no unflipped cell remains (``random.randint``
                is called on an empty range).
        """
        x = random.randint(0, self.total - 1)
        self.total -= 1
        idx = self.Map.get(x, x)
        # self.total is now the index of the last slot, which is being
        # dropped; its cell takes over slot x.
        self.Map[x] = self.Map.get(self.total, self.total)
        return [idx // self.n, idx % self.n]

    def reset(self):
        """Mark every cell as not flipped again."""
        self.total = self.m * self.n
        self.Map.clear()