基于贪心思想的近似算法是目前常用的解决集合覆盖问题的算法,网上也有很多相关的实现代码。
除此之外,线性规划(先求解松弛问题,再对结果做舍入)也能够解决集合覆盖问题,只是其运行效率通常低于贪心算法,因此不太常用(具体原理这里不再解释)。下面对比分析两个算法在解决集合覆盖问题上的性能。
生成集合
# Build the universe X: n distinct integers drawn from [1, 10000).
# random.sample draws without replacement, so the elements are already
# unique; no set() conversion and no repeated resampling is needed.
# n = 999 is the value the former `for n in range(1000)` loop ended on;
# the following steps reuse n as the number of subsets to generate.
n = 999
X = random.sample(range(1, 10000), n)
print('集合X中元素个数:', len(X))
print('集合X:', X)
生成相应的子集族
# Generate the family of subsets of X.
#
# Generation rule:
#   1. Keep adding subsets until every element of X is covered.
#   2. The family must hold as many subsets as X has elements (n), so the
#      remaining slots are filled with random subsets of X.
# The concrete rule can be redesigned to fit other requirements.
S0 = random.sample(X, 20)
n1 = random.randint(1, 20)
x1 = random.randint(1, n1)
# S1 reuses x1 elements of S0 and adds n1 - x1 elements from outside S0.
# random.sample requires a sequence (sets raise TypeError since Python 3.11),
# so every population below is a list.
S0_members = set(S0)
outside_S0 = [e for e in X if e not in S0_members]
S1 = random.sample(S0, x1) + random.sample(outside_S0, n1 - x1)
Sub_set = [S0, S1]

# Track covered elements incrementally instead of re-flattening the whole
# family on every iteration: an ordered list to sample from plus a set for
# O(1) membership tests.
covered_pool = list(dict.fromkeys(S0 + S1))
covered = set(covered_pool)
for item in range(2, n):
    S_item_len = random.randint(1, 20)
    S_last_len = random.randint(1, S_item_len)
    fresh_needed = S_item_len - S_last_len
    uncovered = [e for e in X if e not in covered]
    if len(uncovered) >= fresh_needed:
        # New subset: S_last_len already-covered elements plus
        # fresh_needed elements that no earlier subset contains.
        fresh = random.sample(uncovered, fresh_needed)
        S_now = random.sample(covered_pool, S_last_len) + fresh
        Sub_set.append(S_now)
        covered_pool.extend(fresh)
        covered.update(fresh)
    else:
        # Fewer uncovered elements than requested: one last subset takes
        # all of them and the covering phase is finished.
        Sub_set.append(uncovered)
        covered.update(uncovered)
        break

# Make sure X is fully covered even if the loop above ran out of iterations
# first; the family size must stay at n, so fold the rest into the last subset.
leftover = [e for e in X if e not in covered]
if leftover:
    Sub_set[-1] = Sub_set[-1] + leftover

# Pad the family with random subsets until it holds exactly n subsets.
for j in range(n - len(Sub_set)):
    select_num = random.randint(1, 20)
    select_sub = random.sample(X, select_num)
    Sub_set.append(select_sub)
print('子集族:', Sub_set)
# Solve the linear-programming relaxation of the set cover instance.
#
# Objective:   min C.x    (every coefficient in C is 1)
# Constraints: A.x >= B   (each element lies in at least one chosen subset)
#
# C: objective coefficients, one per subset.
C = [1] * n
# A[i][j] is 1 when element X[i] belongs to subset Sub_set[j], otherwise 0.
# Subsets are converted to sets once so each membership test is O(1).
subset_members = [set(subset) for subset in Sub_set]
A = [[1 if element in members else 0 for members in subset_members]
     for element in X]
B = [1] * n
# One continuous variable in [0, 1] per subset. A separate name is used so
# the universe X is not overwritten by the LP variables.
x_vars = [pulp.LpVariable(f'x{i}', lowBound=0, upBound=1) for i in range(n)]
# Minimization problem; for maximization use pulp.LpMaximize instead.
m = pulp.LpProblem(sense=pulp.LpMinimize)
m += pulp.lpDot(C, x_vars)
# One covering constraint per element of X.
for row, bound in zip(A, B):
    m += (pulp.lpDot(row, x_vars) >= bound)
m.solve()
# Fail loudly if no optimum was found; otherwise the variable values below
# would be None or meaningless.
if m.status != pulp.LpStatusOptimal:
    raise RuntimeError(
        f'LP solver finished with status {pulp.LpStatus[m.status]}')
object_result = pulp.value(m.objective)
result = [pulp.value(var) for var in x_vars]
# 集合覆盖问题-线性规划算法
import time
import random
import pulp
from itertools import chain
import matplotlib.pyplot as plt
# Set cover by LP relaxation + rounding, timed for several universe sizes.
iter_ = [100, 200, 500]  # universe sizes used to compare running time
time_cost = []  # elapsed seconds per size, aligned with iter_
# Slack for the rounding threshold: the solver returns floats, so a value
# that is mathematically 1/f may come back slightly below it.
ROUND_EPS = 1e-6
for n in iter_:
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start_t = time.perf_counter()

    # Universe X: n distinct integers from [1, 10000).
    X = random.sample(range(1, 10000), n)
    print('集合X中元素个数:', len(X))
    print('集合X:', X)

    # --- Generate the family of n subsets -------------------------------
    S0 = random.sample(X, 20)
    n1 = random.randint(1, 20)
    x1 = random.randint(1, n1)
    # S1 reuses x1 elements of S0 and adds n1 - x1 elements from outside S0.
    # random.sample requires a sequence (sets raise TypeError since
    # Python 3.11), so every population below is a list.
    S0_members = set(S0)
    outside_S0 = [e for e in X if e not in S0_members]
    S1 = random.sample(S0, x1) + random.sample(outside_S0, n1 - x1)
    Sub_set = [S0, S1]

    # Track covered elements incrementally: an ordered list to sample from
    # plus a set for O(1) membership tests.
    covered_pool = list(dict.fromkeys(S0 + S1))
    covered = set(covered_pool)
    for item in range(2, n):
        S_item_len = random.randint(1, 20)
        S_last_len = random.randint(1, S_item_len)
        fresh_needed = S_item_len - S_last_len
        uncovered = [e for e in X if e not in covered]
        if len(uncovered) >= fresh_needed:
            # New subset: S_last_len already-covered elements plus
            # fresh_needed elements that no earlier subset contains.
            fresh = random.sample(uncovered, fresh_needed)
            S_now = random.sample(covered_pool, S_last_len) + fresh
            Sub_set.append(S_now)
            covered_pool.extend(fresh)
            covered.update(fresh)
        else:
            # Fewer uncovered elements than requested: one last subset
            # takes all of them and the covering phase is finished.
            Sub_set.append(uncovered)
            covered.update(uncovered)
            break

    # Make sure X is fully covered even if the loop ran out of iterations
    # first; the family size must stay at n, so fold the rest into the
    # last subset. Without full coverage the LP below is infeasible.
    leftover = [e for e in X if e not in covered]
    if leftover:
        Sub_set[-1] = Sub_set[-1] + leftover

    # Pad the family with random subsets until it holds exactly n subsets.
    for j in range(n - len(Sub_set)):
        select_num = random.randint(1, 20)
        select_sub = random.sample(X, select_num)
        Sub_set.append(select_sub)
    print('子集族:', Sub_set)

    # --- Build and solve the LP relaxation ------------------------------
    # Objective:   min C.x    (every coefficient in C is 1)
    # Constraints: A.x >= B   (each element lies in at least one subset)
    C = [1] * n
    # A[i][j] is 1 when element X[i] belongs to subset Sub_set[j], else 0.
    subset_members = [set(subset) for subset in Sub_set]
    A = [[1 if element in members else 0 for members in subset_members]
         for element in X]
    B = [1] * n
    # Frequency of each element = number of subsets containing it, i.e. the
    # row sums of A; f is the largest frequency and drives the rounding.
    all_f = [sum(row) for row in A]
    f = max(all_f)

    # One continuous variable in [0, 1] per subset. A separate name is used
    # so the universe X is not overwritten by the LP variables.
    x_vars = [pulp.LpVariable(f'x{i}', lowBound=0, upBound=1)
              for i in range(n)]
    # Minimization problem; for maximization use pulp.LpMaximize instead.
    m = pulp.LpProblem(sense=pulp.LpMinimize)
    m += pulp.lpDot(C, x_vars)
    # One covering constraint per element of X.
    for row, bound in zip(A, B):
        m += (pulp.lpDot(row, x_vars) >= bound)
    m.solve()
    # Fail loudly if no optimum was found; otherwise the variable values
    # below would be None or meaningless.
    if m.status != pulp.LpStatusOptimal:
        raise RuntimeError(
            f'LP solver finished with status {pulp.LpStatus[m.status]}')
    object_result = pulp.value(m.objective)
    result = [pulp.value(var) for var in x_vars]

    # --- Round the fractional solution ----------------------------------
    # Select every subset whose LP value is at least 1/f. Each element lies
    # in at most f subsets whose values sum to >= 1, so at least one of
    # them reaches 1/f and the selection is a valid cover.
    threshold = 1 / f - ROUND_EPS
    result = [1 if value >= threshold else 0 for value in result]
    final_set = [subset for subset, picked in zip(Sub_set, result)
                 if picked == 1]
    print('可行解C:', final_set)

    # Record the elapsed time for this problem size.
    end_t = time.perf_counter()
    time_iter = end_t - start_t
    # The message previously said "dynamic programming"; the algorithm
    # being timed is linear programming.
    print("集合覆盖问题-基于线性规划运行时间", time_iter)
    time_cost.append(time_iter)
    print('\n')

# Plot running time against universe size.
plt.plot(iter_, time_cost)
plt.show()