先看个问题:一个旅行商需要去拜访一系列城市,这些城市之间的距离都不相同,怎么才能访问每个城市一次并回到起始城市最短回路?
这一问题叫旅行商问题(TSP问题),它是组合优化中的一个NP困难问题,当前有很多种解决算法,比如暴力搜索法、随机法或者模拟退火算法、遗传算法、蚁群算法等启发式算法。
今天来主要讲讲遗传算法是怎么解决此类问题的。
什么是遗传算法
遗传算法是一类受生物进化启发的一种随机搜索与优化的算法,它通过变异和交叉两种手段在当前已知的最好解来生成后续的解。
比如说蜜蜂采密,在亿万年的进化过程中,体内藏有一套高效的在不同花朵中寻找最短路劲的算法
遗传算法流程
遗传算法运行中会设置种群的规模、交叉率、变异率、保留胜出比例、进化迭代次数等参数,整体流程如下:
种群
种群是每代所产生的染色体总数,一个种群包含了该问题在这一代的一些解的集合
适应度评价
适应度评价是评估产生的解好坏的标准,如距离越短越好:f(s) = 1/dintance
选择
在种群中我们选择top N的优胜者进入下一轮迭代,一般使用按分数排序或者轮盘赌的方式来选择优胜者
交叉
交叉是将种群中两组优胜者的解分别拿出一部分,组合成为新的解
变异
变异是将种群中一组优胜者的解做一些随机变化,产生新的解
代码实现
# -*- encoding: utf8 -*-
import random
import math
import matplotlib.pyplot as plt
class GA:
def __init__(self, scale, num, max_gen, pc, elite):
# 种群规模
self.scale = scale
# 染色体数量(城市数量)
self.num = num
# 运行代数
self.max_gen = max_gen
# 交叉率
self.pc = pc
# 每代多少胜出者
self.elite = elite
def crossover(self, tour1, tour2, data):
# 交叉
c1 = random.randint(0, len(tour1['tour']) - 1)
c2 = random.randint(0, len(tour1['tour']) - 1)
while c1 == c2:
c2 = random.randint(0, len(tour1['tour']) - 1)
if c1 > c2:
c1, c2 = c2, c1
tour_part = tour1['tour'][c1:c2]
new_tour = []
index = 0
for i in range(len(tour2['tour'])):
if index == c1:
for item in tour_part:
new_tour.append(item)
if tour2['tour'][i] not in tour_part:
new_tour.append(tour2['tour'][i])
index += 1
return self.get_tour_detail(new_tour, data)
def mutate(self, tour, data):
# 变异
c1 = random.randint(0, len(tour['tour']) - 1)
c2 = random.randint(0, len(tour['tour']) - 1)
while c1 == c2:
c2 = random.randint(0, len(tour['tour']) - 1)
new_tour = []
for i in range(len(tour['tour'])):
if i == c1:
new_tour.append(tour['tour'][c2])
elif i == c2:
new_tour.append(tour['tour'][c1])
else:
new_tour.append(tour['tour'][i])
return self.get_tour_detail(new_tour, data)
def sort(self, tours):
# 排序
tours_sort = sorted(tours, key=lambda x: x['score'], reverse=True)
return tours_sort
def get_distance(self, c1, c2):
# 获取距离
xd = abs(c1['x'] - c2['x'])
yd = abs(c1['y'] - c2['y'])
distance = math.sqrt(xd * xd + yd * yd)
return distance
def get_score(self, distance):
# 适应性函数 距离越小适应值越大
return 1 / (distance + 1)
def get_tour(self, data):
# 随机获得一条路径
tour = []
for key, value in data.items():
tour.append(key)
random.shuffle(tour)
return self.get_tour_detail(tour, data)
def get_tour_detail(self, tour, data):
tmp = None
distance = 0
for item in tour:
if tmp is not None:
distance_tmp = self.get_distance(data[tmp], data[item])
distance += distance_tmp
tmp = item
return {'tour': tour, 'distance': distance, 'score': self.get_score(distance)}
def initialise(self, data):
# 初始种群
tours = []
for i in range(self.scale):
tours.append(self.get_tour(data))
return tours
def get_fittest(self, tours):
# 获取当前种群中最优个体
tmp = None
for item in tours:
if tmp is None or item['score'] > tmp['score']:
tmp = item
return tmp
def run(self, data):
pop = self.initialise(data)
old_fittest = self.get_fittest(pop)
topelite = int(self.scale * self.elite)
same_num = 0
max_same_num = 30
max_score = 0
for i in range(self.max_gen):
ranked = self.sort(pop)
pop = ranked[0:topelite]
fittest = self.get_fittest(pop)
if fittest['score'] > max_score:
same_num = 0
max_score = fittest['score']
else:
same_num += 1
# 最大分数保持n次一直没有变化则退出
if same_num > max_same_num:
break
while len(pop) < self.scale:
if random.random() < self.pc:
c1 = random.randint(0, topelite)
c2 = random.randint(0, topelite)
while c1 == c2:
c2 = random.randint(0, topelite)
tour = self.crossover(ranked[c1], ranked[c2], data)
else:
c = random.randint(0, topelite - 1)
tour = self.mutate(ranked[c], data)
pop.append(tour)
print 'total gen:', i
print 'before fittest:'
print old_fittest
print 'after fittest:'
new_fittest = self.get_fittest(pop)
print new_fittest
return new_fittest
def init_data(num):
data = {}
def get_random_point():
# 随机生成坐标
return {
'x': random.randint(1, 99),
'y': random.randint(1, 99)
}
for i in range(num):
data[i + 1] = get_random_point()
return data
def show(citys, tour):
# 绘图
plt.bar(left=0, height=100, width=100, color=(0, 0, 0, 0), edgecolor=(0, 0, 0, 0))
plt.title(u'tsp')
plt.xlabel('total distance: %s m' % tour['distance'])
x = []
y = []
i = 0
for item in tour['tour']:
city = citys[item]
x.append(city['x'])
y.append(city['y'])
i += 1
if i == 1:
plt.plot(city['x'], city['y'], 'or')
else:
plt.plot(city['x'], city['y'], 'bo')
plt.plot(x, y, 'g')
plt.xlim(0.0, 100)
plt.ylim(0.0, 100)
plt.show()
if __name__ == '__main__':
scale, num, max_gen, pc, elite = 50, 10, 1000, 0.8, 0.2
data = init_data(num)
ga = GA(scale, num, max_gen, pc, elite)
new_fittest = ga.run(data)
show(data, new_fittest)
运行效果:
遗传算法的优缺点
优点:
- 不盲目穷举, 是启发式搜索
- 与问题的领域无关,可快速切换
- 可并行计算一组种群
缺点:
- 计算量大, 不适合纬度高的问题
- 算法稳定性差