要求:在 $[-1,1]$ 范围内拟合函数 $y=x^4+x^3+x^2+x$
代码如下:
import math
import random
import operator
from deap import creator,base,tools,gp
import numpy
from matplotlib import pyplot as plt
def protectedDiv(left, right):
    """Divide ``left`` by ``right`` without ever raising on a zero divisor.

    Evolved expressions may divide by zero; returning a constant instead of
    raising keeps every generated tree evaluable.

    Args:
        left: Numerator.
        right: Denominator.

    Returns:
        ``1`` when ``right`` equals zero, otherwise ``left / right``.
    """
    if right == 0:
        return 1
    return left / right
# Primitive set: a single input argument, renamed from ARG0 to x.
pset = gp.PrimitiveSet('main', 1)
pset.renameArguments(ARG0='x')

# Function nodes, registered in the same order as before: (callable, arity).
for binary_op in (operator.add, operator.sub, operator.mul, protectedDiv):
    pset.addPrimitive(binary_op, 2)
for unary_op in (math.sin, math.cos):
    pset.addPrimitive(unary_op, 1)

# Terminal nodes: the constant 5 and an ephemeral integer drawn from [-2, 2].
pset.addTerminal(5)
pset.addEphemeralConstant('a', lambda: random.randint(-2, 2))

# Fitness and individual classes; the negative weight means "minimize".
creator.create('FitnessMin', base.Fitness, weights=(-1.0,))
creator.create('Individual', gp.PrimitiveTree, fitness=creator.FitnessMin)

# Toolbox entries describing how expressions, individuals and populations
# are generated, and how an expression tree is compiled into a callable.
tool = base.Toolbox()
tool.register('expr', gp.genHalfAndHalf, pset=pset, min_=1, max_=4)
tool.register('individual', tools.initIterate, creator.Individual, tool.expr)
tool.register('population', tools.initRepeat, list, tool.individual)
tool.register('compile', gp.compile, pset=pset)
# Fitness (evaluation) function.
def fit_evaluation(individual, points):
    """Return the mean squared error of ``individual`` against the target.

    The target function is ``x**4 + x**3 + x**2 + x``.

    Args:
        individual: Expression tree to evaluate; it is compiled with
            ``tool.compile`` (the primitive set is already bound there).
        points: Sequence of sample x values. Must be non-empty, because the
            summed error is divided by ``len(points)``.

    Returns:
        A one-element tuple ``(mse,)``; the fitness is assigned from a tuple.
    """
    func = tool.compile(expr=individual)
    sqerrors = ((func(x) - x**4 - x**3 - x**2 - x) ** 2 for x in points)
    return (math.fsum(sqerrors) / len(points),)
# Register evaluate / select / mate / mutate. main() looks the operators up
# under exactly these names, so they must not be renamed.
tool.register('evaluate', fit_evaluation, points=[x / 10. for x in range(-10, 10)])
tool.register('select', tools.selTournament, tournsize=3)
# One-point crossover yields two trees (the "field guide" notes that usually
# only one of them is used).
tool.register('mate', gp.cxOnePoint)
# expr_mut generates the replacement subtree used by subtree mutation.
tool.register('expr_mut', gp.genFull, pset=pset, min_=0, max_=2)
tool.register('mutate', gp.mutUniform, expr=tool.expr_mut, pset=pset)

# Cap tree height after crossover and mutation. Koza's paper suggests a
# maximum of 17; this script uses 13.
height_limit = gp.staticLimit(key=operator.attrgetter('height'), max_value=13)
tool.decorate('mate', height_limit)
tool.decorate('mutate', height_limit)
def _scalar_stats(stats, population):
    """Compile ``stats`` over ``population`` and unwrap each value.

    Fitness values are one-element tuples and the statistics are reduced
    along axis 0, so every compiled entry is a length-1 array; keep only its
    first element so the logbook stores plain scalars.
    """
    return {key: value[0] for key, value in stats.compile(population).items()}


def main():
    """Run the GP loop and plot the best evolved function against the target.

    Builds and evaluates an initial population, then for ``ngen`` generations
    applies tournament selection, crossover, mutation and re-evaluation of
    the changed individuals. Per-generation statistics are collected in a
    logbook that is printed at the end, and the best individual found is
    plotted together with the target function.
    """
    # Parameters.
    cxpb = 0.4      # crossover probability
    mutpb = 0.2     # mutation probability
    ngen = 100      # number of generations
    popSize = 300   # population size

    # Initial population and its fitness values.
    pop = tool.population(n=popSize)
    for ind, fit in zip(pop, map(tool.evaluate, pop)):
        ind.fitness.values = fit

    # The hall of fame keeps the single best individual seen so far.
    hof = tools.HallOfFame(1)
    hof.update(pop)

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    # axis=0 reduces over individuals (down the columns).
    stats.register('avg', numpy.mean, axis=0)
    stats.register('std', numpy.std, axis=0)
    stats.register('min', numpy.min, axis=0)
    stats.register('max', numpy.max, axis=0)

    logbook = tools.Logbook()
    logbook.record(gen=0, eval=popSize, best_ind=hof.items[0],
                   **_scalar_stats(stats, pop))
    logbook.header = 'gen', 'min', 'max', 'avg', 'std', 'eval', 'best_ind'

    print('--------开始迭代--------')
    for g in range(ngen):
        print('第', g + 1, '次迭代')

        # Selection: draw len(pop) individuals and clone them so the
        # variation operators never touch the parents.
        offSpring = [tool.clone(ind) for ind in tool.select(pop, len(pop))]

        # Crossover on consecutive pairs (odd index mated with the even index
        # before it). The result MUST be assigned back: the staticLimit
        # decorator enforces the height cap by substituting individuals in
        # its return value, so discarding it would bypass the limit.
        for i in range(1, len(offSpring), 2):
            if random.random() < cxpb:
                offSpring[i], offSpring[i - 1] = tool.mate(offSpring[i],
                                                           offSpring[i - 1])
                # Changed individuals must be re-evaluated.
                del offSpring[i].fitness.values
                del offSpring[i - 1].fitness.values

        # Mutation: every individual mutates with probability mutpb; the
        # returned individual is stored for the same reason as above.
        for i, child in enumerate(offSpring):
            if random.random() < mutpb:
                offSpring[i], = tool.mutate(child)
                del offSpring[i].fitness.values

        # Re-evaluate only the individuals whose fitness was invalidated.
        invalid_fit = [ind for ind in offSpring if not ind.fitness.valid]
        for ind, fit in zip(invalid_fit, map(tool.evaluate, invalid_fit)):
            ind.fitness.values = fit

        pop = offSpring     # the offspring become the next generation
        hof.update(pop)     # refresh the best-so-far individual
        logbook.record(gen=g + 1, eval=len(invalid_fit),
                       best_ind=hof.items[0], **_scalar_stats(stats, pop))
    print('--------迭代结束-------')
    print(logbook)

    # Plot the target function and the best evolved function.
    points = [x / 10. for x in range(-10, 10)]
    y = [x**4 + x**3 + x**2 + x for x in points]
    func = tool.compile(expr=hof.items[0])
    z = [func(x) for x in points]
    plt.plot(points, y, label='target function')
    plt.plot(points, z, '--', label='envolved function')
    plt.legend(loc='best')
    plt.show()
在收敛曲线图中,横坐标是迭代的代数,纵坐标是每一代的最优个体的适应度值。这个示例比较简单,可以看出GP的收敛速度很快,在40代左右就能找到较好的解。
从图中可以看出,拟合的曲线在 $[-1,1]$ 范围内几乎完全与目标函数的曲线重合了。
上图是最优个体对应的函数的示意图。
附上画图的代码:
import pygraphviz as pgv
def drawBestInd(expr, filename="tree.pdf"):
    """Render an expression tree to a file with pygraphviz.

    Args:
        expr: The expression / tree to draw (in this example, an individual).
        filename: Output path passed to ``AGraph.draw``; defaults to
            ``"tree.pdf"``.
    """
    nodes, edges, labels = gp.graph(expr)
    g = pgv.AGraph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)
    g.layout(prog="dot")
    # Nodes are identified by index; attach the readable label to each one.
    for i in nodes:
        n = g.get_node(i)
        n.attr["label"] = labels[i]
    g.draw(filename)