遗传编程(Genetic Programming)学习笔记(四):利用DEAP框架实现GP符号回归

要求:在 $[-1,1]$ 范围内拟合函数 $y=x^4+x^3+x^2+x$
代码如下:

import functools
import math
import operator
import random

import numpy
from deap import creator,base,tools,gp
from matplotlib import pyplot as plt

def protectedDiv(left,right):
    """Divide ``left`` by ``right``, returning 1 when ``right`` equals 0.

    Used as a GP primitive so that evolved expressions never raise on a
    zero denominator.
    """
    return 1 if right == 0 else left / right

# Primitive set: one input argument (renamed from ARG0 to x), four binary
# arithmetic primitives, two unary trigonometric primitives, the constant
# terminal 5 and an ephemeral integer constant drawn from [-2, 2].
pset = gp.PrimitiveSet('main', 1)
pset.renameArguments(ARG0='x')
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(protectedDiv, 2)
pset.addPrimitive(math.sin, 1)
pset.addPrimitive(math.cos, 1)
pset.addTerminal(5)
# functools.partial instead of a lambda: a lambda generator cannot be
# pickled, and newer DEAP releases emit a warning recommending partial.
pset.addEphemeralConstant('a', functools.partial(random.randint, -2, 2))

# Fitness type with a single objective weighted -1.0 (i.e. minimised), and
# an individual type: a GP primitive tree carrying that fitness.
creator.create('FitnessMin', base.Fitness, weights=(-1.0,))
creator.create('Individual', gp.PrimitiveTree, fitness=creator.FitnessMin)

# Toolbox entries describing how to generate an expression, an individual
# and a population, and how to compile a tree against pset.
tool = base.Toolbox()
tool.register('expr', gp.genHalfAndHalf, pset=pset, min_=1, max_=4)
tool.register(
    'individual',
    tools.initIterate,
    creator.Individual,
    tool.expr,
)
tool.register('population', tools.initRepeat, list, tool.individual)
tool.register('compile', gp.compile, pset=pset)

# 定义评价函数
def fit_evaluation(individual,points):
    """Return the mean squared error of ``individual`` against the target.

    The individual is compiled with ``tool.compile`` (the primitive set was
    bound when ``compile`` was registered) and compared with
    x**4 + x**3 + x**2 + x at every sample in ``points``.

    Returns:
        A one-element tuple, because a fitness value is assigned as a tuple.
    """
    candidate = tool.compile(expr=individual)

    def squared_error(x):
        # Same left-to-right subtraction as the original expression so the
        # floating-point result is unchanged.
        return (candidate(x) - x**4 - x**3 - x**2 - x)**2

    mean_squared_error = math.fsum(map(squared_error, points)) / len(points)
    return (mean_squared_error,)

# Register the operators that main() calls as tool.evaluate, tool.select,
# tool.mate and tool.mutate.
# The sample points are -1.0, -0.9, ..., 0.9 (20 points, step 0.1); the
# right endpoint 1.0 is not part of range(-10,10).
tool.register(
    'evaluate',
    fit_evaluation,
    points=[x/10. for x in range(-10,10)],
)
tool.register('select', tools.selTournament, tournsize=3)
# One-point crossover produces two trees (the original note remarks that
# "A Field Guide to Genetic Programming" usually keeps only one of them).
tool.register('mate', gp.cxOnePoint)
# expr_mut generates the replacement subtree used by subtree mutation.
tool.register('expr_mut', gp.genFull, pset=pset, min_=0, max_=2)
tool.register('mutate', gp.mutUniform, expr=tool.expr_mut, pset=pset)
# Limit tree height after crossover and mutation. The original note cites
# a maximum of 17 from Koza's paper; this script uses the stricter 13.
height_of = operator.attrgetter('height')
tool.decorate('mate', gp.staticLimit(key=height_of, max_value=13))
tool.decorate('mutate', gp.staticLimit(key=operator.attrgetter('height'), max_value=13))

def main():
    """Run the GP symbolic-regression loop and plot the best evolved function.

    Each generation applies tournament selection, pairwise crossover,
    mutation and re-evaluation of the modified individuals, then records
    statistics in a logbook. After the last generation the logbook is
    printed and the best individual is plotted against the target function.

    The individuals returned by ``tool.mate`` and ``tool.mutate`` are written
    back into the offspring list. Both operators are wrapped by
    ``gp.staticLimit``, which replaces an over-height result only in the
    list it returns; discarding that return value would leave the oversized,
    in-place-modified trees in the population and bypass the limit.

    Returns:
        tuple: ``(pop, logbook, hof)`` -- the final population, the
        per-generation logbook and the hall of fame holding the best
        individual seen during the run.
    """
    # Run parameters
    cxpb = 0.4      # crossover probability
    mutpb = 0.2     # mutation probability
    ngen = 100      # number of generations
    popSize = 300   # population size

    # Build the initial population and assign a fitness to every individual
    pop = tool.population(n = popSize)
    fitnesses = list(map(tool.evaluate,pop))
    for ind,fit in zip(pop,fitnesses):
        ind.fitness.values = fit

    # The hall of fame keeps the single best individual seen so far
    hof = tools.HallOfFame(1)
    hof.update(pop)
    best_ind = hof.items[0]

    stats = tools.Statistics(lambda ind:ind.fitness.values)
    # axis = 0 aggregates over individuals (column-wise)
    stats.register('avg',numpy.mean,axis = 0)
    stats.register('std',numpy.std,axis = 0)
    stats.register('min',numpy.min,axis = 0)
    stats.register('max',numpy.max,axis = 0)

    def compile_record(population):
        # Each compiled statistic is indexed with [0] to obtain the value of
        # the single objective, exactly as the original per-key loop did.
        return {key: value[0] for key, value in stats.compile(population).items()}

    record = compile_record(pop)

    logbook = tools.Logbook()
    logbook.record(gen = 0,eval = popSize,best_ind = best_ind,**record)
    logbook.header = 'gen','min','max','avg','std','eval','best_ind'
    print('--------开始迭代--------')

    for g in range(ngen):
        print('第',g+1,'次迭代')
        # Selection: pick len(pop) individuals and clone them so that the
        # variation operators do not modify the parents
        offSpring = tool.select(pop,len(pop))
        offSpring = list(map(tool.clone,offSpring))

        # Crossover on the pairs (1, 0), (3, 2), ...; keep the returned
        # individuals so the height limit takes effect
        for i in range(1, len(offSpring), 2):
            if random.random() < cxpb:
                offSpring[i], offSpring[i - 1] = tool.mate(offSpring[i], offSpring[i - 1])
                # Modified individuals must be evaluated again
                del offSpring[i].fitness.values
                del offSpring[i - 1].fitness.values

        # Mutation: every individual mutates with probability mutpb
        for i, child in enumerate(offSpring):
            if random.random() < mutpb:
                offSpring[i], = tool.mutate(child)
                del offSpring[i].fitness.values

        # Evaluate only the individuals whose fitness was invalidated
        invalid_fit = [ind for ind in offSpring if not ind.fitness.valid]
        fitnesses = list(map(tool.evaluate,invalid_fit))
        for ind,fit in zip(invalid_fit,fitnesses):
            ind.fitness.values = fit

        pop = offSpring       # the offspring become the new population
        hof.update(pop)       # hof tracks the best individual seen so far
        best_ind = hof.items[0]
        record = compile_record(pop)
        logbook.record(gen = g+1,eval = len(invalid_fit),best_ind = best_ind,**record)

    print('--------迭代结束-------')
    print(logbook)

    # Plot the target function and the best evolved function
    points = [x/10. for x in range(-10,10)]
    y = [x**4+x**3+x**2+x for x in points]
    func = tool.compile(expr = hof.items[0])
    z = [func(x) for x in points]

    plt.plot(points,y,label = 'target function')
    plt.plot(points,z,'--',label = 'envolved function')
    plt.legend(loc = 'best')
    plt.show()

    return pop, logbook, hof


遗传编程(Genetic Programming)学习笔记(四):利用DEAP框架实现GP符号回归_第1张图片
在收敛曲线图中,横坐标是迭代的代数,纵坐标是每一代的最优个体的适应度值。这个示例比较简单,可以看出GP的收敛速度很快,在40代左右就能找到较好的解。
遗传编程(Genetic Programming)学习笔记(四):利用DEAP框架实现GP符号回归_第2张图片
从图中可以看出,拟合的曲线在 $[-1,1]$ 上几乎完全与目标函数的曲线重合了。
遗传编程(Genetic Programming)学习笔记(四):利用DEAP框架实现GP符号回归_第3张图片
上图是最优个体对应的函数的示意图。
附上画图的代码:

import pygraphviz as pgv
def drawBestInd(expr, filename="tree.pdf"):
    """Draw a GP expression tree to a file using pygraphviz.

    Args:
        expr: the expression/tree passed to ``gp.graph``; in this example
            it is an individual.
        filename: output path handed to ``AGraph.draw``. Defaults to
            ``"tree.pdf"``, the path that was previously hard-coded.
    """
    nodes, edges, labels = gp.graph(expr)
    g = pgv.AGraph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)
    g.layout(prog="dot")

    # Replace the node ids shown in the drawing with the labels from gp.graph
    for i in nodes:
        n = g.get_node(i)
        n.attr["label"] = labels[i]

    g.draw(filename)

你可能感兴趣的:(人工智能,算法)