GEP基因表达式编程

GEP基因表达式编程

基因表达式编程GEP(Gene Expression Programming)其实是遗传算法的一种改进,如果对遗传算法有了解的话,对于GEP的理解也就自然而然了。
遗传算法的染色体是不同的解的值,而GEP的染色体是不同的表达式,如 $y=\sin x_1+\cos x_2$、$y=x_1^2+x_2^2$ 等等。下图是一个例子(来源:DEAP docs):

具体的讲解大家可以参考DEAP的官方文档;DEAP是一个基于Python的遗传算法(进化计算)实现框架。下面是我用GEP预测一个时间序列的代码,供大家参考:

# -*- coding: utf-8 -*-
"""
Created on Fri Nov 10 23:30:52 2017

@author: tober
"""
import math
import random
import operator
import numpy as np
import pandas as pd
from deap import creator, base, tools, gp, algorithms

def avg(left, right):
    """Return the arithmetic mean of ``left`` and ``right`` as a float."""
    total = left + right
    return total / 2.0
# Primitive set named 'main' whose evolved programs take 10 inputs.
pset = gp.PrimitiveSet('main', 10)

# Binary function nodes available to the evolved expressions
# (registered in this order).
for binary_op in (operator.add, operator.sub, operator.mul, max, min, avg):
    pset.addPrimitive(binary_op, 2)

# Ten ephemeral-constant terminals 'a'..'j'; each one's value generator
# is uniform(-10, 10).
for const_name in 'abcdefghij':
    pset.addEphemeralConstant(const_name, lambda: random.uniform(-10, 10))

# Single-objective fitness; the negative weight marks the objective as one
# to be minimised (DEAP convention).
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
# Individuals are GP expression trees that carry the fitness class above and
# a reference to the primitive set.
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin, pset=pset)
toolbox = base.Toolbox()
# "expr": random expression generator (genHalfAndHalf) with bounds min_=2, max_=6.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=2, max_=6)
# "individual": wraps one generated expression into a creator.Individual.
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
# "population": list of individuals; the size is supplied at call time (n=...).
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# "compile": turns an expression tree into a callable using pset.
toolbox.register("compile", gp.compile, pset=pset)

def evalSymbReg(individual, data):
    """Score an expression tree by its prediction error on ``data``.

    Args:
        individual: GP tree, compiled with ``toolbox.compile``.
        data: 2-D array-like. In every row the last element is the target
            and the preceding elements are the inputs passed positionally
            to the compiled function.

    Returns:
        A 1-tuple ``(error,)`` where ``error`` is
        ``sqrt(sum(residual ** 2)) / len(data)``. When that value is not
        finite (numeric overflow inside the evolved expression, or empty
        ``data``) ``inf`` is returned instead, so that a NaN score can never
        be compared favourably against a real one during selection.
    """
    func = toolbox.compile(expr=individual)
    # Overflow / invalid-value warnings are expected for badly behaved trees;
    # the non-finite result is handled explicitly below.
    with np.errstate(all='ignore'):
        residuals = np.array(
            [func(*row[:-1]) - row[-1] for row in data], dtype=float
        )
        error = np.sqrt(np.sum(residuals ** 2)) / len(data)
    if not np.isfinite(error):
        return float('inf'),
    return error,

def getdata(Series, delay):
    """Build a lag matrix from a time series.

    Args:
        Series: pandas Series or DataFrame holding the observations in
            chronological order.
        delay: number of past observations to place in front of the current
            one in each row; must be >= 0.

    Returns:
        2-D NumPy array. For a single-column input every row is
        ``[x[t-delay], ..., x[t-1], x[t]]``. Rows containing any missing
        value (including the first ``delay`` rows, which lack history) are
        dropped.

    Raises:
        ValueError: if ``delay`` is negative.
    """
    if delay < 0:
        raise ValueError("delay must be non-negative, got {}".format(delay))
    # Oldest lag first so that the current value ends up in the last column.
    # One concat call instead of growing a frame inside the loop.
    lagged = [Series.shift(lag) for lag in range(delay, -1, -1)]
    return pd.concat(lagged, axis=1).dropna().values

# Load the series; the first CSV column becomes the index.
sr = pd.read_csv("time_series.data", index_col=0)
# Training rows come from getdata with delay 10, matching the 10 inputs
# declared on the primitive set.
toolbox.register("evaluate", evalSymbReg, data=getdata(sr, 10))
toolbox.register("select", tools.selTournament, tournsize=10)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=2, max_=6)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
# Bloat control: reject offspring taller than 17 levels (the limit used in
# DEAP's own GP examples). Without it, crossover and mutation can grow trees
# beyond the depth that gp.compile is able to evaluate.
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))

# Evolve a population of 30 trees for 100 generations. The hall of fame
# keeps the single best individual seen in any generation.
pop = toolbox.population(n=30)
hof = tools.HallOfFame(1)
pop, log = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.1, ngen=100,
                               halloffame=hof, verbose=True)

# eaSimple is not elitist, so the final population may no longer contain the
# best solution found; take it from the hall of fame instead of `pop`.
best = hof[0]
tree = gp.PrimitiveTree(best)
print(str(tree))

delay = 10      # lagged inputs per prediction; must equal the arity of pset
horizon = 100   # number of future steps to forecast iteratively

# In-sample one-step predictions compared against the real values.
func = toolbox.compile(expr=best)
data = getdata(sr, delay)
pre = [func(*d[:-1]) for d in data]
index = sr.index[delay:]
p = pd.Series(pre, index=index, name='predict')
r = sr.iloc[delay:, 0].rename('real')
X = pd.concat([p, r], axis=1)  # side-by-side table, kept for inspection
print(p.corr(r))

# Iterated forecast: seed with the last `delay` real values, then feed each
# prediction back in as the newest input.
pr = r.tolist()[-delay:]
for _ in range(horizon):
    pr.append(func(*pr[-delay:]))
# NOTE(review): when run as a plain script (not in an interactive/IPython
# session) an explicit matplotlib show() call is probably needed to display
# this figure.
pd.Series(pr).plot()

你可能感兴趣的:(python,GEP,遗传编程,基因表达式编程)