I was able to work this out with help from the DEAP developers. For anyone who finds themselves in the same position, below is working code for a DEAP GP algorithm that drives the combined sum of the values in two columns of a DataFrame toward 0. The example problem is deliberately trivial and of no practical use; it is meant only as a straightforward illustration of DEAP operating on DataFrames.
import operator
import math
import random
import numpy as np
import pandas as pd
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp
def add_5(input_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *input_df* with 5.0 added to every element.

    Registered as a unary GP primitive; the input is not modified.
    """
    return input_df + 5.
def subtract_5(input_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *input_df* with 5.0 subtracted from every element.

    Registered as a unary GP primitive; the input is not modified.
    """
    return input_df - 5.
def multiply_5(input_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *input_df* with every element multiplied by 5.0.

    Registered as a unary GP primitive; the input is not modified.
    """
    return input_df * 5.
def divide_5(input_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *input_df* with every element divided by 5.0.

    Registered as a unary GP primitive; the input is not modified.
    """
    return input_df / 5.
# --- Primitive set -----------------------------------------------------------
# Building blocks available to evolved programs: a set named 'MAIN' taking one
# input argument, plus the four unary arithmetic helpers (arity 1 each).
pset = gp.PrimitiveSet('MAIN', 1)
pset.addPrimitive(add_5, 1)
pset.addPrimitive(subtract_5, 1)
pset.addPrimitive(multiply_5, 1)
pset.addPrimitive(divide_5, 1)
# --- Fitness and individual types --------------------------------------------
# Single-objective fitness with weight -1.0 (DEAP treats a negative weight as
# "minimize"); individuals are GP expression trees carrying that fitness.
creator.create('FitnessMin', base.Fitness, weights=(-1.0,))
creator.create('Individual', gp.PrimitiveTree, fitness=creator.FitnessMin)
# --- Toolbox: construction of expressions, individuals and populations -------
toolbox = base.Toolbox()
# 'expr' generates a random expression from pset with depth bounds 1..2.
toolbox.register('expr', gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
# 'individual' wraps a freshly generated expression in creator.Individual.
toolbox.register('individual', tools.initIterate, creator.Individual, toolbox.expr)
# 'population' builds a list of individuals; the size is supplied by the
# caller as n=... (see the __main__ section).
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
# 'compile' turns an expression tree into a Python callable using pset.
toolbox.register('compile', gp.compile, pset=pset)
def evalSymbReg(individual, points):
    """Score *individual* by how close it drives the column sums to zero.

    Args:
        individual: GP expression tree to evaluate; compiled with
            ``toolbox.compile``.
        points: DataFrame with ``column1`` and ``column2`` columns that is
            passed as the single argument to the compiled expression.

    Returns:
        A one-element tuple holding the absolute value of the combined sum of
        ``column1`` and ``column2`` in the transformed DataFrame. A tuple is
        returned because the fitness is declared with a ``weights`` tuple.
    """
    # Transform the tree expression into a callable function.
    func = toolbox.compile(expr=individual)
    result = func(points)
    return abs(result.column1.sum() + result.column2.sum()),
# --- Toolbox: evolutionary operators -----------------------------------------
# 'evaluate' is evalSymbReg with `points` fixed to a 500-row DataFrame whose
# two columns ('column1', 'column2') are both filled with 125.
toolbox.register('evaluate', evalSymbReg, points=pd.DataFrame({'column1': [125] * 500, 'column2': [125] * 500}))
# Selection: tournaments of size 3.
toolbox.register('select', tools.selTournament, tournsize=3)
# Crossover: one-point crossover between two trees.
toolbox.register('mate', gp.cxOnePoint)
# Mutation: 'expr_mut' generates full trees with depth bounds 0..2, which
# mutUniform uses (via expr=) when mutating an individual.
# NOTE(review): 'expr_mut' is registered without pset; presumably mutUniform
# supplies it when calling expr -- confirm against the DEAP docs.
toolbox.register('expr_mut', gp.genFull, min_=0, max_=2)
toolbox.register('mutate', gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
if __name__ == '__main__':
    # Start from 100 randomly generated individuals.
    pop = toolbox.population(n=100)
    # Hall of fame of size 1: retains the best individual seen during the run.
    hof = tools.HallOfFame(1)

    # Statistics computed over the individuals' fitness values.
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register('avg', np.mean)
    stats.register('min', np.min)
    stats.register('max', np.max)

    # Run the simple evolutionary algorithm: crossover probability 0.5,
    # mutation probability 0.1, 20 generations.
    pop, log = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.1, ngen=20,
                                   stats=stats, halloffame=hof)