I'm trying to use Pybrain to predict sequences of characters belonging to the Reber grammar.
Concretely what I'm doing is generating strings using the Reber grammar graph (you can check it here : http://www.felixgers.de/papers/phd.pdf page 22). An example of such string could be BPVVE. I want my neural network to learn the underlying rules of the grammar. For each of these string I create a sequence that would typically look like this :
[B, T, S, X, P, V, E,] , [B, T, S, X, P, V, E,]
B -> value = [1, 0, 0, 0, 0, 0, 0,] , target = [0, 0, 0, 0, 1, 0, 0,]
P -> value = [0, 0, 0, 0, 1, 0, 0,] , target = [0, 0, 0, 0, 0, 1, 0,]
V -> value = [0, 0, 0, 0, 0, 1, 0,] , target = [0, 0, 0, 0, 0, 1, 0,]
V -> value = [0, 0, 0, 0, 0, 1, 0,] , target = [0, 0, 0, 0, 0, 0, 1,]
E -> E is ignored for now because it marks the end
as you can see the value is just a 7-d vector representing the current letter and the target is the next letter in the Reber word.
Here is the code I'm trying to run :
#!/usr/bin/python
import reberGrammar as reber
import random as rnd
from pylab import *
from pybrain.supervised import RPropMinusTrainer
from pybrain.supervised import BackpropTrainer
from pybrain.datasets import SequenceClassificationDataSet
from pybrain.structure.modules import LSTMLayer, SoftmaxLayer
from pybrain.tools.validation import testOnSequenceData
from pybrain.tools.shortcuts import buildNetwork
def reberToListInt(word): #e.g. "BPVVE" -> [0,4,3,3,5]
out = [None]*len(word)
for i,l in enumerate(word):
if l == 'B':
out[i] = 0
elif l == 'T':
out[i] = 1
elif l == 'S':
out[i] = 2
elif l == 'V':
out[i] = 3
elif l == 'P':
out[i] = 4
elif l == 'E':
out[i] = 5
else :
out[i] = 6
return out
def buildReberDataSet(numSample):
"""Generate a 7 class dataset"""
reberLexicon = reber.ReberGrammarLexicon(numSample)
DS = SequenceClassificationDataSet(7, 7, nb_classes=7)
for rw in reberLexicon.lexicon:
DS.newSequence()
rw2 = reberToListInt(rw)
for i in range(len(rw2)-1): #inserting one letter at a time
inpt = outpt = [0.0]*7
inpt[rw2[i]]=1.0
outpt[rw2[i+1]]=1.0
DS.addSample(inpt,outpt)
return DS
def printDataSet(DS, numLines): #just to print some stat
print "\t############"
print "Number of sequences: ",DS.getNumSequences()
print "Input and output dimensions: ", DS.indim,"\t", DS.outdim
print "\n"
for i in range(numLines):
for inp, target in DS.getSequenceIterator(i):
print inp,
print "\n"
print "\t#############"
'''Dataset creation / split into training and test sets'''
fullDS = buildReberDataSet(700)
tstdata, trndata = fullDS.splitWithProportion( 0.25 )
trndata._convertToOneOfMany( bounds=[0.,1.])
tstdata._convertToOneOfMany( bounds=[0.,1.])
#printDataSet(trndata,2)
'''Network setup / training'''
rnn = buildNetwork( trndata.indim, 7, trndata.outdim, hiddenclass=LSTMLayer, outclass=SoftmaxLayer, outputbias=False, recurrent=True)
trainer = RPropMinusTrainer( rnn, dataset=trndata, verbose=True )
#trainer = BackpropTrainer( rnn, dataset=trndata, verbose=True, momentum=0.9, learningrate=0.5 )
trainError=[]
testError =[]
#errors = trainer.trainUntilConvergence()
for i in range(9):
trainer.trainEpochs( 2 )
trainError.append(100. * (1.0-testOnSequenceData(rnn, trndata)))
testError.append(100. * (1.0-testOnSequenceData(rnn, tstdata)))
print "train error: %5.2f%%" % trainError[i], ", test error: %5.2f%%" % testError[i]
plot(trainError)
hold(True)
plot(testError)
show()
I fail to train this net. The errors are fluctuating a lot and there is no real convergence. I would really appreciate some advises on this.
Here is the code I'm using to generate Reber strings :
#!/usr/bin/python
import random as rnd
class ReberGrammarLexicon(object):
lexicon = set() #contain Reber words
graph = [ [(1,'T'), (5,'P')], \
[(1, 'S'), (2, 'X')], \
[(3,'S') ,(5, 'X')], \
[(6, 'E')], \
[(3, 'V'),(2, 'P')], \
[(4, 'V'), (5, 'T')] ] #store the graph
def __init__(self, num, maxSize = 1000): #fill Lexicon with num words
self.maxSize = maxSize
if maxSize < 5:
raise NameError('maxSize too small, require maxSize > 4')
while len(self.lexicon) < num:
word = self.generateWord()
if word != None:
self.lexicon.add(word)
def generateWord(self): #generate one word
c = 2
currentEdge = 0
word = 'B'
while c <= self.maxSize:
inc = rnd.randint(0,len(self.graph[currentEdge])-1)
nextEdge = self.graph[currentEdge][inc][0]
word += self.graph[currentEdge][inc][1]
currentEdge = nextEdge
if currentEdge == 6 :
break
c+=1
if c > self.maxSize :
return None
return word
Thanks,
Best