I’ve got a pyparsing issue that I have spent days trying to fix, with no luck.
Here’s the relevant pseudocode:
class Parser(object):
def __init__(self):
self.multilineCommands = []
self.grammar = <pyparsing grammar> # depends on self.multilineCommands
So, I’m trying to get a specific set of doctests to pass. But the tests in question update self.multilineCommands
after instantiation. Although there are no issues setting the attribute correctly, self.grammar
seems blind to the change, and fails the tests.
However, if I set self.multilineCommands
inside __init__()
, then the tests all pass.
How can I get self.grammar
to stay up-to-date with self.multilineCommands
?
Follow-Up
So, part of the issue here is that I’m refactoring code I didn’t write. My experience with pyparsing is also exclusively limited to my work on this project.
Pyparsing author Paul McGuire posted a helpful response, but I couldn’t get it to work. It could be an error on my part, but more likely the bigger issue is that I over-simplified the pseudo-code written above.
So, I’m going to post the actual code.
Warning!
What you are about to see is uncensored. The sight of it might make you cringe…or maybe even cry. In the original module, this code was just a single piece of a total “god class”. Splitting out what is below into the Parser
class is just step 1 (and apparently, step 1 was enough to break the tests).
class Parser(object):
'''Container object pyparsing-related parsing.
'''
def __init__(self, *args, **kwargs):
r'''
>>> c = Cmd()
>>> c.multilineCommands = ['multiline']
>>> c.multilineCommands
['multiline']
>>> c.parser.multilineCommands
['multiline']
>>> c.case_insensitive = True
>>> c.case_insensitive
True
>>> c.parser.case_insensitive
True
>>> print (c.parser('').dump())
[]
>>> print (c.parser('/* empty command */').dump())
[]
>>> print (c.parser('plainword').dump())
['plainword', '']
- command: plainword
- statement: ['plainword', '']
- command: plainword
>>> print (c.parser('termbare;').dump())
['termbare', '', ';', '']
- command: termbare
- statement: ['termbare', '', ';']
- command: termbare
- terminator: ;
- terminator: ;
>>> print (c.parser('termbare; suffx').dump())
['termbare', '', ';', 'suffx']
- command: termbare
- statement: ['termbare', '', ';']
- command: termbare
- terminator: ;
- suffix: suffx
- terminator: ;
>>> print (c.parser('barecommand').dump())
['barecommand', '']
- command: barecommand
- statement: ['barecommand', '']
- command: barecommand
>>> print (c.parser('COMmand with args').dump())
['command', 'with args']
- args: with args
- command: command
- statement: ['command', 'with args']
- args: with args
- command: command
>>> print (c.parser('command with args and terminator; and suffix').dump())
['command', 'with args and terminator', ';', 'and suffix']
- args: with args and terminator
- command: command
- statement: ['command', 'with args and terminator', ';']
- args: with args and terminator
- command: command
- terminator: ;
- suffix: and suffix
- terminator: ;
>>> print (c.parser('simple | piped').dump())
['simple', '', '|', ' piped']
- command: simple
- pipeTo: piped
- statement: ['simple', '']
- command: simple
>>> print (c.parser('double-pipe || is not a pipe').dump())
['double', '-pipe || is not a pipe']
- args: -pipe || is not a pipe
- command: double
- statement: ['double', '-pipe || is not a pipe']
- args: -pipe || is not a pipe
- command: double
>>> print (c.parser('command with args, terminator;sufx | piped').dump())
['command', 'with args, terminator', ';', 'sufx', '|', ' piped']
- args: with args, terminator
- command: command
- pipeTo: piped
- statement: ['command', 'with args, terminator', ';']
- args: with args, terminator
- command: command
- terminator: ;
- suffix: sufx
- terminator: ;
>>> print (c.parser('output into > afile.txt').dump())
['output', 'into', '>', 'afile.txt']
- args: into
- command: output
- output: >
- outputTo: afile.txt
- statement: ['output', 'into']
- args: into
- command: output
>>> print (c.parser('output into;sufx | pipethrume plz > afile.txt').dump())
['output', 'into', ';', 'sufx', '|', ' pipethrume plz', '>', 'afile.txt']
- args: into
- command: output
- output: >
- outputTo: afile.txt
- pipeTo: pipethrume plz
- statement: ['output', 'into', ';']
- args: into
- command: output
- terminator: ;
- suffix: sufx
- terminator: ;
>>> print (c.parser('output to paste buffer >> ').dump())
['output', 'to paste buffer', '>>', '']
- args: to paste buffer
- command: output
- output: >>
- statement: ['output', 'to paste buffer']
- args: to paste buffer
- command: output
>>> print (c.parser('ignore the /* commented | > */ stuff;').dump())
['ignore', 'the /* commented | > */ stuff', ';', '']
- args: the /* commented | > */ stuff
- command: ignore
- statement: ['ignore', 'the /* commented | > */ stuff', ';']
- args: the /* commented | > */ stuff
- command: ignore
- terminator: ;
- terminator: ;
>>> print (c.parser('has > inside;').dump())
['has', '> inside', ';', '']
- args: > inside
- command: has
- statement: ['has', '> inside', ';']
- args: > inside
- command: has
- terminator: ;
- terminator: ;
>>> print (c.parser('multiline has > inside an unfinished command').dump())
['multiline', ' has > inside an unfinished command']
- multilineCommand: multiline
>>> print (c.parser('multiline has > inside;').dump())
['multiline', 'has > inside', ';', '']
- args: has > inside
- multilineCommand: multiline
- statement: ['multiline', 'has > inside', ';']
- args: has > inside
- multilineCommand: multiline
- terminator: ;
- terminator: ;
>>> print (c.parser('multiline command /* with comment in progress;').dump())
['multiline', ' command /* with comment in progress;']
- multilineCommand: multiline
>>> print (c.parser('multiline command /* with comment complete */ is done;').dump())
['multiline', 'command /* with comment complete */ is done', ';', '']
- args: command /* with comment complete */ is done
- multilineCommand: multiline
- statement: ['multiline', 'command /* with comment complete */ is done', ';']
- args: command /* with comment complete */ is done
- multilineCommand: multiline
- terminator: ;
- terminator: ;
>>> print (c.parser('multiline command ends\n\n').dump())
['multiline', 'command ends', '\n', '\n']
- args: command ends
- multilineCommand: multiline
- statement: ['multiline', 'command ends', '\n', '\n']
- args: command ends
- multilineCommand: multiline
- terminator: ['\n', '\n']
- terminator: ['\n', '\n']
>>> print (c.parser('multiline command "with term; ends" now\n\n').dump())
['multiline', 'command "with term; ends" now', '\n', '\n']
- args: command "with term; ends" now
- multilineCommand: multiline
- statement: ['multiline', 'command "with term; ends" now', '\n', '\n']
- args: command "with term; ends" now
- multilineCommand: multiline
- terminator: ['\n', '\n']
- terminator: ['\n', '\n']
>>> print (c.parser('what if "quoted strings /* seem to " start comments?').dump())
['what', 'if "quoted strings /* seem to " start comments?']
- args: if "quoted strings /* seem to " start comments?
- command: what
- statement: ['what', 'if "quoted strings /* seem to " start comments?']
- args: if "quoted strings /* seem to " start comments?
- command: what
'''
# SETTINGS
self._init_settings()
# GRAMMAR
self._init_grammars()
# PARSERS
# For easy reference to all contained parsers.
# Hacky, I know. But I'm trying to fix code
# elsewhere at the moment... :P)
self._parsers = set()
self._init_prefixParser()
self._init_terminatorParser()
self._init_saveParser()
self._init_inputParser()
self._init_outputParser()
# intermission! :D
# (update grammar(s) containing parsers)
self.afterElements = \
pyparsing.Optional(self.pipe + pyparsing.SkipTo(self.outputParser ^ self.stringEnd, ignore=self.doNotParse)('pipeTo')) + \
pyparsing.Optional(self.outputParser('output') + pyparsing.SkipTo(self.stringEnd, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('outputTo'))
self._grammars.add('afterElements')
# end intermission
self._init_blankLineTerminationParser()
self._init_multilineParser()
self._init_singleLineParser()
self._init_optionParser()
# Put it all together:
self.mainParser = \
( self.prefixParser +
( self.stringEnd |
self.multilineParser |
self.singleLineParser |
self.blankLineTerminationParser |
self.multilineCommand + pyparsing.SkipTo(
self.stringEnd,
ignore=self.doNotParse)
)
)
self.mainParser.ignore(self.commentGrammars)
#self.mainParser.setDebug(True)
# And we've got mainParser.
#
# SPECIAL METHODS
#
def __call__(self, *args, **kwargs):
'''Call an instance for convenient parsing. Example:
p = Parser()
result = p('some stuff for p to parse')
This just calls `self.parseString()`, so it's safe to
override should you choose.
'''
return self.parseString(*args, **kwargs)
def __getattr__(self, attr):
# REMEMBER: This is only called when normal attribute lookup fails
raise AttributeError('Could not find {0!r} in class Parser'.format(attr))
@property
def multilineCommands(self):
return self._multilineCommands
@multilineCommands.setter
def multilineCommands(self, value):
value = list(value) if not isinstance(value, list) else value
self._multilineCommands = value
@multilineCommands.deleter
def multilineCommands(self):
del self._multilineCommands
self._multilineCommands = []
#
# PSEUDO_PRIVATE METHODS
#
def _init_settings(self, *args, **kwargs):
self._multilineCommands = []
self.abbrev = True # recognize abbreviated commands
self.blankLinesAllowed = False
self.case_insensitive = True
self.identchars = cmd.IDENTCHARS
self.legalChars = u'!#$%.:?@_' + pyparsing.alphanums + pyparsing.alphas8bit
self.noSpecialParse = {'ed','edit','exit','set'}
self.redirector = '>' # for sending output to file
self.reserved_words = []
self.shortcuts = {'?' : 'help' ,
'!' : 'shell',
'@' : 'load' ,
'@@': '_relative_load'}
self.terminators = [';']
self.keywords = [] + self.reserved_words
def _init_grammars(self, *args, **kwargs):
# Basic grammars
self.commentGrammars = (pyparsing.pythonStyleComment|pyparsing.cStyleComment).ignore(pyparsing.quotedString).suppress()
self.commentInProgress = '/*' + pyparsing.SkipTo( pyparsing.stringEnd ^ '*/' )
self.doNotParse = self.commentGrammars | self.commentInProgress | pyparsing.quotedString
self.fileName = pyparsing.Word(self.legalChars + '/\\')
self.inputFrom = self.fileName('inputFrom')
self.inputMark = pyparsing.Literal('<')
self.pipe = pyparsing.Keyword('|', identChars='|')
self.stringEnd = pyparsing.stringEnd ^ '\nEOF'
# Complex grammars
self.multilineCommand = pyparsing.Or([pyparsing.Keyword(c, caseless=self.case_insensitive) for c in self.multilineCommands ])('multilineCommand')
self.multilineCommand.setName('multilineCommand')
self.oneLineCommand = ( ~self.multilineCommand + pyparsing.Word(self.legalChars))('command')
# Hack-y convenience access to grammars
self._grammars = {
# Basic grammars
'commentGrammars',
'commentInProgress',
'doNotParse',
'fileName',
'inputFrom',
'inputMark',
'noSpecialParse',
'pipe',
'reserved_words',
'stringEnd',
# Complex grammars
'multilineCommand',
'oneLineCommand'
}
self.inputFrom.setParseAction(replace_with_file_contents)
self.inputMark.setParseAction(lambda x: '')
self.commentGrammars.addParseAction(lambda x: '')
if not self.blankLinesAllowed:
self.blankLineTerminator = (pyparsing.lineEnd * 2)('terminator')
if self.case_insensitive:
self.multilineCommand.setParseAction(lambda x: x[0].lower())
self.oneLineCommand.setParseAction(lambda x: x[0].lower())
def _init_all_parsers(self):
self._init_prefixParser()
self._init_terminatorParser()
self._init_saveParser()
self._init_inputParser()
self._init_outputParser()
# intermission! :D
# (update grammar(s) containing parsers)
self.afterElements = \
pyparsing.Optional(self.pipe + pyparsing.SkipTo(self.outputParser ^ self.stringEnd, ignore=self.doNotParse)('pipeTo')) + \
pyparsing.Optional(self.outputParser('output') + pyparsing.SkipTo(self.stringEnd, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('outputTo'))
self._grammars.setName('afterElements')
self._grammars.add('afterElements')
# end intermission
# FIXME:
# For some reason it's necessary to set this again.
# (Otherwise pyparsing results include `outputTo`, but not `output`.)
self.outputParser('output')
self._init_blankLineTerminationParser()
self._init_multilineParser()
self._init_singleLineParser()
self._init_optionParser()
def _init_prefixParser(self):
self.prefixParser = pyparsing.Empty()
self.prefixParser.setName('prefixParser')
self._parsers.add('prefixParser')
def _init_terminatorParser(self):
self.terminatorParser = pyparsing.Or([ (hasattr(t, 'parseString') and t) or pyparsing.Literal(t) for t in self.terminators])('terminator')
self.terminatorParser.setName('terminatorParser')
self._parsers.add('terminatorParser')
def _init_saveParser(self):
self.saveparser = (pyparsing.Optional(pyparsing.Word(pyparsing.nums)|'*')('idx') +
pyparsing.Optional(pyparsing.Word(self.legalChars + '/\\'))('fname') +
pyparsing.stringEnd)
self.saveparser.setName('saveParser')
self._parsers.add('saveParser')
def _init_outputParser(self):
# outputParser = (pyparsing.Literal('>>') | (pyparsing.WordStart() + '>') | pyparsing.Regex('[^=]>'))('output')
self.outputParser = self.redirector * 2 | (pyparsing.WordStart() + self.redirector) | pyparsing.Regex('[^=]' + self.redirector)('output')
self.outputParser.setName('outputParser')
self._parsers.add('outputParser')
def _init_inputParser(self):
# a not-entirely-satisfactory way of distinguishing < as in "import from" from <
# as in "lesser than"
self.inputParser = self.inputMark + \
pyparsing.Optional(self.inputFrom) + \
pyparsing.Optional('>') + \
pyparsing.Optional(self.fileName) + \
(pyparsing.stringEnd | '|')
self.inputParser.ignore(self.commentInProgress)
self.inputParser.setName('inputParser')
self._parsers.add('inputParser')
def _init_blankLineTerminationParser(self):
self.blankLineTerminationParser = pyparsing.NoMatch
if not self.blankLinesAllowed:
self.blankLineTerminationParser = ((self.multilineCommand ^ self.oneLineCommand) + pyparsing.SkipTo(self.blankLineTerminator, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('args') + self.blankLineTerminator )
# FIXME: Does this call *really* have to be reassigned into the variable???
self.blankLineTerminationParser = self.blankLineTerminationParser.setResultsName('statement')
self.blankLineTerminationParser.setName('blankLineTerminationParser')
self._parsers.add('blankLineTerminationParser')
def _init_multilineParser(self):
#self.multilineParser = self.multilineParser.setResultsName('multilineParser')
self.multilineParser = (
(
(self.multilineCommand('multilineCommand') ^ self.oneLineCommand)
+ pyparsing.SkipTo(self.terminatorParser, ignore=self.doNotParse).setParseAction(lambda x: x[0].strip())('args')
+ self.terminatorParser
)('statement')
+ pyparsing.SkipTo(
self.outputParser ^ self.pipe ^ self.stringEnd, ignore=self.doNotParse
).setParseAction(lambda x: x[0].strip())('suffix')
+ self.afterElements)
self.multilineParser.ignore(self.commentInProgress)
self.multilineParser.setName('multilineParser')
self._parsers.add('multilineParser')
def _init_singleLineParser(self):
#self.singleLineParser = self.singleLineParser.setResultsName('singleLineParser')
self.singleLineParser = ((self.oneLineCommand + pyparsing.SkipTo(self.terminatorParser ^ self.stringEnd ^ self.pipe ^ self.outputParser, ignore=self.doNotParse).setParseAction(lambda x:x[0].strip())('args'))('statement') +
pyparsing.Optional(self.terminatorParser) + self.afterElements)
self.singleLineParser.setName('singleLineParser')
self._parsers.add('singleLineParser')
def _init_optionParser(self):
# Different from the other parsers.
# This one is based on optparse.OptionParser,
# not pyparsing.
#
# It's included here to keep all parsing-related
# code under one roof.
# TODO: Why isn't this using cmd2's OptionParser?
self.optionParser = optparse.OptionParser()
self._parsers.add('optionParser')
def parseString(self, *args, **kwargs):
'''Parses a string using `self.mainParser`.'''
return self.mainParser.parseString(*args, **kwargs)
There you have it. The ugly truth. ☺
Edited 2012-11-12: I incorrectly used the term “class attribute” in the original title for this question. It‘s a silly mistake, and I apologize for any confusion. It has now been corrected to “instance attribute”.
I’ll let you know how it goes… – Zearin Nov 11 '12 at 19:37