4

I tried this solution but it didn't seem to work for me

Here's an excerpt from my grammar:

module
    : BEGIN MODULE IDENT STRING module_element* END MODULE
    ;

module_element
    : element_1 | element_2 | element_3 | ...
    ;

There is a bigger tree below each element. Now, when a RecognitionException occurs, I want to consume tokens until either the next module_element matches or the parent's END MODULE matches.

Any hints on how to do this inside a class inheriting from DefaultErrorStrategy?

edit: Here is an MCVE:

Program.cs

namespace AntlrExample
{
    /// <summary>
    /// Console entry point that runs the generated MyGrammar parser over a fixed sample file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses the hard-coded example file and prints any exception that escapes the parse.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string inputPath = @"C:\temp\MyGrammarExample.txt";

            try
            {
                Parse(inputPath);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception: " + ex);
            }
        }

        /// <summary>
        /// Builds the lexer/token-stream/parser pipeline for the given file, attaches a
        /// <c>MyGrammarListener</c> as a parse listener and invokes the <c>startnode</c> rule.
        /// </summary>
        /// <param name="filePath">Path of the file to parse; it is read using <c>Encoding.Default</c>.</param>
        private static void Parse(string filePath)
        {
            var charStream = new AntlrFileStream(filePath, Encoding.Default);
            var tokenStream = new CommonTokenStream(new MyGrammarLexer(charStream));
            var parser = new MyGrammarParser(tokenStream);

            // The listener is registered as a *parse* listener, so its callbacks run while parsing.
            parser.AddParseListener(new MyGrammarListener());
            parser.startnode();
        }
    }
}

MyGrammar.g4:

// Grammar for a small block-structured format: one MODULE that contains any number of
// ELEMENT1 / ELEMENT2 blocks, each delimited by BEGIN <kind> ... END <kind>.
grammar MyGrammar;

// Exposes the runtime's Eof constant under the name EOF inside the generated parser.
// NOTE(review): presumably a workaround for the C# target in use - confirm it is still needed.
@parser::members
{
    protected const int EOF = Eof;
}

// Same aliasing for the generated lexer (EOF and HIDDEN).
@lexer::members
{
    protected const int EOF = Eof;
    protected const int HIDDEN = Hidden;
}

// Entry rule: a file consists of a single module.
startnode
    :   module
    ;

// BEGIN MODULE <name> <description> { element } END MODULE
module
    : BEGIN MODULE IDENT STRING module_element* END MODULE
    ;

// Any of the element kinds that may appear inside a module.
module_element
    :   element_1 | element_2
    ;

// ELEMENT1 block: name, description and two identifiers.
element_1
    :   BEGIN ELEMENT1 name=IDENT desc=STRING other1=IDENT other2=IDENT END ELEMENT1
    ;

// ELEMENT2 block: like ELEMENT1 plus a trailing integer.
element_2
    :   BEGIN ELEMENT2 name=IDENT desc=STRING other1=IDENT other2=IDENT other3=INT END ELEMENT2
    ;

// Keywords. They are declared before IDENT so that they win when both match the same text.
BEGIN : 'BEGIN';
MODULE: 'MODULE';
END: 'END';
ELEMENT1 : 'ELEMENT1';
ELEMENT2 : 'ELEMENT2';

// Identifier: a letter or underscore followed by letters, underscores, digits, '[', ']' or '.'.
IDENT
    : LETTER (LETTER|'0'..'9'|'['|']'|'.')*
    ;

fragment LETTER
    : 'A'..'Z' | 'a'..'z' | '_'
    ;

// Double-quoted string. A backslash escapes any single character, and a doubled quote ("")
// is accepted inside the string as well.
STRING
    : '"' ('\\' (.) | '"''"' | ~( '\\' | '"'))* '"'
    ;

// Optionally signed integer.
INT
    : MINUS? DIGIT+
    ;

fragment MINUS
    : '-'
    ;

// DIGIT is only a building block of INT, so it is a fragment rather than a token of its own;
// no parser rule references it.
fragment DIGIT
    : '0'..'9'
    ;

// Whitespace separates tokens and is discarded.
WS
    : ( ' ' | '\t' | '\r' | '\n')+ -> skip
    ;

MyGrammarListener.cs

namespace AntlrExample.Parser
{
    /// <summary>
    /// Listener that prints a line for every ELEMENT1 / ELEMENT2 block that was parsed successfully.
    /// </summary>
    public class MyGrammarListener : MyGrammarBaseListener
    {
        /// <summary>
        /// Called when the parser leaves an <c>element_1</c> rule; prints its labelled tokens.
        /// </summary>
        /// <param name="context">Context of the rule that was just exited.</param>
        public override void ExitElement_1(MyGrammarParser.Element_1Context context)
        {
            // Exit callbacks also fire for rules that ended in a RecognitionException. In that
            // case some labelled tokens were never matched and are null, so dereferencing them
            // would throw a NullReferenceException and abort the whole parse.
            if (context.exception != null)
            {
                return;
            }

            Console.WriteLine("Just parsed an ELEMENT1: {0} {1} {2} {3}", context.name.Text, context.desc.Text, context.other1.Text, context.other2.Text);
        }

        /// <summary>
        /// Called when the parser leaves an <c>element_2</c> rule; prints its labelled tokens.
        /// </summary>
        /// <param name="context">Context of the rule that was just exited.</param>
        public override void ExitElement_2(MyGrammarParser.Element_2Context context)
        {
            // Skip elements that failed to parse (see ExitElement_1 for the reasoning).
            if (context.exception != null)
            {
                return;
            }

            Console.WriteLine("Just parsed an ELEMENT2: {0} {1} {2} {3} {4}", context.name.Text, context.desc.Text, context.other1.Text, context.other2.Text, context.other3.Text);
        }
    }
}

MyGrammarExample.txt

BEGIN MODULE MyModule "This is the main module"

    BEGIN ELEMENT1 MyElement1 "This is the first element"
        Something
        Anything
    END ELEMENT1

    BEGIN ELEMENT1 MyElement2 "This is the second element"
        SomethingMore
        AnythingMore
    END ELEMENT1

    BEGIN ELEMENT2 MyFirstElement2 "This one will fail"
        Foo
        Bar
        HereShouldBeAnInt
    END ELEMENT2

    BEGIN ELEMENT2 MySecondElement2 "This one should parse even though the parser failed to parse the one before"
        RealFoo
        RealBar
        34
    END ELEMENT2

END MODULE
Community
  • 1
  • 1
metacircle
  • 2,438
  • 4
  • 25
  • 39

1 Answer

2

You should be able to accomplish this with this error strategy class:

/// <summary>
/// Error strategy that, after the default recovery stops on an 'END' token, skips that token
/// and keeps consuming until a token from the error recovery set is reached.
/// </summary>
internal class MyGrammarErrorStrategy : DefaultErrorStrategy
{
    /// <summary>
    /// Runs the default recovery and then, if the stream is positioned on 'END', advances past it
    /// to the next token in the recovery set.
    /// </summary>
    /// <param name="recognizer">The parser that reported the error.</param>
    /// <param name="e">The recognition exception being recovered from.</param>
    public override void Recover(Parser recognizer, RecognitionException e)
    {
        // Default behaviour first: for this grammar it is expected to leave the stream on the
        // 'END' token that closes the faulty element.
        base.Recover(recognizer, e);

        var tokens = (ITokenStream)recognizer.InputStream;

        // Only continue when recovery stopped where we expect it to.
        if (tokens.La(1) != MyGrammarParser.END)
        {
            return;
        }

        // Capture the set of tokens the parser can resume on before moving the stream.
        IntervalSet resumeSet = GetErrorRecoverySet(recognizer);

        // Step over the 'END' token itself ...
        tokens.Consume();

        // ... and skip ahead to the next resumable token: the 'BEGIN' of the following
        // module_element, or the 'END' of 'END MODULE' when the faulty element was the last one.
        ConsumeUntil(recognizer, resumeSet);
    }
}

And then set the error handler, accordingly:

// Replace the parser's error handler with the custom recovery strategy.
parser.ErrorHandler = new MyGrammarErrorStrategy();

The idea is that we first allow the default Recover implementation to move the current position to the "resynchronization set," which in this case is the next END token. Subsequently, we consume additional tokens using the provided error recovery set to move the position to where we need it to be. This resulting position will differ based on whether or not the errant module_element is the last in the module.

cokeman19
  • 2,405
  • 1
  • 25
  • 40
  • Your solution works, but I don't understand why the call to base.Recover() does not automatically move the token stream to the next 'BEGIN' token. – metacircle Dec 02 '14 at 12:19
  • According to the documentation, "The default implementation resynchronizes the parser by consuming tokens until we find one in the resynchronization set--loosely the set of tokens that can follow the current rule." So, basically it tries to find the next token that it knows to be valid, but not necessarily where it would have been if everything had parsed successfully. – cokeman19 Dec 02 '14 at 23:10
  • HI @cokeman19 can you help me with my query here https://stackoverflow.com/questions/46706766/move-to-a-new-state-after-cosuming-tokens-in-antlr4 . This strategy is not working for me – haripcce Oct 12 '17 at 13:21