1

Clang is complaining about my ANTLR3 (antlr-3.5.2-complete.jar) parser. I need C now and so I'm stuck with ANTLR3. These are the syntax errors:

dwParser.c:2009:44: error: use of undeclared identifier 'FOLLOW_set_in_g_number387'
    EXCEPTION->expectingSet = &FOLLOW_set_in_g_number387;
                               ^
dwParser.c:2011:43: error: use of undeclared identifier 'FOLLOW_set_in_g_number387'
    RECOVERFROMMISMATCHEDSET(&FOLLOW_set_in_g_number387);

dwParser.c:162:91: note: expanded from macro 'RECOVERFROMMISMATCHEDSET'
#define RECOVERFROMMISMATCHEDSET(s) RECOGNIZER->recoverFromMismatchedSet(RECOGNIZER, s)

So it's complaining about the follow set of g_number.

This is the entire grammar. I've expanded some declarations which would be in an include file. And I've verbosely pushed every literal into the Lexer half. But the error messages are pretty opaque. Any clues? Nothing in the ANTLR3 book on follow sets.

grammar dw;

options {
    language = C;
}

// C code supplied through the @parser::header action: <assert.h> plus
// prototypes for the dw_* callbacks that the parser rule actions below call.
@parser::header {
    #include <assert.h>

    /* Called at the start and at the end of every `instruction` rule. */
    void dw_start(void);
    void dw_finish(void);
    /* Integer argument callbacks; each receives the matched g_number text. */
    void dw_arg_longlong(unsigned char *text);
    void dw_arg_long(unsigned char *text);
    void dw_arg_short(unsigned char *text);
    void dw_arg_char(unsigned char *text);
    void dw_arg_ulonglong(unsigned char *text);
    void dw_arg_ulong(unsigned char *text);
    void dw_arg_ushort(unsigned char *text);
    void dw_arg_uchar(unsigned char *text);
    /* Floating-point argument callbacks; each receives the matched FLOAT text. */
    void dw_arg_double(unsigned char *text);
    void dw_arg_float(unsigned char *text);
    /* Callbacks receiving the text of an ID token (label, address, opcode). */
    void dw_label(unsigned char *text);
    void dw_address(unsigned char *text);
    void dw_opcode(unsigned char *text);
}

// C code supplied through the @lexer::header action. It repeats the same
// include and dw_* prototypes as the @parser::header section and adds the
// `_empty` macro.
@lexer::header {
    #include <assert.h>

    /* Same callback prototypes as declared for the parser. */
    void dw_start(void);
    void dw_finish(void);
    void dw_arg_longlong(unsigned char *text);
    void dw_arg_long(unsigned char *text);
    void dw_arg_short(unsigned char *text);
    void dw_arg_char(unsigned char *text);
    void dw_arg_ulonglong(unsigned char *text);
    void dw_arg_ulong(unsigned char *text);
    void dw_arg_ushort(unsigned char *text);
    void dw_arg_uchar(unsigned char *text);
    void dw_arg_double(unsigned char *text);
    void dw_arg_float(unsigned char *text);
    void dw_label(unsigned char *text);
    void dw_address(unsigned char *text);
    void dw_opcode(unsigned char *text);

    /* NOTE(review): presumably a workaround for generated lexer code that
       references an undefined `_empty` identifier -- confirm it is needed. */
    #define _empty NULL
}

// A program is one or more instructions. No other rule in this grammar
// references g_program.
g_program       : (instruction)+ ;

// One instruction: an optional label, the four-ID g_ops group, exactly one
// typed argument (g_args) and a terminating newline. dw_start() is invoked
// before the sub-rules are matched and dw_finish() after the newline.
instruction     : { dw_start(); } g_label? g_ops g_args g_newline { dw_finish(); } ;

// Exactly four consecutive ID tokens. No action is attached, so none of the
// dw_* callbacks is invoked for them here.
g_ops           : ID ID ID ID ;
// Exactly one argument, chosen from the typed-argument rules below: eight
// integer kinds, two floating-point kinds, or an address.
g_args          :
                ( g_longlong
                | g_long
                | g_short
                | g_char
                | g_ulonglong
                | g_ulong
                | g_ushort
                | g_uchar
                | g_double
                | g_float
                | g_address
                ) ;

// Integer arguments: a type-prefix rule followed by g_number. The action
// passes the text matched by g_number to the corresponding dw_arg_* callback.
g_longlong      : g_ll  g_number    { dw_arg_longlong(  $g_number.text->chars); } ;
g_long          : g_l   g_number    { dw_arg_long(      $g_number.text->chars); } ;
g_short         : g_s   g_number    { dw_arg_short(     $g_number.text->chars); } ;
g_char          : g_c   g_number    { dw_arg_char(      $g_number.text->chars); } ;
g_ulonglong     : g_ull g_number    { dw_arg_ulonglong( $g_number.text->chars); } ;
g_ulong         : g_ul  g_number    { dw_arg_ulong(     $g_number.text->chars); } ;
g_ushort        : g_us  g_number    { dw_arg_ushort(    $g_number.text->chars); } ;
g_uchar         : g_uc  g_number    { dw_arg_uchar(     $g_number.text->chars); } ;
// Floating-point arguments: a type-prefix rule followed by a FLOAT token whose
// text is passed to the callback.
g_double        : g_d   FLOAT       { dw_arg_double(    $FLOAT.text->chars); } ;
g_float         : g_f   FLOAT       { dw_arg_float(     $FLOAT.text->chars); } ;
// Label: an ID followed by ':'; the ID text goes to dw_label().
g_label         : ID    g_colon     { dw_label(         $ID.text->chars); } ;
// Address: '@' followed by an ID; the ID text goes to dw_address().
g_address       : g_at  ID          { dw_address(       $ID.text->chars); } ;
// Single ID passed to dw_opcode(). NOTE(review): no rule in this grammar
// references g_opcode (g_ops matches ID directly) -- confirm whether g_ops
// was meant to use it.
g_opcode        : ID                { dw_opcode(        $ID.text->chars); } ;

// Any one integer literal token: HEX, DEC or OCT. This is the rule for which
// the generated dwParser.c references the undeclared FOLLOW_set_in_g_number*
// identifier reported by clang.
g_number        : (HEX | DEC | OCT) ;

// Single-literal parser rules. Each wraps exactly one quoted literal: the
// type prefixes used by the typed-argument rules, then the ':' / '@' / newline
// punctuation used by g_label, g_address and instruction.
g_ll            : 'LL'  ;
g_l             : 'L'   ;
g_s             : 'S'   ;
g_c             : 'C'   ;
g_ull           : 'ULL' ;
g_ul            : 'UL'  ;
g_us            : 'US'  ;
g_uc            : 'UC'  ;
g_d             : 'D'   ;
g_f             : 'F'   ;
g_colon         : ':'   ;
g_at            : '@'   ;
g_newline       : '\n'  ;

// Octal literal: '0' followed by one or more digits 0-7.
OCT             : '0' ('0'..'7')+ ;
// Hexadecimal literal: '0x' or '0X' followed by one or more hex digits.
HEX             : '0' ('x' | 'X') ('0'..'9' | 'a'..'f' | 'A'..'F')+ ;
// Decimal literal: optional '-', then either a lone '0' or a non-zero digit
// followed by any number of digits.
DEC             : '-'? ('0' | '1'..'9' '0'..'9'*) ;
// Floating-point literal: optional '-', one or more digits, a mandatory '.',
// optional fraction digits and an optional signed exponent.
FLOAT           : '-'? ('0'..'9')+ '.' ('0'..'9')* (('e' | 'E') ('+' | '-')? ('0'..'9')+)? ;
// Identifier: one or more ASCII letters (no digits or underscores).
ID              : ('A'..'Z' | 'a'..'z')+ ;

// Runs of spaces, tabs and carriage returns; '\n' is excluded because it is
// the instruction terminator (g_newline).
// NOTE(review): WS and COMMENT carry no skip/hidden-channel action, so these
// tokens presumably reach the parser -- confirm that is intended.
WS              : (' ' | '\t' | '\r')+ ;
// Line comment: '//' up to and including the terminating newline.
COMMENT         : '//' ~('\n' | '\r')* '\r'? '\n' ;

The actual block in dwParser.c is:

{
    // dw.g:52:11: ( ( HEX | DEC | OCT ) )
    // dw.g:
    {
        if ( LA(1) == DEC || LA(1) == HEX || LA(1) == OCT )
        {
            CONSUME();
            PERRORRECOVERY=ANTLR3_FALSE;
        }
        else
        {
            CONSTRUCTEX();
            EXCEPTION->type         = ANTLR3_MISMATCHED_SET_EXCEPTION;
            EXCEPTION->name         = (void *)ANTLR3_MISMATCHED_SET_NAME;
            EXCEPTION->expectingSet = &FOLLOW_set_in_g_number388;

            RECOVERFROMMISMATCHEDSET(&FOLLOW_set_in_g_number388);
            goto ruleg_numberEx;
        }
    }
}
Olsonist
  • 2,051
  • 1
  • 20
  • 35
  • Is it working fine in antlr itself? – miushock Feb 25 '15 at 02:04
  • If you mean *does ANTLR3 generate code?* Yes. ANTLR3 generates a ton of "template error" warnings, but that is a known issue. It then generates dwLexer.c and dwParser.c. I futzed with the source and used ANTLR4. It generates cleanly on ANTLR4 in Java but I'm seeing if that parser will compile with javac. – Olsonist Feb 25 '15 at 02:49
  • Oops, this is a duplicate of http://stackoverflow.com/questions/14892689/regresion-antlr-c-target-follow-set-generates-reference-to-undeclared-identifie – Olsonist Feb 25 '15 at 04:45
  • Well, that approach seemed to recommend substituting g_number inline, so to speak, with (HEX | DEC | OCT) and that didn't work. Same error messages. While it had the same error message, that problem was for 1.5 and didn't really explain its solution, just that it worked. – Olsonist Feb 25 '15 at 04:55
  • Well, I tried the substitution trick again and it worked. I don't know why and it does cause a headache dealing with (HEX | DEC | OCT) but it works. – Olsonist Feb 25 '15 at 05:08
  • seems to be a duplicate of [this](http://stackoverflow.com/questions/20057063/follow-set-in-is-undefined-in-generated-parser) which is some sort of bug with the c target. try writing `(t=HEX | t=DEC | t=OCT)`. – lp_ Feb 25 '15 at 22:44

0 Answers