0

As followup on this post:

I wrote a grammar that should be able to parse both of these inputs:

class x implements x: method hi() return ho1/* hi*/; end; end;
class x implements x: method hi() return 1 /* hi*/; end; end;

The first one parses fine, but when I invoke the parser with the second one I get this as output:

dec
Parse Error on line 0: syntax error

(the lexer prints dec when it encounters a decimal number; i used this to test where the error is.)

my question is: why is this not working?

as i understand it:

Stat: RETURN Expr  <<-
    ... 
    ;  

Expr: ... 
    | Term <<-
    | ...
    ;  

Term: ...
    | ID 
    | Num <<-
    | ...
    ;

Num : HEX
    | INT <<-
    ;

this should be the the production for this input; as the way is essentially the same for parsing ho1 (an ID);

i don't understand why the identifier can be parsed but not the number.

  • I tried adding errors to different productions to debug where the parser goes wrong but this just results in bison telling me that the rule is unusable because of shift-reduce conflicts.
  • I have also tried adding more debug output to the lexer to see how many more tokens the parser parses after the number, but apparently it is none.

i would appreciate some pointers/explanations as to where this behavior stems from and a hint at the solution.

here is my complete lexer:

%{
#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"
#include <string.h>

int line_number = 0;
void lexerror(char *message);
%}

newline         (\n|\r\n)
whitespace      [\t \n\r]*
digit           [0-9]
alphaChar       [a-zA-Z]
alphaNumChar    ({digit}|{alphaChar})
hexDigit        ({digit}|[A-Fa-f])
decNum          {digit}+
hexNum          {digit}{hexDigit}*H
identifier      {alphaChar}{alphaNumChar}*
number          ({hexNum}|{decNum})
comment         "/*"[.\r\n]*"*/"
anything        .
%s InComment
%option noyywrap
%%
<INITIAL>{
interface       return INTERFACE;
end             return END;
class           return CLASS;
implements      return IMPLEMENTS;
var             return VAR;
method          return METHOD;
int             return INT;
return          return RETURN;
if              return IF;
then            return THEN;
else            return ELSE;
while           return WHILE;
do              return DO;
not             return NOT;
and             return AND;
new             return NEW;
this            return THIS;
null            return _NULL;
":"             return COL;
";"             return SCOL;
"("             return BRACL;
")"             return BRACR;
"."             return DOT;
","             return COMMA;
"="             return ASSIGNMENT;
"+"             return PLUS;
"-"             return MINUS;
"*"             return ASTERISK;
"<"             return LT;


{decNum}        {
            printf("dec\n");
                    //yylval = atoi(yytext);
                    return DEC; 
                }
{hexNum}        {   printf("hex\n");/*
                    const int len = strlen(yytext)-1;
                    char* substr = (char*) malloc(sizeof(char) * len);
                    strncpy(substr,yytext,len);
                    yylval = (int)strtol
                            ( substr
                            , NULL
                            , 16);
                    free (substr);*/
                    return HEX;
                }
{identifier}    {
                    //yylval= (char *) malloc(sizeof(char)*strlen(yytext));
                    //strcpy(yylval, yytext);
                    return ID; 
                }
{whitespace}    {}
"/*"        { printf("begin comment\n"); BEGIN InComment;}
}
{newline}       line_number++;
<InComment>{
"*/"        { printf("end comment\n"); BEGIN INITIAL; }
{anything} {}
}
"*/"        lexerror("illegal commenting token");
.               lexerror("Illegal input");

%%

void lexerror(char *message)
{
   fprintf(stderr,"Error: \"%s\" in line %d. = %s\n",
           message,line_number,yytext);
   exit(1);
}

and here my complete parser:

%{
# include <stdio.h>
# include <stdlib.h>

int yylex(void);
void yyerror(char *);
extern int line_number;
%}

%start Program

%token INTERFACE END CLASS IMPLEMENTS VAR METHOD INT RETURN IF THEN ELSE
%token WHILE DO NOT AND NEW THIS _NULL EOC SCOL COL BRACL BRACR DOT COMMA 
%token ASSIGNMENT PLUS ASTERISK MINUS LT EQ DEC HEX ID NEWLINE


%% 
Program: INTERFACE Interface SCOL Program     { printf("interface\n"); }
       | CLASS Class SCOL Program         { printf("class\n");} 
       |
       | error { printf("error on: %s\n", $$); }
       ;  
 
Interface: ID COL 
             InterfaceContents  
           END
         //| error              { printf("error in interface description: %s\n", $$); }
         ;  

InterfaceContents: AbstractMethod InterfaceContents //{ printf("many methods");}
        | AbstractMethod           //{ printf("one method"); }
        |                  //{ printf("no method"); }
        ;


AbstractMethod: ID BRACL Types BRACR COL Type
              //| error { printf("error in abstract method definition: %s\n", $$);}
              ;

Types : Type COMMA Types
      | Type 
      | 
      ;

Class: ID 
       IMPLEMENTS MaybeIDs COL
         Members
       END  
     | error { printf("error in class definition: %s\n", $$); }   
     ;  

MaybeIDs: ID MaybeIDs
    | ID
    | 
    ;

Members: Member SCOL Members
       | Member SCOL
       | 
       ;

Member: VAR ID COL Type  
      | METHOD ID BRACL Pars BRACR Stats END
      //| error { printf("error in member definition: %s\n", $$); }
      ;  
 
Type: INT  
    | ID 
    ;  
 
Pars: Par COMMA Pars
    | Par
    | 
    ;  
 
Par: ID COL Type 
   ;  
 
Stats: Stat SCOL Stats
     | Stat SCOL
     | error { printf("error in statement: %s\n" , $$); }
     ;  
 
Stat: RETURN Expr  
    | IF Expr THEN Stats MaybeElse END  
    | WHILE Expr DO Stats END  
    | VAR ID COL Type COL ASSIGNMENT Expr
    | ID COL ASSIGNMENT Expr 
    | Expr        
    | error { printf("error in statement: %s\n", $$); }
    ;  

MaybeElse : 
          | ELSE Stats
          ;

Expr: NOT Term  
    | NEW ID 
    | Term PLUS Term 
    | Term ASTERISK Term
    | Term AND Term
    | Term ArithOp Term  
    | Term
    | error { printf("error in expr: %s\n", $$); }
    ;  

ArithOp: MINUS
       | LT
       | ASSIGNMENT
       ;

Term: Term DOT ID BRACL Exprs BRACR
    | BRACL Expr BRACR
    | _NULL ID
    | ID 
    | Num
    | THIS  
    | error { printf("error in term: %i\n", $$); }
    ;

Num : HEX
    | INT
    //| error { printf("error in number"); }
    ;

Exprs : Expr COMMA Exprs
      | Expr
      //| error { printf("error in exprs\n"); }
      ;

%%
void yyerror(char *s) {
    fprintf(stderr, "Parse Error on line %i: %s\n", line_number, s);
    exit(2);
}

int main(void){
    yyparse();
}

here is also the makefile for the project:

CC    = gcc
LEX   = flex 
YAC   = bison

parser: parser.y lexer.l
    $(YAC) -d -Wconflicts-rr parser.y
    $(LEX) lexer.l
    $(CC)  parser.tab.c parser.tab.h lex.yy.c -o parser

clean:
    rm -f *.tab.h *.tab.c *.gch *.yy.c
    rm ./parser

i am testing with echo "some test" | ./parser or cat testfile | ./parser

Fabian Schneider
  • 799
  • 1
  • 13
  • 40
  • 1
    `Num: HEX | INT` should be `Num: HEX | DEC`. I suggest you learn how to use [Bison's built-in debugging traces](https://www.gnu.org/software/bison/manual/bison.html#Tracing) instead of trying to scatter printf calls into your actions. – rici Apr 19 '21 at 13:53
  • When you create string copies, you aren't leaving enough room for the null terminator. And using `strncpy` is a really bad idea because it doesn't null terminate. Whoever told you it was "safer" was wrong. – rici Apr 19 '21 at 13:56
  • @rici that was it thanks; apparently i am getting blind regarding my implementation; thanks for the link, this is most helpful to a beginner like myself ^^; regarding `strncpy`: I started using C in the course for which I have to write this assignment; I still have a lot to learn in C; – Fabian Schneider Apr 19 '21 at 14:02

0 Answers0