As followup on this post:
I wrote a grammar that should be able to parse both of these inputs:
class x implements x: method hi() return ho1/* hi*/; end; end;
class x implements x: method hi() return 1 /* hi*/; end; end;
The first one parses fine, but when I invoke the parser with the second one I get this as output:
dec
Parse Error on line 0: syntax error
(the lexer prints dec
when it encounters a decimal number; i used this to test where the error is.)
my question is: why is this not working?
as i understand it:
Stat: RETURN Expr <<-
...
;
Expr: ...
| Term <<-
| ...
;
Term: ...
| ID
| Num <<-
| ...
;
Num : HEX
| INT <<-
;
this should be the the production for this input; as the way is essentially the same
for parsing ho1
(an ID
);
i don't understand why the identifier can be parsed but not the number.
- I tried adding
error
s to different productions to debug where the parser goes wrong but this just results in bison telling me that the rule is unusable because of shift-reduce conflicts. - I have also tried adding more debug output to the lexer to see how many more tokens the parser parses after the
num
ber, but apparently it is none.
i would appreciate some pointers/explanations as to where this behavior stems from and a hint at the solution.
here is my complete lexer:
%{
#include <stdio.h>
#include <stdlib.h>
#include "parser.tab.h"
#include <string.h>
int line_number = 0;
void lexerror(char *message);
%}
newline (\n|\r\n)
whitespace [\t \n\r]*
digit [0-9]
alphaChar [a-zA-Z]
alphaNumChar ({digit}|{alphaChar})
hexDigit ({digit}|[A-Fa-f])
decNum {digit}+
hexNum {digit}{hexDigit}*H
identifier {alphaChar}{alphaNumChar}*
number ({hexNum}|{decNum})
comment "/*"[.\r\n]*"*/"
anything .
%s InComment
%option noyywrap
%%
<INITIAL>{
interface return INTERFACE;
end return END;
class return CLASS;
implements return IMPLEMENTS;
var return VAR;
method return METHOD;
int return INT;
return return RETURN;
if return IF;
then return THEN;
else return ELSE;
while return WHILE;
do return DO;
not return NOT;
and return AND;
new return NEW;
this return THIS;
null return _NULL;
":" return COL;
";" return SCOL;
"(" return BRACL;
")" return BRACR;
"." return DOT;
"," return COMMA;
"=" return ASSIGNMENT;
"+" return PLUS;
"-" return MINUS;
"*" return ASTERISK;
"<" return LT;
{decNum} {
printf("dec\n");
//yylval = atoi(yytext);
return DEC;
}
{hexNum} { printf("hex\n");/*
const int len = strlen(yytext)-1;
char* substr = (char*) malloc(sizeof(char) * len);
strncpy(substr,yytext,len);
yylval = (int)strtol
( substr
, NULL
, 16);
free (substr);*/
return HEX;
}
{identifier} {
//yylval= (char *) malloc(sizeof(char)*strlen(yytext));
//strcpy(yylval, yytext);
return ID;
}
{whitespace} {}
"/*" { printf("begin comment\n"); BEGIN InComment;}
}
{newline} line_number++;
<InComment>{
"*/" { printf("end comment\n"); BEGIN INITIAL; }
{anything} {}
}
"*/" lexerror("illegal commenting token");
. lexerror("Illegal input");
%%
void lexerror(char *message)
{
fprintf(stderr,"Error: \"%s\" in line %d. = %s\n",
message,line_number,yytext);
exit(1);
}
and here my complete parser:
%{
# include <stdio.h>
# include <stdlib.h>
int yylex(void);
void yyerror(char *);
extern int line_number;
%}
%start Program
%token INTERFACE END CLASS IMPLEMENTS VAR METHOD INT RETURN IF THEN ELSE
%token WHILE DO NOT AND NEW THIS _NULL EOC SCOL COL BRACL BRACR DOT COMMA
%token ASSIGNMENT PLUS ASTERISK MINUS LT EQ DEC HEX ID NEWLINE
%%
Program: INTERFACE Interface SCOL Program { printf("interface\n"); }
| CLASS Class SCOL Program { printf("class\n");}
|
| error { printf("error on: %s\n", $$); }
;
Interface: ID COL
InterfaceContents
END
//| error { printf("error in interface description: %s\n", $$); }
;
InterfaceContents: AbstractMethod InterfaceContents //{ printf("many methods");}
| AbstractMethod //{ printf("one method"); }
| //{ printf("no method"); }
;
AbstractMethod: ID BRACL Types BRACR COL Type
//| error { printf("error in abstract method definition: %s\n", $$);}
;
Types : Type COMMA Types
| Type
|
;
Class: ID
IMPLEMENTS MaybeIDs COL
Members
END
| error { printf("error in class definition: %s\n", $$); }
;
MaybeIDs: ID MaybeIDs
| ID
|
;
Members: Member SCOL Members
| Member SCOL
|
;
Member: VAR ID COL Type
| METHOD ID BRACL Pars BRACR Stats END
//| error { printf("error in member definition: %s\n", $$); }
;
Type: INT
| ID
;
Pars: Par COMMA Pars
| Par
|
;
Par: ID COL Type
;
Stats: Stat SCOL Stats
| Stat SCOL
| error { printf("error in statement: %s\n" , $$); }
;
Stat: RETURN Expr
| IF Expr THEN Stats MaybeElse END
| WHILE Expr DO Stats END
| VAR ID COL Type COL ASSIGNMENT Expr
| ID COL ASSIGNMENT Expr
| Expr
| error { printf("error in statement: %s\n", $$); }
;
MaybeElse :
| ELSE Stats
;
Expr: NOT Term
| NEW ID
| Term PLUS Term
| Term ASTERISK Term
| Term AND Term
| Term ArithOp Term
| Term
| error { printf("error in expr: %s\n", $$); }
;
ArithOp: MINUS
| LT
| ASSIGNMENT
;
Term: Term DOT ID BRACL Exprs BRACR
| BRACL Expr BRACR
| _NULL ID
| ID
| Num
| THIS
| error { printf("error in term: %i\n", $$); }
;
Num : HEX
| INT
//| error { printf("error in number"); }
;
Exprs : Expr COMMA Exprs
| Expr
//| error { printf("error in exprs\n"); }
;
%%
void yyerror(char *s) {
fprintf(stderr, "Parse Error on line %i: %s\n", line_number, s);
exit(2);
}
int main(void){
yyparse();
}
here is also the makefile for the project:
CC = gcc
LEX = flex
YAC = bison
parser: parser.y lexer.l
$(YAC) -d -Wconflicts-rr parser.y
$(LEX) lexer.l
$(CC) parser.tab.c parser.tab.h lex.yy.c -o parser
clean:
rm -f *.tab.h *.tab.c *.gch *.yy.c
rm ./parser
i am testing with echo "some test" | ./parser
or cat testfile | ./parser