Description
I want to implement a syntax checker for JSON files with Bison.
- I used
  `Array: LB RB | LB Values RB | LB Values COMMA RB error { puts("extra comma, recovered"); };`
  to recognize a syntax error such as `["extra comma",]`, but it failed. The similar pattern for `Object` works, though, so I think the failure may be related to the order of the patterns.
- However, when I use
  `Values: Value | Value COMMA Values | Value COMMA error { puts("extra comma, recovered"); }`
  it succeeds. I wonder why `Value COMMA error { puts("extra comma, recovered"); }` does not wrongly match text that should be parsed as `Value COMMA Values`?
Source code
syntax.y
%{
/* Pull the scanner source (lex.yy.c, presumably generated from lex.l) directly
   into the parser's translation unit. */
#include"lex.yy.c"
/* Error-reporting callback; defined in the epilogue of this file. */
void yyerror(const char*);
%}
/* Punctuation tokens: '{' '}' '[' ']' ':' ',' (as returned by the rules in lex.l). */
%token LC RC LB RB COLON COMMA
/* Literal tokens for JSON strings and numbers. */
%token STRING NUMBER
/* Keyword tokens for true, false and null. */
%token TRUE FALSE VNULL
%%
/* First rule of the grammar (no %start is given): a document is a single Value. */
Json:
Value
;
/* Any JSON value: a container (Object or Array) or one of the scalar tokens. */
Value:
Object
| Array
| STRING
| NUMBER
| TRUE
| FALSE
| VNULL
;
/*
 * A JSON object: empty braces, or braces around a member list.
 * The third alternative is an error-recovery rule: it matches exactly one
 * Member followed by COMMA RC and then Bison's special `error` token, and
 * prints a recovery notice.
 * NOTE(review): this alternative names Member, not Members, so a trailing
 * comma after two or more members does not appear to be covered - confirm.
 */
Object:
LC RC
| LC Members RC
| LC Member COMMA RC error { puts("extra comma, recovered"); }
;
/* One or more Member entries separated by COMMA (right-recursive list). */
Members:
Member
| Member COMMA Members
;
/* A single key/value pair: the key must be a STRING token. */
Member:
STRING COLON Value
;
/*
 * A JSON array: empty brackets, or brackets around a value list.
 * The third alternative is an error-recovery rule for an array closed with
 * '}' (RC) instead of ']' (RB); it prints a recovery notice.
 * NOTE(review): because `error` is placed after RC, the syntax error is
 * presumably raised on whatever token follows '}' rather than on '}' itself
 * - confirm against the generated automaton.
 */
Array:
LB RB
| LB Values RB
| LB Values RC error { puts("unmatched right bracket, recovered"); }
;
/*
 * One or more Value entries separated by COMMA (right-recursive list).
 * The third alternative is an error-recovery rule: `error` sits directly
 * after COMMA, i.e. at the position where the next Value is expected, and
 * the action prints a recovery notice.
 */
Values:
Value
| Value COMMA Values
| Value COMMA error { puts("extra comma, recovered"); }
;
%%
/*
 * Parser error callback.
 *
 * Writes only the fixed prefix "syntax error: " to stdout, with no trailing
 * newline; the recovery actions in the grammar print the rest of the line
 * via puts(). The message passed in by the parser is intentionally unused.
 */
void yyerror(const char *s){
    (void)s;
    fputs("syntax error: ", stdout);
}
/*
 * Entry point: parse the file named by the single command-line argument.
 *
 * Exits with status -1 (after a diagnostic on stderr) when the argument
 * count is wrong or the file cannot be opened. Otherwise returns the result
 * of yyparse(), so an unrecovered syntax error yields a non-zero exit status
 * instead of being silently reported as success.
 */
int main(int argc, char **argv){
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <file_path>\n", argv[0]);
        exit(-1);
    }

    yyin = fopen(argv[1], "r");
    if (!yyin) {
        perror(argv[1]);
        exit(-1);
    }

    /* Keep the parser's verdict: 0 means the input was accepted. */
    int status = yyparse();

    /* Release the input stream now that scanning is finished. */
    fclose(yyin);
    return status;
}
lex.l
%{
/* Token codes returned by the rules come from this header (presumably the
   one Bison generates from syntax.y). */
#include"syntax.tab.h"
%}
%option noyywrap
/* The letter u followed by exactly four hexadecimal digits. */
unic u[0-9a-fA-F]{4}
/* A backslash followed by one of the listed escape characters or by {unic}. */
esc \\([\"\\/bfnrt]|{unic})
/* Any single character except a double quote, a backslash, or 0x00-0x1f. */
scp [^"\\\x00-\x1f]
/* A double-quoted run of zero or more {esc} or {scp} items. */
string \"({esc}|{scp})*\"
/* A lone 0, or a non-zero digit followed by any digits (no leading zeros). */
int 0|[1-9][0-9]*
/* A dot followed by one or more digits. */
frac \.[0-9]+
/* E or e, an optional sign, then one or more digits. */
exp [Ee][+-]?[0-9]+
/* Optional minus sign, {int}, then optional {frac} and optional {exp}. */
number -?{int}{frac}?{exp}?
/* A single space, newline, carriage return or tab. */
empty [ \n\r\t]
%%
    /* Punctuation: each character maps to its own token. */
"{" { return LC; }
"}" { return RC; }
"[" { return LB; }
"]" { return RB; }
":" { return COLON; }
"," { return COMMA; }
    /* Keyword literals. */
"true" { return TRUE; }
"false" { return FALSE; }
"null" { return VNULL; }
    /* Strings and numbers, using the named patterns from the definitions section. */
{string} { return STRING; }
{number} { return NUMBER; }
    /* Whitespace is consumed without returning a token. */
{empty} {}
    /* Any other single character: print a diagnostic; no token is returned,
       so scanning continues with the next character. */
. { printf("lexical error: %s\n", yytext); }