1

Lexer.l

%{
#include "y.tab.h"
%}

%%

"define" return(TK_KEY_DEFINE); 
"as" return(TK_KEY_AS);
"is" return(TK_KEY_IS);
"if" return(TK_KEY_IF);
"then" return(TK_KEY_THEN);
"else" return(TK_KEY_ELSE);
"endif" return(TK_KEY_ENDIF);
"with" return(TK_KEY_WITH);
"DEFINE" return(TK_KEY_DEFINE_UC);
"AS" return(TK_KEY_AS_UC);
"IS" return(TK_KEY_IS_UC);
"IF" return(TK_KEY_IF_UC);
"THEN" return(TK_KEY_THEN_UC);
"ELSE" return(TK_KEY_ELSE_UC);
"ENDIF" return(TK_KEY_ENDIF_UC);
"WITH" return(TK_KEY_WITH_UC);
"+" return(TK_PLUS);
"-" return(TK_MINUS); 
"*" return(TK_MUL);
"/" return(TK_DIV);
"~" return(TK_NOT); 
"&" return(TK_AND); 
"|" return(TK_OR);
"<=" return(TK_LEQ);
"<" return(TK_LESS);
">=" return(TK_GEQ); 
">" return(TK_GT); 
"==" return(TK_EQ);
"=" return(TK_ASSIGN);
"(" return(TK_OPEN);
")" return(TK_CLOSE);
";" return(TK_SEMI);
"," return(TK_COMMA);
[[:alpha:]_][[:alnum:]_]* return(IDENTIFIER);
[+-]?[0-9]+ return(INTEGER);
[+-]?([0-9]+([.][0-9]*)?|[.][0-9]+) return(REAL); 
[[:space:]]+ ;

%%

int yywrap(void)
{
  return 1;
}

Parser.y

%{

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

  typedef struct node
  {
    struct node *left;
    struct node *right;
    char *token;
  } node;

  node *mknode(node *left, node *right, char *token);
  void printtree(node *tree);

#define YYSTYPE struct node *

%}

%start Program

%token TK_KEY_DEFINE TK_KEY_DEFINE_UC
%token TK_KEY_AS TK_KEY_AS_UC
%token TK_KEY_IS TK_KEY_IS_UC 
%token TK_KEY_IF TK_KEY_IF_UC
%token TK_KEY_THEN TK_KEY_THEN_UC
%token TK_KEY_ELSE TK_KEY_ELSE_UC
%token TK_KEY_ENDIF TK_KEY_ENDIF_UC
%token TK_KEY_WITH TK_KEY_WITH_UC
%token TK_PLUS TK_MINUS
%token TK_MUL TK_DIV
%token TK_NOT
%token TK_AND
%token TK_OR 
%token TK_LEQ TK_LESS TK_GEQ TK_GT 
%token TK_EQ
%token TK_ASSIGN 
%token TK_OPEN TK_CLOSE
%token TK_SEMI
%token TK_COMMA
%token IDENTIFIER
%token INTEGER 
%token REAL


%left   TK_PLUS  TK_MINUS
%left   TK_MUL TK_DIV
%left   TK_LEG TK_LESS TK_GEQ TK_GT 
%left   TK_AND TK_OR
%left   TK_EQ
%right  TK_NOT TK_ASSIGN

%%


Program  : Macros Statements;

Macros   :   /* empty */
         | Macro Macros
         ;

Macro    : TK_KEY_DEFINE MacroTemplate TK_KEY_AS Expression;

MacroTemplate : IDENTIFIER MT;

MT            : /*empty*/
              | TK_OPEN IdentifierList TK_CLOSE 
              ; 

IdentifierList : IDENTIFIER I;

I : /*empty*/
  | TK_COMMA IdentifierList 
  ;

Statements : /*empty*/
           | Statement Statements 
           ;

IfStmt : TK_KEY_IF Condition TK_KEY_THEN Statements TK_KEY_ELSE Statements TK_KEY_ENDIF;

Statement : AssignStmt 
          | IfStmt
          ;

AssignStmt : IDENTIFIER TK_KEY_IS Expression;



Condition : C1 C11;

C11 : /*empty*/
    | TK_OR C1 C11
    ;

C1 : C2 C22;

C22 : /*empty*/
    | TK_AND C2 C22
    ; 

C2 : C3 C33;

C33 : TK_EQ C3 C33;

C3 : C4 C44;

C44 : /*empty*/
    | TK_LESS C4 C44 
    | TK_LEQ C4 C44 
    | TK_GT C4 C44  
    | TK_GEQ C4 C44
    ;

C4 : TK_NOT C5 | C5;

C5 : INTEGER | REAL | TK_OPEN Condition TK_CLOSE;

Expression : Term EE;

EE : /*empty*/
   | TK_PLUS Term EE
   | TK_MINUS Term EE
   ;

Term : Factor TT;

TT : /*empty*/
   | TK_MUL Factor TT 
   | TK_DIV Factor TT 
   ;

Factor : IDENTIFIER | REAL | INTEGER | TK_OPEN Expression TK_CLOSE;

%%

int main (void) {return yyparse ( );}

node *mknode(node *left, node *right, char *token)
{
  /* malloc the node */
  node *newnode = (node *)malloc(sizeof(node));
  char *newstr = (char *)malloc(strlen(token)+1);
  strcpy(newstr, token);
  newnode->left = left;
  newnode->right = right;
  newnode->token = newstr;
  return(newnode);
}

void printtree(node *tree)
{
  int i;
  if (tree->left || tree->right)
    printf("(");

  printf(" %s ", tree->token);

  if (tree->left)
    printtree(tree->left);
  if (tree->right)
    printtree(tree->right);

  if (tree->left || tree->right)
    printf(")");
}

int yyerror (char *s) 
{
fprintf (stderr, "%s\n", s);
}

I wish the output to a parse tree if no errors and indicate error if any . But I get a lot of warnings such as

 warning: rule useless in grammar 
 warning: nonterminal useless in grammar

I understood the reason of this by reading other similar questions but could not correct it myself. Please help me solve this . Thanks !

Hi rici ,

Thank you so much , so I need not worry about left recursion , left factored grammar etc and directly go ahead and use something like below in yacc ?

%% 

Program : Macros Statements;

Macros : /*empty*/ 
       |Macro Macros
       ; 

Macro : TK_KEY_DEFINE MacroTemplate TK_KEY_AS Expression;

MacroTemplate : VarTemplate
              | FunTemplate
          ;

VarTemplate : IDENTIFIER;

FunTemplate : IDENTIFIER TK_OPEN IdentifierList TK_CLOSE;

IdentifierList : IDENTIFIER TK_COMMA IdentifierList
               | IDENTIFIER
               ;

Statements : /*empty*/
           | Statement Statements 
           ;

IfStmt : TK_KEY_IF Condition TK_KEY_THEN Statements TK_KEY_ELSE Statements TK_KEY_ENDIF;

Statement : AssignStmt 
          | IfStmt
          ;

AssignStmt : IDENTIFIER TK_KEY_IS Expression;

Condition : Condition TK_AND Condition
          | Condition TK_OR Condition
          | Condition TK_LESS Condition 
          | Condition TK_LEQ Condition
          | Condition TK_GT Condition 
          | Condition TK_GEQ Condition 
      | Condition TK_EQ Condition
          | TK_NOT Condition 
          | TK_OPEN Condition TK_CLOSE
          | INTEGER
          | REAL
          ;

Expression : Expression TK_PLUS Expression
           | Expression TK_MINUS Expression 
           | Expression TK_MUL Expression 
           | Expression TK_DIV Expression 
           | TK_OPEN Expression TK_CLOSE
           | IDENTIFIER 
           | INTEGER
           | REAL
           ;

%%

Also yes , I noted your last point :)

Nick
  • 47
  • 1
  • 8

1 Answers1

1

Unlike C11, C22, C44 and other "tail" rules, which can produce %empty, C33 has only one production:

C33 : TK_EQ C3 C33;

Since it has no non-recursive production, it cannot possibly produce a sentence (consisting only of non-terminals). And since it is part of the only production for C2 which is part of the only production for C1 which is part of the only production for Condition which is part of the only production for IfStmt, none of those can produce any sentence either. A rule which cannot produce any sentence is technically described as "useless" and a non-terminal all of whose rules are useless (or whose only rule is useless) is a "useless non-terminal".

There is another category of useless non-terminals: those which cannot be produced by any useful rule. That will be the case with C4 (which can only be produced by C3, which has been discovered to be useless) and thus with C44 and C5.

It should be evident how to fix that, but I'd like to note that you are tying yourself into knots by trying to avoid left-recursion, which is both unnecessary and counter-productive when using a bottom-up parser generator such as bison/yacc. (See the last paragraph of this answer for a longer grumble about this.) The artificial productions (C33 and friends) serve only to complicate the parse tree.

Also, since your grammar is not ambiguous -- in effect, the production rules clearly define operator binding strengths -- the various precedence declarations are pointless. (Unlike "useless", that is not a technical term :-) ). Precedence declarations are only applied to resolve grammatical ambiguity, which is not present here.

Finally, I think you should re-examine your grammar for Conditions. What, for example, is the meaning of ~3 < ~4? And why is x * 2 < y not valid?

rici
  • 234,347
  • 28
  • 237
  • 341