7

I'm trying to learn about parsing expressions.I found recursive descent parse seems easy to do this. From wikipedia,I found an example in C. So,I start reading and editing this code to understand how it works. I written the missing routines according to descriptions on wikipedia's page,but it doesn't work from any expression as I expected. For example: 1+2*3+1; returns

error: statement: syntax error

Can someone explain what am I missing?

The current C code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef enum {ident, number, lparen, rparen, 
          times, // symbol *
          slash, // symbol \ not added yet
          plus, // symbol +
          minus, //symbol -
          eql, //symbol ==
          neq, // !=
          lss, // <
          leq,// <=
          gtr, // >
          geq, // >= 
          callsym, // not added yet 
          beginsym, // not added yet 
          semicolon, // ;
          endsym, 
          ifsym, whilesym, becomes, thensym, dosym, constsym, 
          comma, //:
          varsym, procsym, period, oddsym, 
          not, // !
          eq // =
} Symbol;

Symbol sym;
int peek;
void getsym(void);
void error(const char*);
void expression(void);
void program(void);
void nexttok(void);

#define is_num(c) ((c) >= '0' && (c) <= '9')
#define is_letter(c) ((c) <= 'a' && (c) <= 'z' || (c) >= 'A' && (c) <= 'Z')

int main(void)
{
  program();
  return 0;
}

void nexttok(void)
{
  peek = getchar();
}

void getsym(void)
{
  for(;;) {
    nexttok();
    if(peek == ' ' || peek == '\t') continue;
    else if(peek == EOF) break;
    else break;
  }
  //static char buf[256];
  switch(peek) {
  case '+': sym = plus; return;
  case '-': sym = minus; return;
  case '*': sym = times; return;
  case ';': sym = semicolon; return;
  case ':': sym = comma; return;
  case '=': 
    nexttok();
    if(peek == '=')
      sym = eql;
    else
      sym = eq;
    return;
  case '!': 
    nexttok();
    if(peek == '=')
      sym = neq;
    else
      sym = not;
    return;
  case '<':
    nexttok();
    if(peek == '=')
      sym = leq;
    else
      sym = lss;
    return;
  case '>':
    nexttok();
    if(peek == '=')
      sym = geq;
    else
      sym = gtr;
    return;
  }
  if(is_num(peek)) {
    sym = number;
    return;
  }
}

int accept(Symbol s) {
    if (sym == s) {
        getsym();
        return 1;
    }
    return 0;
}

int expect(Symbol s) {
    if (accept(s))
        return 1;
    error("expect: unexpected symbol");
    return 0;
}

void factor(void) {
    if (accept(ident)) {
        ;
    } else if (accept(number)) {
        ;
    } else if (accept(lparen)) {
        expression();
        expect(rparen);
    } else {
        error("factor: syntax error");
        getsym();
    }
}

void term(void) {
    factor();
    while (sym == times || sym == slash) {
        getsym();
        factor();
    }
}

void expression(void) {
    if (sym == plus || sym == minus)
        getsym();
    term();
    while (sym == plus || sym == minus) {
        getsym();
        term();
    }
}

void condition(void) {
    if (accept(oddsym)) {
        expression();
    } else {
        expression();
        if (sym == eql || sym == neq || sym == lss || sym == leq || sym == gtr || sym == geq) {
            getsym();
            expression();
        } else {
            error("condition: invalid operator");
            getsym();
        }
    }
}

void statement(void) {
    if (accept(ident)) {
        expect(becomes);
        expression();
    } else if (accept(callsym)) {
        expect(ident);
    } else if (accept(beginsym)) {
        do {
            statement();
        } while (accept(semicolon));
        expect(endsym);
    } else if (accept(ifsym)) {
        condition();
        expect(thensym);
        statement();
    } else if (accept(whilesym)) {
        condition();
        expect(dosym);
        statement();
    } else {
        error("statement: syntax error");
        getsym();
    }
}

void block(void) {
    if (accept(constsym)) {
        do {
            expect(ident);
            expect(eql);
            expect(number);
        } while (accept(comma));
        expect(semicolon);
    }
    if (accept(varsym)) {
        do {
            expect(ident);
        } while (accept(comma));
        expect(semicolon);
    }
    while (accept(procsym)) {
        expect(ident);
        expect(semicolon);
        block();
        expect(semicolon);
    }
    statement();
}

void program(void) {
    getsym();
    block();
    expect(period);
}

void error(const char *msg)
{
  fprintf(stderr,"error: %s\n",msg);
  exit(1);
}
Jack
  • 16,276
  • 55
  • 159
  • 284
  • [Another example of a recursive descent parser in a small C compiler](https://github.com/alexfru/SmallerC). – Alexey Frunze Apr 21 '13 at 03:53
  • @AlexeyFrunze: I'II check out this! Thanks very much! – Jack Apr 21 '13 at 03:58
  • @AlexeyFrunze: Was stack-based the model usded for parsing expression in this compiler? – Jack Apr 21 '13 at 04:32
  • In a way, yes. First, there's the stack due to recursive calls. Next, I'm using Shunting Yard algorithm, which implies the use of two stack data structures in the process, and in the end the result is in the Reverse Polish Notation, which needs to be evaluated in a stack data structure. And the latter is done either at compile time (e.g. when folding constant subexpressions) or at run time of the generated code (the CPU stack is used at that point). – Alexey Frunze Apr 21 '13 at 04:36

1 Answers1

10

statement never calls expression, so all expressions are going to be syntax errors. To fix this, you'd need to change statement so it will call expression if sym is a valid symbol to start an expression. (accept would be unacceptable because it would consume the symbol and expression would not see it.)

icktoofay
  • 126,289
  • 21
  • 250
  • 231