3

I have found a parsing algorithm here, however it is in ML and I'm not too familiar with it. For better understanding of the algorithm I am trying to translate it to an imperative language like C++. Now thee are a few things I'm not sure of or don't really understand.

Here is a header for parsing a postfix expression (AFAIK this is technically not a header, but a match but I am not familiar with functional terms):

parse_postfix(stack, (e, []), 
                    ipts as RATOR (irator as (_, _, POSTFIX)) :: ipts') =

This means that ipts is the head of the list ipts' and is a postfix operator? Why is there another match inside (irator as...)? Does it remove it from the list or advances anyway? Or is ipts the remainder of the list when the operator irator is removed?

I'm having a hard time translating this. Here's what I've coded so far:

#include <iostream>
#include <map>
#include <stack>
#include <string>
#include <vector>

enum Assoc { Left, Right, Noassoc };
enum Fixity { Prefix, Infix, Postfix };

struct Oper {
    std::string Symbol;
    int Precedence;
    Fixity Fix;     // We can't represent bound types that way (INFIX <assoc>)
    Assoc Asc;      // so we just make it have the operator anyway

    Oper(std::string const& s, int p, Fixity f, Assoc a)
        : Symbol(s), Precedence(p), Fix(f), Asc(a) { }
};

// A regular AST representation
struct Expr { };
struct ConstExpr : public Expr {
    int Value;

    ConstExpr(int i) : Value(i) { }
};
struct UryExpr : public Expr {
    const Expr *Sub;
    Oper *OP;

    UryExpr(const Expr *s, Oper *o)
        : Sub(s), OP(o) { }
};
struct BinExpr : public Expr {
    const Expr *LHS, *RHS;
    Oper *OP;

    BinExpr(const Expr *l, const Expr *r, Oper *o)
        : LHS(l), RHS(r), OP(o) { }
};

bool noparens(Oper *inner, Oper *outer, Assoc side) {
    int pi = inner->Precedence, po = outer->Precedence;
    Fixity fi = inner->Fix, fo = outer->Fix;
    Assoc ai = inner->Asc, ao = outer->Asc;
    if (pi > po) return true;
    if (side == Left && fi == Postfix) return true;
    if (side == Left && fi == Infix && ai == Left) return (fo == Infix && ao == Left);
    if (side == Right && fi == Postfix) return true;
    if (side == Right && fi == Infix && ai == Right) return (fo == Infix && ao == Right);
    if (side == Noassoc) {
        if (fi == Infix && fo == Infix) return ai == ao;
        return fi == fo;
    }
    return false;
}

struct StackElem {
    Oper *infixop;
    const Expr *exp;
    std::vector<Oper*> prefixes;

    StackElem(Oper* i, const Expr* e, std::vector<Oper*> pref) 
        : infixop(i), exp(e), prefixes(pref) {}
};
std::map<std::string, Oper*> OperatorMap;
Oper *juxtarator = new Oper(" <juxtarator> ", 100, Infix, Left);
Oper *minrator = new Oper(" <minimal precedence operator> ", -1, Infix, Noassoc);
Oper *srator(std::stack<StackElem> const& st) { return (st.empty() ? minrator : st.top().infixop); }

Oper* get_op(std::string s) {
    auto it = OperatorMap.find(s);
    if (it == OperatorMap.end()) return nullptr;
    return it->second;
}

Expr* parse_postfix(const std::stack<StackElem> stack, const Expr* e, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts);

Expr* parse_prefix(const std::stack<StackElem> stack, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts) {
    if (!ipts.empty()) {
        std::string head = ipts[0];
        std::vector<std::string> tail(ipts.begin() + 1, ipts.end());

        Oper* op = get_op(head);
        if (!op) return parse_postfix(stack, new ConstExpr(std::atoi(head.c_str())), prefixes, tail);
        if (op->Fix == Prefix) {
            std::vector<Oper*> newprefix = prefixes;
            newprefix.push_back(op);
            return parse_prefix(stack, prefixes, tail);
        }
        else throw std::string("Lookahead is not a prefix operator");
    }
    else throw std::string("Premature EOF");
}

Expr* parse_postfix(const std::stack<StackElem> stack, const Expr* e, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts)
{
    if (prefixes.empty() && !ipts.empty()) {
        std::string head = ipts[0];
        std::vector<std::string> tail(ipts.begin() + 1, ipts.end());

        Oper* irator = get_op(head);
        if (irator) {
            if (irator->Fix == Postfix) {
                if (noparens(srator(stack), irator, Left)) {
                    if (!stack.empty()) {
                        StackElem el = stack.top();
                        std::stack<StackElem> stack_tail = stack;
                        stack_tail.pop();
                        return parse_postfix(stack_tail, new BinExpr(el.exp, e, el.infixop), el.prefixes, ipts);
                    } 
                    else throw std::string("Impossible");
                }
                else if (noparens(irator, srator(stack), Right)) {
                    return parse_postfix(stack, new UryExpr(e, irator), std::vector<Oper*>(), tail);
                }
                else throw std::string("Non-associative");
            }
            else if (irator->Fix == Infix) {
                if (noparens(srator(stack), irator, Left)) {
                    if (!stack.empty()) {
                        StackElem el = stack.top();
                        std::stack<StackElem> stack_tail = stack;
                        stack_tail.pop();
                        return parse_postfix(stack_tail, new BinExpr(el.exp, e, el.infixop), el.prefixes, ipts);
                    }
                    else throw std::string("Impossible");
                }
                else if (noparens(irator, srator(stack), Right)) {
                    std::stack<StackElem> newstack = stack;
                    newstack.push(StackElem(irator, e, std::vector<Oper*>()));
                    return parse_prefix(newstack, std::vector<Oper*>(), tail);
                }
                else throw std::string("Non-associative");
            }
        }
    }
    else if (!prefixes.empty() && !ipts.empty()) {
        std::string head = ipts[0];
        std::vector<std::string> tail(ipts.begin() + 1, ipts.end());
        Oper* op = prefixes[0];
        std::vector<Oper*> newprefixes(prefixes.begin() + 1, prefixes.end());

        Oper* irator = get_op(head);
        if (irator) {
            if (irator->Fix == Postfix) {
                if (noparens(op, irator, Noassoc)) {
                    return parse_postfix(stack, new UryExpr(e, op), newprefixes, ipts);
                }
                else if (noparens(irator, op, Noassoc)) {
                    return parse_postfix(stack, new UryExpr(e, irator), prefixes, tail);
                }
                else throw std::string("Equal precedence!");
            }
            else if (irator->Fix == Infix) {
                if (noparens(op, irator, Noassoc)) {
                    parse_postfix(stack, new UryExpr(e, op), newprefixes, ipts);
                }
                else if (noparens(irator, op, Noassoc)) {
                    std::stack<StackElem> newstack = stack;
                    newstack.push(StackElem(irator, e, prefixes));
                    return parse_prefix(newstack, std::vector<Oper*>(), tail);
                }
                else throw std::string("Equal precedence!");
            }
        }
    }

    std::vector<std::string> nnip = ipts;
    nnip.insert(nnip.begin(), juxtarator->Symbol);
    return parse_postfix(stack, e, prefixes, nnip);
}

Expr* parse(std::vector<std::string> input) {
    return parse_prefix(std::stack<StackElem>(), std::vector<Oper*>(), input);
}

int main(void)
{
    OperatorMap.insert(std::make_pair(minrator->Symbol, minrator));
    OperatorMap.insert(std::make_pair(juxtarator->Symbol, juxtarator));
    OperatorMap.insert(std::make_pair("+", new Oper("+", 3, Infix, Left)));
    std::vector<std::string> tokens = { "2", "+", "3" };
    try {
        Expr* e = parse(tokens);
    }
    catch (std::string err) {
        std::cout << "Error: " << err << std::endl;
    }

    system("PAUSE");
    return 0;
}

I'm hoping that this part is corect with parse prefix but I don't know how about implementing the parse_postfix function.

Edit:

Now this tries to be the full test program but it fails for some reason, as for the input "2" "+" "3" (or even just a single number) an exception is triggered (Premature EOF).

Community
  • 1
  • 1
Peter Lenkefi
  • 1,306
  • 11
  • 29

1 Answers1

2
parse_postfix(stack, (e, []),
              ipts as RATOR (irator as (_, _, POSTFIX)) :: ipts') = ...

This means that ipts is the head of the list ipts' and is a postfix operator?

Not exactly. the as match operator actually binds less tight than pattern constructors like ::; adding proper parentheses, ipts becomes the full list with RATOR ... as the head and ipts' (one element short) as the tail:

parse_postfix(stack, (e, []),
              ipts as (RATOR (irator as (_, _, POSTFIX)) :: ipts')) = ...

Why is there another match inside (irator as...)?

Here the as match operator is used for two distinct purposes:

  1. The ipts as (... :: ipts') and the irator as (_, _, POSTFIX) patterns are used to guarantee that the variables ipts and irator cover things of a particular sub-structure, so in the function body it is guaranteed that ipts is never empty and that irator is always a postfix-style rator (since otherwise it's not parse_postfix's job to handle it).

  2. As a small performance enhancement. Norman could also have written e.g.

    parse_postfix(stack, (e, []),
                  RATOR (text, prec, POSTFIX) :: ipts') = ...
    

    and subsequently refer to RATOR (text, prec, POSTFIX) whenever he refers to irator and RATOR (text, prec, POSTFIX :: ipts' whenever he refers to ipts. But this is both longer, harder to read, and requires re-construction of values that are already constructed in memory when referring to irator and ipts (i.e. less copying).

    Instead, the helper function noparens, the value constructor UNARY, the exception ParseError, etc. are all designed to handle the irator 3-tuple directly for that convenience.

Does it remove it from the list or advances anyway? Or is ipts the remainder of the list when the operator irator is removed?

Sometimes, and almost. ipts' is the remainder of the list when irator has been removed while ipts is the full list with no elements removed. Depending on whether ipts or ipts' are referred to in the if-then-elses, an element is popped or not.

I'm hoping that this part is corect with parse prefix but I don't know how about implementing the parse_postfix function.

I can't say right now. But one thing is certain: It will be much simpler to translate these functions if you stick with immutable data structures. It won't run as fast, though.

sshine
  • 15,635
  • 1
  • 41
  • 66