I have found a parsing algorithm here, however it is in ML and I'm not too familiar with it. For better understanding of the algorithm I am trying to translate it to an imperative language like C++. Now thee are a few things I'm not sure of or don't really understand.
Here is a header for parsing a postfix expression (AFAIK this is technically not a header, but a match but I am not familiar with functional terms):
parse_postfix(stack, (e, []),
ipts as RATOR (irator as (_, _, POSTFIX)) :: ipts') =
This means that ipts
is the head of the list ipts'
and is a postfix operator? Why is there another match inside (irator as...
)? Does it remove it from the list or advances anyway? Or is ipts
the remainder of the list when the operator irator
is removed?
I'm having a hard time translating this. Here's what I've coded so far:
#include <iostream>
#include <map>
#include <stack>
#include <string>
#include <vector>
enum Assoc { Left, Right, Noassoc };
enum Fixity { Prefix, Infix, Postfix };
struct Oper {
std::string Symbol;
int Precedence;
Fixity Fix; // We can't represent bound types that way (INFIX <assoc>)
Assoc Asc; // so we just make it have the operator anyway
Oper(std::string const& s, int p, Fixity f, Assoc a)
: Symbol(s), Precedence(p), Fix(f), Asc(a) { }
};
// A regular AST representation
struct Expr { };
struct ConstExpr : public Expr {
int Value;
ConstExpr(int i) : Value(i) { }
};
struct UryExpr : public Expr {
const Expr *Sub;
Oper *OP;
UryExpr(const Expr *s, Oper *o)
: Sub(s), OP(o) { }
};
struct BinExpr : public Expr {
const Expr *LHS, *RHS;
Oper *OP;
BinExpr(const Expr *l, const Expr *r, Oper *o)
: LHS(l), RHS(r), OP(o) { }
};
bool noparens(Oper *inner, Oper *outer, Assoc side) {
int pi = inner->Precedence, po = outer->Precedence;
Fixity fi = inner->Fix, fo = outer->Fix;
Assoc ai = inner->Asc, ao = outer->Asc;
if (pi > po) return true;
if (side == Left && fi == Postfix) return true;
if (side == Left && fi == Infix && ai == Left) return (fo == Infix && ao == Left);
if (side == Right && fi == Postfix) return true;
if (side == Right && fi == Infix && ai == Right) return (fo == Infix && ao == Right);
if (side == Noassoc) {
if (fi == Infix && fo == Infix) return ai == ao;
return fi == fo;
}
return false;
}
struct StackElem {
Oper *infixop;
const Expr *exp;
std::vector<Oper*> prefixes;
StackElem(Oper* i, const Expr* e, std::vector<Oper*> pref)
: infixop(i), exp(e), prefixes(pref) {}
};
std::map<std::string, Oper*> OperatorMap;
Oper *juxtarator = new Oper(" <juxtarator> ", 100, Infix, Left);
Oper *minrator = new Oper(" <minimal precedence operator> ", -1, Infix, Noassoc);
Oper *srator(std::stack<StackElem> const& st) { return (st.empty() ? minrator : st.top().infixop); }
Oper* get_op(std::string s) {
auto it = OperatorMap.find(s);
if (it == OperatorMap.end()) return nullptr;
return it->second;
}
Expr* parse_postfix(const std::stack<StackElem> stack, const Expr* e, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts);
Expr* parse_prefix(const std::stack<StackElem> stack, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts) {
if (!ipts.empty()) {
std::string head = ipts[0];
std::vector<std::string> tail(ipts.begin() + 1, ipts.end());
Oper* op = get_op(head);
if (!op) return parse_postfix(stack, new ConstExpr(std::atoi(head.c_str())), prefixes, tail);
if (op->Fix == Prefix) {
std::vector<Oper*> newprefix = prefixes;
newprefix.push_back(op);
return parse_prefix(stack, prefixes, tail);
}
else throw std::string("Lookahead is not a prefix operator");
}
else throw std::string("Premature EOF");
}
Expr* parse_postfix(const std::stack<StackElem> stack, const Expr* e, const std::vector<Oper*> prefixes, const std::vector<std::string> ipts)
{
if (prefixes.empty() && !ipts.empty()) {
std::string head = ipts[0];
std::vector<std::string> tail(ipts.begin() + 1, ipts.end());
Oper* irator = get_op(head);
if (irator) {
if (irator->Fix == Postfix) {
if (noparens(srator(stack), irator, Left)) {
if (!stack.empty()) {
StackElem el = stack.top();
std::stack<StackElem> stack_tail = stack;
stack_tail.pop();
return parse_postfix(stack_tail, new BinExpr(el.exp, e, el.infixop), el.prefixes, ipts);
}
else throw std::string("Impossible");
}
else if (noparens(irator, srator(stack), Right)) {
return parse_postfix(stack, new UryExpr(e, irator), std::vector<Oper*>(), tail);
}
else throw std::string("Non-associative");
}
else if (irator->Fix == Infix) {
if (noparens(srator(stack), irator, Left)) {
if (!stack.empty()) {
StackElem el = stack.top();
std::stack<StackElem> stack_tail = stack;
stack_tail.pop();
return parse_postfix(stack_tail, new BinExpr(el.exp, e, el.infixop), el.prefixes, ipts);
}
else throw std::string("Impossible");
}
else if (noparens(irator, srator(stack), Right)) {
std::stack<StackElem> newstack = stack;
newstack.push(StackElem(irator, e, std::vector<Oper*>()));
return parse_prefix(newstack, std::vector<Oper*>(), tail);
}
else throw std::string("Non-associative");
}
}
}
else if (!prefixes.empty() && !ipts.empty()) {
std::string head = ipts[0];
std::vector<std::string> tail(ipts.begin() + 1, ipts.end());
Oper* op = prefixes[0];
std::vector<Oper*> newprefixes(prefixes.begin() + 1, prefixes.end());
Oper* irator = get_op(head);
if (irator) {
if (irator->Fix == Postfix) {
if (noparens(op, irator, Noassoc)) {
return parse_postfix(stack, new UryExpr(e, op), newprefixes, ipts);
}
else if (noparens(irator, op, Noassoc)) {
return parse_postfix(stack, new UryExpr(e, irator), prefixes, tail);
}
else throw std::string("Equal precedence!");
}
else if (irator->Fix == Infix) {
if (noparens(op, irator, Noassoc)) {
parse_postfix(stack, new UryExpr(e, op), newprefixes, ipts);
}
else if (noparens(irator, op, Noassoc)) {
std::stack<StackElem> newstack = stack;
newstack.push(StackElem(irator, e, prefixes));
return parse_prefix(newstack, std::vector<Oper*>(), tail);
}
else throw std::string("Equal precedence!");
}
}
}
std::vector<std::string> nnip = ipts;
nnip.insert(nnip.begin(), juxtarator->Symbol);
return parse_postfix(stack, e, prefixes, nnip);
}
Expr* parse(std::vector<std::string> input) {
return parse_prefix(std::stack<StackElem>(), std::vector<Oper*>(), input);
}
int main(void)
{
OperatorMap.insert(std::make_pair(minrator->Symbol, minrator));
OperatorMap.insert(std::make_pair(juxtarator->Symbol, juxtarator));
OperatorMap.insert(std::make_pair("+", new Oper("+", 3, Infix, Left)));
std::vector<std::string> tokens = { "2", "+", "3" };
try {
Expr* e = parse(tokens);
}
catch (std::string err) {
std::cout << "Error: " << err << std::endl;
}
system("PAUSE");
return 0;
}
I'm hoping that this part is corect with parse prefix but I don't know how about implementing the parse_postfix
function.
Edit:
Now this tries to be the full test program but it fails for some reason, as for the input "2" "+" "3" (or even just a single number) an exception is triggered (Premature EOF).