I should note at the outset that I'm no computer scientist and have no real experience writing compiler code. So there may be glaring holes in the implementation or even the basic ideas. But if you want the thoughts of a work-a-day programmer who found this an interesting problem, here they are.
We can write a `pda` function that simply recognizes our grammar, one that we can use like this. (Here we go only from `aaa` to `ccc`, but you could easily extend it to `eee` or whatever.)
// PUSH (symbol) makes a stack action that pushes `symbol`; POP is the action that pops.
const {push: PUSH, pop: POP} = pda
// Recognizer for token sequences in which every opening token (e.g. '+aaa') is later
// closed by the matching closing token ('-aaa'), with proper nesting.
const myParser = pda ('S', ['S'], [
//                     ^     ^
//                     |     `-- accepting states
//                     `-------- initial state
//
// Each row is one transition, with the columns
//   [ current state, token, top of stack, new state, stack action ]
// A 'top of stack' of '' places no condition on the stack. Row order matters:
// the first row that matches is the one used.
//
// State 'S': any opening token pushes its symbol and moves to that symbol's state.
[ 'S', '+aaa', '', 'A', PUSH ('A') ],
[ 'S', '+bbb', '', 'B', PUSH ('B') ],
[ 'S', '+ccc', '', 'C', PUSH ('C') ],
// State 'A': openers push as above; '-aaa' pops, and the two-symbol patterns pick
// the new state from the symbol lying beneath the popped 'A'. The '' row comes last,
// so it is reached only when none of those patterns matched, and returns to 'S'.
[ 'A', '+aaa', '', 'A', PUSH ('A') ],
[ 'A', '-aaa', 'AA', 'A', POP ],
[ 'A', '-aaa', 'BA', 'B', POP ],
[ 'A', '-aaa', 'CA', 'C', POP ],
[ 'A', '-aaa', '', 'S', POP ],
[ 'A', '+bbb', '', 'B', PUSH ('B') ],
[ 'A', '+ccc', '', 'C', PUSH ('C') ],
// State 'B': same scheme, closed by '-bbb'.
[ 'B', '+aaa', '', 'A', PUSH ('A') ],
[ 'B', '+bbb', '', 'B', PUSH ('B') ],
[ 'B', '-bbb', 'AB', 'A', POP ],
[ 'B', '-bbb', 'BB', 'B', POP ],
[ 'B', '-bbb', 'CB', 'C', POP ],
[ 'B', '-bbb', '', 'S', POP ],
[ 'B', '+ccc', '', 'C', PUSH ('C') ],
// State 'C': same scheme, closed by '-ccc'.
[ 'C', '+aaa', '', 'A', PUSH ('A') ],
[ 'C', '+bbb', '', 'B', PUSH ('B') ],
[ 'C', '+ccc', '', 'C', PUSH ('C') ],
[ 'C', '-ccc', 'AC', 'A', POP ],
[ 'C', '-ccc', 'BC', 'B', POP ],
[ 'C', '-ccc', 'CC', 'C', POP ],
[ 'C', '-ccc', '', 'S', POP ],
])
And we would use it to test a series of tokens, like this:
// The opener is closed by its matching closer, ending in the accepting state 'S'.
myParser (['+aaa', '-aaa']) //=> true
// In state 'A' there is no transition for '-bbb', so the sequence is rejected.
myParser (['+aaa', '-bbb']) //=> false
// Nested and sibling pairs, each closed in the right order, are accepted.
myParser (['+aaa', '+bbb', '+ccc', '-ccc', '+aaa', '-aaa', '-bbb', '-aaa']) //=> true
This does not exactly match the mathematical definition of a PDA. We don't have a symbol to delineate the beginning of the stack, and we test the top two values of the stack, not just the top one. But it's reasonably close.
However, this just reports whether a string of tokens is in the grammar. You want something more than that. You need to use this to build a syntax tree. It's very difficult to see how to do this in the abstract. But it's easy enough to generate a sequence of events from that parsing that you could use. One approach would be just to capture the new node value at every push to the stack and capture every pop from the stack.
With that, we might tie it to something like this:
/**
 * Creates an event-driven builder for a forest of `{name, children}` nodes.
 * (The result may have several roots, hence a forest rather than a tree.)
 *
 * The returned object exposes:
 *   - push (name): adds a node called `name` under the innermost open node
 *                  (or at the top level) and makes it the innermost open node.
 *   - pop ():      closes the innermost open node and returns it.
 *   - end ():      returns the array of root nodes built so far.
 */
const forestBuilder = () => {
  // Sentinel holding the roots; it is never part of the result itself.
  const root = {children: []}
  // Chain of currently open nodes, from the sentinel down to the innermost one.
  const open = [root]

  const push = (name) => {
    const child = {name, children: []}
    const parent = open [open .length - 1]
    parent .children .push (child)
    open .push (child)
  }
  const pop = () => open .pop ()
  const end = () => root .children

  return {push, pop, end}
}
// Replay a hand-written sequence of push/pop events through one builder.
const {push, pop, end} = forestBuilder ()
push ('aaa') // first root
push ('bbb') // child of aaa
pop () // close bbb
push ('ccc') // second child of aaa
push ('aaa') // child of ccc
pop() // close the inner aaa
pop() // close ccc
pop() // close the outer aaa; back at the top level
push ('bbb') // second root
push ('aaa') // child of that bbb
end() // the list of roots; nodes that were never popped are still included
which would yield something like this:
[
{
"name": "aaa",
"children": [
{
"name": "bbb",
"children": []
},
{
"name": "ccc",
"children": [
{
"name": "aaa",
"children": []
}
]
}
]
},
{
"name": "bbb",
"children": [
{
"name": "aaa",
"children": []
}
]
}
]
So if we supply our pda function with some event listeners for the pushes and pops (also for completion and errors), we might be able to build your tree from a series of tokens.
Here is one attempt to do this:
// Clear the console before the snippet's output is logged.
console .clear ()
/**
 * Factory for simple recognizers modelled on a pushdown automaton (PDA).
 *
 * `pda (start, accepting, transitions)` returns a parser function
 *
 *     parser (tokens, onPush, onPop, onComplete, onError)
 *
 * Configuration:
 *   - start:       the initial state.
 *   - accepting:   the accepting states (an array, or anything with `.includes`).
 *   - transitions: rows of `[state, token, top, newState, action]`. For every token
 *                  the FIRST row is taken whose `state` and `token` are strictly equal
 *                  to the current state and token and whose `top` matches the stack.
 *                  `top` may be
 *                    * '' or [] : no condition on the stack;
 *                    * a string : compared with the concatenation of the top
 *                                 `top.length` stack entries (unambiguous only while
 *                                 stack symbols are single characters);
 *                    * an array : compared entry by entry with the top of the stack,
 *                                 so symbols of any length are safe.
 *                  `action` is a function applied to the stack; use `pda.pop` or
 *                  `pda.push (symbol)` so that the listeners below are notified.
 *
 * Parsing:
 *   - tokens:     iterable of tokens (default `[]`).
 *   - onPush:     called with the token after a push action has been applied.
 *   - onPop:      called with no arguments after a pop action has been applied.
 *   - onComplete: called with `true`/`false` (is the final state accepting?) once all
 *                 tokens are consumed; its result is the parser's result.
 *   - onError:    called with `(token, stack)` when no row matches; parsing stops and
 *                 its result is the parser's result.
 * With the default listeners the parser simply returns `true` or `false`.
 */
const pda = (() => {
  // Private tags marking a stack action as a push or a pop, so the parser knows
  // which listener to notify after applying it.
  const PUSH = Symbol ('push')
  const POP = Symbol ('pop')
  const id = (x) => x

  // Does the top of `stack` satisfy a transition's `top` pattern?
  const matchesTop = (top, stack) => {
    if (top .length === 0) {
      return true
    }
    if (Array .isArray (top)) {
      // Entry-by-entry comparison; a stack shorter than the pattern cannot match.
      const offset = stack .length - top .length
      return offset >= 0 && top .every ((symbol, i) => stack [offset + i] === symbol)
    }
    return stack .slice (-top .length) .join ('') === top
  }

  return Object .assign (
    (start, accepting, transitions) =>
      (tokens = [], onPush = id, onPop = id, onComplete = id, onError = () => false) => {
        const stack = []
        let state = start
        for (const token of tokens) {
          const transition = transitions .find (([st, tk, top]) =>
            st === state && tk === token && matchesTop (top, stack)
          )
          if (!transition) {
            return onError (token, stack)
          }
          const [, , , nextState, action] = transition
          state = nextState
          action (stack)
          if (action [PUSH]) {onPush (token)}
          if (action [POP]) {onPop ()}
        }
        return onComplete (accepting .includes (state))
      },
    {
      push: (symbol) => Object .assign ((stack) => stack .push (symbol), {[PUSH]: true}),
      pop: Object .assign ((stack) => stack .pop (), {[POP]: true}),
    }
  )
})()
// PUSH (symbol) makes a stack action that pushes `symbol`; POP is the action that pops.
const {push: PUSH, pop: POP} = pda
// Recognizer for token sequences in which every opening token (e.g. '+aaa') is later
// closed by the matching closing token ('-aaa'), with proper nesting.
const myParser = pda ('S', ['S'], [
//                     ^     ^
//                     |     `-- accepting states
//                     `-------- initial state
//
// Each row is one transition, with the columns
//   [ current state, token, top of stack, new state, stack action ]
// A 'top of stack' of '' places no condition on the stack. Row order matters:
// the first row that matches is the one used.
//
// State 'S': any opening token pushes its symbol and moves to that symbol's state.
[ 'S', '+aaa', '', 'A', PUSH ('A') ],
[ 'S', '+bbb', '', 'B', PUSH ('B') ],
[ 'S', '+ccc', '', 'C', PUSH ('C') ],
// State 'A': openers push as above; '-aaa' pops, and the two-symbol patterns pick
// the new state from the symbol lying beneath the popped 'A'. The '' row comes last,
// so it is reached only when none of those patterns matched, and returns to 'S'.
[ 'A', '+aaa', '', 'A', PUSH ('A') ],
[ 'A', '-aaa', 'AA', 'A', POP ],
[ 'A', '-aaa', 'BA', 'B', POP ],
[ 'A', '-aaa', 'CA', 'C', POP ],
[ 'A', '-aaa', '', 'S', POP ],
[ 'A', '+bbb', '', 'B', PUSH ('B') ],
[ 'A', '+ccc', '', 'C', PUSH ('C') ],
// State 'B': same scheme, closed by '-bbb'.
[ 'B', '+aaa', '', 'A', PUSH ('A') ],
[ 'B', '+bbb', '', 'B', PUSH ('B') ],
[ 'B', '-bbb', 'AB', 'A', POP ],
[ 'B', '-bbb', 'BB', 'B', POP ],
[ 'B', '-bbb', 'CB', 'C', POP ],
[ 'B', '-bbb', '', 'S', POP ],
[ 'B', '+ccc', '', 'C', PUSH ('C') ],
// State 'C': same scheme, closed by '-ccc'.
[ 'C', '+aaa', '', 'A', PUSH ('A') ],
[ 'C', '+bbb', '', 'B', PUSH ('B') ],
[ 'C', '+ccc', '', 'C', PUSH ('C') ],
[ 'C', '-ccc', 'AC', 'A', POP ],
[ 'C', '-ccc', 'BC', 'B', POP ],
[ 'C', '-ccc', 'CC', 'C', POP ],
[ 'C', '-ccc', '', 'S', POP ],
])
/**
 * Creates an event-driven builder for a forest of `{name, children}` nodes,
 * meant to be fed with the tokens of a parse.
 *
 * The returned object exposes:
 *   - push (name): adds a node under the innermost open node (or at the top level)
 *                  and makes it the innermost open node. The first character of
 *                  `name` is dropped, so the token '+aaa' yields a node named 'aaa'.
 *   - pop ():      closes the innermost open node and returns it.
 *   - end ():      returns the array of root nodes built so far.
 */
const forestBuilder = () => {
  // Sentinel holding the roots; it is never part of the result itself.
  const root = {children: []}
  // Chain of currently open nodes, from the sentinel down to the innermost one.
  const open = [root]

  const push = (name) => {
    const child = {name: name .slice (1), children: []}
    const parent = open [open .length - 1]
    parent .children .push (child)
    open .push (child)
  }
  const pop = () => open .pop ()
  const end = () => root .children

  return {push, pop, end}
}
// One builder instance supplies the listeners for this parse.
const {push, pop, end} = forestBuilder ()
console .log (myParser (
// tokens: the sequence to parse
["+ccc", "-ccc", "+aaa", "+bbb", "-bbb", "-aaa", "+bbb", "+aaa", "+ccc", "+bbb", "-bbb", "-ccc", "+ccc", "-ccc", "+aaa", "+ccc", "+ccc", "-ccc", "-ccc", "-aaa", "-aaa", "-bbb"],
// onPush: open a node named after the token
push,
// onPop: close the innermost open node
pop,
// onComplete: hand back the forest only if the parser ended in an accepting state
(accepted) => accepted ? end () : 'Error: ill-formed',
// onError: describe the token that matched no transition and the stack at that point
(token, stack) => `Error: token = '${token}', stack = ${JSON.stringify(stack)}`
))
.as-console-wrapper {max-height: 100% !important; top: 0}
There are lots of ways this could go. Perhaps the opening event should contain not only the token but the value pushed on the stack. There might be a good way to generate that table of transitions from a more declarative syntax. We might want a different version of the stack action column, one that takes strings instead of functions. And so on. But it still might be a decent start.