3

I am trying to build code that can generate MathML for Traditional Math Input. I am using JavaScript for coding. Are there any references or recommended readings I can go through to get a grasp of the required algorithm? I am reading the W3C standards for MathML, which are a reference for the standard but not for the algorithm.

For Instance, for a sample input of

sqrt 9 * 5 + 20

I want to generate the MathML expression like below

<math xmlns='http://www.w3.org/1998/Math/MathML'> <mrow> <mrow> <mn>5</mn> <mo>&#8290;</mo> <mn>9</mn> <mo>&#8290;</mo> <mi>SQRT</mi> </mrow> <mo>+</mo> <mn>20</mn> </mrow> </math>
JMD
  • 337
  • 4
  • 16

1 Answer

1

I found a nice tutorial on MathML here: http://rypress.com/tutorials/mathml/basic-algebra.html and started to work out a very basic algebraic parser (for example, 4*sqrt(x+6)=(5-z)*y/7) with a crude stack for handling parentheses and an example sqrt function. Is this the direction you are after?

jsfiddle here: http://jsfiddle.net/alhambra1/bSJyE/

JavaScript code:

<script>
// Emit the page UI in a single write: a text field for the expression,
// a button that runs convertToMathML(), and the container that receives
// the generated markup.
document.write(
    '<p><input id="input" size=50>'
    + '<button onclick="convertToMathML()">Convert</button></p>'
    + '<div id="output"></div>'
)

/**
 * Reads a single lexeme from `str`, starting at index `ptr`.
 *
 * Lexeme types produced:
 *   - "closeBracket": a ")" character
 *   - "func": either "(" (func = "mfenced") or a known function name
 *     followed by optional whitespace and "(" (sqrt -> func = "msqrt")
 *   - "mn": a run of digits and "." characters
 *   - "mi": a single letter that does not start a known function name
 *   - "mo": one of + - * / =, with `string` set to its markup text
 *   - "error": any other character, or a function name not followed by "("
 *
 * @param {string} str - the whole input expression
 * @param {number} ptr - index of the first character to read
 * @returns {{string: string, type: string, pointer: number, func: (string|undefined)}}
 *   the lexeme; `pointer` is the index at which the next call should start
 *   (for "error" lexemes it is the index of the offending character)
 */
function lex(str,ptr){
    var ascii = str.charCodeAt(ptr),
        lexeme = {string: "", type: ""},
        operators = {"+": "+"
                    , "-": "-"
                    , "*": "&times;"
                    , "/": "&divide;"
                    , "=": "="},
        functions = {sqrt: "msqrt"},
        name

    // Character codes 46 (".") and 48-57 ("0"-"9"); 47 is "/", an operator.
    function isNumberCode(code){
        return code > 45 && code < 58 && code !== 47
    }

    //identify type
    if (ascii === 41)
        lexeme.type = "closeBracket"
    else if (ascii === 40){
        lexeme.type = "func"
        lexeme.func = "mfenced"
    }
    else if (isNumberCode(ascii))
        lexeme.type = "mn"
    else if ((ascii > 64 && ascii < 91) || (ascii > 96 && ascii < 123)){
        // A letter is an identifier unless it starts a known function name
        // (matched case-insensitively). The loop variable is declared
        // locally and the search stops at the first match, so a later
        // non-matching entry cannot undo the match after ptr has moved.
        lexeme.type = "mi"
        for (name in functions){
            if (str.substr(ptr,name.length).toLowerCase() === name){
                lexeme.type = "func"
                lexeme.func = functions[name]
                // leave ptr on the last character of the name; the
                // charAt(ptr++) below then steps past the whole name
                ptr += name.length - 1
                break
            }
        }
    } else if (!operators[str.charAt(ptr)])
        return {string: str.charAt(ptr), type: "error", pointer: ptr}
    else
        lexeme.type = "mo"

    // Consume the current character. Operators are replaced by their markup
    // text; for a named function `string` is just the last character of the
    // name, so use `func` to identify the function.
    if (lexeme.type === "mo")
        lexeme.string = operators[str.charAt(ptr++)]
    else
        lexeme.string = str.charAt(ptr++)

    //identify numbers and functions
    if (lexeme.type === "mn"){
        // extend the number over all following digits / dots
        while (ptr < str.length && isNumberCode(str.charCodeAt(ptr))){
            lexeme.string += str.charAt(ptr)
            ptr++
        }
    } else if (lexeme.type === "func" && lexeme.func !== "mfenced"){
        // a named function must be followed by "(", optionally after spaces
        while (ptr < str.length && /\s/.test(str.charAt(ptr))){
            ptr++
        }
        if (str.charAt(ptr) !== "(")
            return {string: str.charAt(ptr), type: "error", pointer: ptr}
        ptr++
    }

    lexeme.pointer = ptr

    return lexeme
}

/**
 * Wraps a lexeme's text in a MathML element named after the lexeme type.
 *
 * @param {{type: string, string: string}} lexeme - lexeme to render
 * @returns {string} opening tag, text and closing tag, each followed by a
 *   newline
 */
function markup(lexeme){
    var tagName = lexeme.type
    var openTag = "<" + tagName + ">"
    var closeTag = "</" + tagName + ">"

    return openTag + "\n" + lexeme.string + "\n" + closeTag + "\n"
}

/**
 * Handler for the "Convert" button: reads the expression from the element
 * with id "input", tokenises it with lex(), and writes the resulting MathML
 * (or an error message) into the element with id "output".
 *
 * Open brackets and function calls are tracked on a stack so that each ")"
 * closes the most recently opened element. Each opened element wraps its
 * content in an <mrow>.
 *
 * Error messages produced:
 *   - "Extra bracket at: N" for a ")" with nothing open
 *   - "Cannot parse "c" at N" for a lexeme lex() reports as an error
 *   - "Missing N closing bracket/s." when the input ends with elements open
 */
function convertToMathML(){
    var str = document.getElementById('input').value,
        expression = "",
        ptr = 0,
        stack = []

    while (ptr < str.length){
        var currLexeme = lex(str,ptr)

        if (currLexeme.type == "closeBracket"){
            if (stack.length == 0){
                // stop here so later tokens are not appended to the message
                expression = "Extra bracket at: " + (currLexeme.pointer - 1)
                break
            }
            // close in the reverse of the opening order (<func><mrow>),
            // otherwise the generated markup is mis-nested
            expression += "</mrow>\n"
                        + "</" + stack.pop().func + ">\n"
            ptr = currLexeme.pointer
        } else if (currLexeme.type == "error"){
            expression = "Cannot parse \"" + currLexeme.string
                       + "\" at " + currLexeme.pointer
            break
        } else if (currLexeme.type == "func"){
            expression += "<" + currLexeme.func + ">\n"
                        + "<mrow>\n"
            stack.push(currLexeme)
            ptr = currLexeme.pointer
        } else {
            expression += markup (currLexeme)
            ptr = currLexeme.pointer
        }
    }

    // only reached with ptr at the end when the loop was not aborted by an error
    if (ptr >= str.length && stack.length > 0)
        expression = "Missing " +  stack.length + " closing bracket/s."

    expression = "<math xmlns='http://www.w3.org/1998/Math/MathML'>"
               + expression + "</math>"

    document.getElementById('output').innerHTML = expression
}
</script>
גלעד ברקן
  • 23,602
  • 3
  • 25
  • 61
  • Thank you Groovy. This helps, I will go through http://rypress.com/tutorials/mathml/basic-algebra.html. That will help me get the algorithm I need! – JMD Feb 09 '14 at 15:13
  • I also have another question, would you know of any webpage that can evaluate math expressions? I have designed one and its working good so far but I would like to go through any good algorithms too to see if there is a scope for improvement in what I have done. – JMD Feb 09 '14 at 15:16
  • @user1998463 For evaluating math expressions, google.com or wolframalpha.com usually work well for my purposes, although their back-end may not be so obvious. From the little that I know, it is common to convert mathematical expressions to prefix-notation (http://en.wikipedia.org/wiki/Polish_notation) and then to evaluate them using the Shunting-yard algorithm (http://en.wikipedia.org/wiki/Shunting-yard_algorithm). JavaScript also has an `eval` function that can return the value of JavaScript mathematical expressions input as strings. – גלעד ברקן Feb 09 '14 at 15:27
  • @groovy: No. `eval` is not supposed to evaluate mathematical expressions, and should not be used here. – Bergi Feb 09 '14 at 15:38
  • Thank you. I tried "eval" and I got JavaScript errors.. But the information I have from you so far is helpful. – JMD Feb 09 '14 at 15:45
  • @user1998463 oops, I made a mistake...Shunting-yard is used to convert infix to RPN (http://en.wikipedia.org/wiki/Reverse_Polish_notation) which may be easier to evaluate. This page might help you experiment with `eval` if you are interested, http://www.w3schools.com/jsref/tryit.asp?filename=tryjsref_eval – גלעד ברקן Feb 09 '14 at 16:03
  • @Bergi I disagree. Here's a more official example http://www.w3schools.com/jsref/jsref_eval.asp – גלעד ברקן Feb 09 '14 at 16:04
  • @groovy: w3schools is in no way official, and their examples are known not to be the best. Check out http://www.w3fools.com/. And if you have already parsed the equation into an AST, you should in no way use `eval` to evaluate it when you can do it properly. – Bergi Feb 09 '14 at 16:10
  • @Bergi oh, I did not know w3schools had some bad reputation; I guess as an amateur, you learn from wherever you can. It makes sense that doing your own parsing and evaluation would grant you the most control. – גלעד ברקן Feb 09 '14 at 16:14
  • Hey, do you also know the exact reverse process, i.e. parsing MathML to an expression? See this thread of mine: http://stackoverflow.com/questions/26357109/convert-mathml-to-expression – Saurabh Palatkar Oct 14 '14 at 17:22
  • @SaurabhLprocks I am not familiar with an open source application for the reverse process. It seems like it might have some things in common with XML parsing, but I'm not too much of an expert on that. – גלעד ברקן Oct 14 '14 at 20:37