1

I am trying to write a token-based Scala parser for UCB Logo. The problem I am facing is that in UCB Logo the values in a list can only be delimited by one of ']', '[' or ' '. If any other kind of delimiter appears, the content of the list should be treated as a word.

In short, how can I make a token parser that will consider the following:
[ 4 3 2 ] - should be a list
[ [ 4 3 2 ] ] - should be a list within a list
[ 1 + 2 ] - should be a word inside a list
[ [ 1 2 3 ] + ] - should be a word inside a list

The following

 '[' ~ rep(chrExcept('[', ']')) ~ ']'

produces these tokens: Tokens: List([, [1 2 3], +, ])

from [ [ 1 2 3 ] + ]. I believe it should instead produce the tokens: List([, [1 2 3] +, ]), i.e. the + sign should be merged into the same token as [1 2 3].

This is the current code of the Lexical I am using:

package lexical

import scala.language.postfixOps

import scala.util.parsing.combinator.lexical.Lexical
import scala.util.parsing.input.CharSequenceReader._

/**
 * Created by Marin on 28/03/16.
*/
class MyLexical extends Lexical with MyTokens {

def token: Parser[Token] = (
    //procDef                                   ^^ { case first ~ chars => processNewProcedure(chars  mkString "") }
  word2 ^^ { case rest => {

      /*val s = if (second.isEmpty) "" else second mkString ""
      val t = if(third.isEmpty) "" else  third mkString ""
      val f = if(fourth.isEmpty) "" else fourth mkString ""

      StringLit(s"$first$s$t$f$rest")*/

      println(rest)
      StringLit("Smth")
  }
  }
  | formalChar ~ rep(identChar | digit)       ^^ { case first ~ rest => Formal(first :: rest mkString "") }
  | identChar ~ rep(identChar | digit)        ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
  | procDigit                                 ^^ { case first ~ second ~ rest => NumericLit((first mkString "") :: second.getOrElse("") :: rest mkString "") }
  | '\"' ~ rep(chrExcept('\"', EofCh)) ~ ' '  ^^ { case '\"' ~ chars ~ ' ' => StringLit(chars mkString "") }
  | EofCh                                     ^^^ EOF
  | delim
  | failure("Illegal character")
  )

def processNewProcedure(chars: String) =
    if(reserved.contains(chars)) throw new RuntimeException
    else {
        Identifier(chars)
    }

def procDef = toSeq ~> identChar ~ rep(identChar | elem('_')) <~ formalChar.* <~ endSeq

def toSeq = 't' ~ 'o' ^^^ "to"
def endSeq = 'e' ~ 'n' ~ 'd' ^^^ "end"

def processIdent(name: String) = {

    if (reserved contains name) {
        Keyword(name)
    } else {
        Identifier(name)
    }
}

def word = {

    '[' ~ ((whitespaceChar | digit)*) ~ (_delim | identChar) ~ rep(whitespaceChar | digit) ~ ']'
}

def word2 = {

    //'[' ~> rep(whitespaceChar | digit) ~> rep(_delim | identChar) <~ rep(whitespaceChar | digit) <~ ']'
    //'[' ~ rep(chrExcept('[', ']')) ~ ']'

    rep1('[') ~ rep1(chrExcept('[', ']') | digit) ~ rep(_delim) ~ rep1(']')

    //rep1('[') ~ identChar ~ rep(']') ~ rep('+') ~ rep1(']')
    //'[' ~ (_delim | chrExcept('[', ']')) ~ ']'
}

def word3 = {

    '[' ~> rep(digit | letter | _delim) <~ ']'
}

def procDigit = digit.+ ~ '.'.? ~ digit.*

def identChar = letter | elem('_')

def formalChar =  ':' ~ identChar

override def whitespace: Parser[Any] = rep[Any] (
    whitespaceChar
    | ';' ~ comment
)

def comment: Parser[Any] = rep(chrExcept(EofCh, ';')) ^^ { case _ => ' ' }


/****** Pure copy-paste ******/

/** The set of reserved identifiers: these will be returned as `Keyword`s. */
val reserved = new scala.collection.mutable.HashSet[String]

/** The set of delimiters (ordering does not matter). */
val delimiters = new scala.collection.mutable.HashSet[String]

private lazy val _delim: Parser[Token] = {
    // construct parser for delimiters by |'ing together the parsers for the individual delimiters,
    // starting with the longest one -- otherwise a delimiter D will never be matched if there is
    // another delimiter that is a prefix of D
    def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { x => Keyword(s) }

    val d = new Array[String](delimiters.size)
    delimiters.copyToArray(d, 0)
    scala.util.Sorting.quickSort(d)
    (d.toList map parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x)
}
protected def delim: Parser[Token] = _delim
}
Marin
  • 861
  • 1
  • 11
  • 27
  • The middle clause in your parse expression `'[' ~ rep(chrExcept('[', ']')) ~ ']'` matches everything up to (but not including) the ']'. If you want to collect the trailing non-bracket characters too you can add another clause: `'[' ~ rep(chrExcept('[', ']')) ~ ']' ~ rep(chrExcept('[', ']'))`. – Dennis Sosnoski Apr 18 '16 at 22:33

0 Answers0