I want to lex the following code example:
prop levelBasedAlerter uni { a b } \I ->
levelBasedAlerter a
| a > I ->
b: "ALERT: %a"
this should be
Prop
Var "levelBasedAlerter"
Uni
PortSpecS " { a b }"
Lam
Var "I"
PatternMatchEnd
Indent 2
Var "levelBasedAlerter"
Var "a"
Indent 4
PatternGuard
Var "a"
Var ">"
Var "I"
PatternMatchEnd
Indent 6
Var "b"
DefinedByCol
StringLit "Alert: %a"
however, my alex lexer stops with an error on encountering \
in the first line (with and without a space behind the \
).
Why is this the case? The Lexer:
{
{-# LANGUAGE DeriveDataTypeable #-}
module Lexer where
import Data.Typeable
import Data.Data
import Data.List
import Data.List.Split
import Data.Char
import Debug.Trace
import Prelude hiding (lex)
import Control.Monad (liftM)
}
%wrapper "posn"
$digit = 0-9
@string = (. # [\" \\] )
$alpha = [a-zA-Z]
@real = ($digit+ \. | $digit * \. $digit +)
@boolLit = ("True"|"False")
@alphaNum = ($alpha|$digit)+
$bracketsOpen = [\(\[\{]
$bracketsClose = [\)\]\}]
$brackets = [ $bracketsOpen $bracketsClose]
@identifier = [^ : ! = \\ \ " $brackets]+
@commaOrSpace = (\,\ * | \ +)
@scopedIdentifier = @identifier(\.@identifier)+
@globalKeyword = (prop|mesh|let|omni|uni|let|using|module|import|where)
@port = (@identifier:\ *)?@identifier
@portSpec = ((@identifier|@scopedIdentifier):)?
" "*
\{\ * @port
(@commaOrSpace @port)*
" "*\}
@deepPortSpec = ((@identifier|@scopedIdentifier):)?
" "*
\{\ * @identifier: (. # \})+ \}
@indent = \n[\t\ ]+
tokens :-
@indent { \_ s -> Indent $ length s }
$white+ ;
"--".* ;
@globalKeyword { \_ keyword -> getTokenOf keyword }
$digit+ { \_ s -> IntL (read s) }
@real+ { \_ s -> DoubleL (read s) }
@boolLit { \_ s -> BoolL (read s) }
\" @string \" { \_ s -> StringLit (tail . init $ s) }
@portSpec { \_ s -> PortSpecS s }
@deepPortSpec { \_ s -> DeepPortSpecS s }
":" { \_ s -> DefinedByCol }
"," { \_ s -> Comma }
"!" { \_ s -> Negate }
"==" { \_ s -> Eq }
"=" { \_ s -> LetAssOp }
"~>" { \_ s -> Wire }
"->" { \_ s -> PatternMatchEnd }
$bracketsOpen { \_ s -> BracO s}
$bracketsClose { \_ s -> BracC s}
"||" { \_ s -> Or }
"|" { \_ s -> PatternGuard}
"!!" { \_ s -> AccessPort }
"\\" { \_ s -> Lam }
@scopedIdentifier {\_ s -> ScopedVar s }
@identifier { \_ s -> Var s }
{
clean :: String -> String
clean s = reverse $ rmWs $ reverse $ rmWs s
where rmWs = dropWhile (\c -> c ==' ' || c == '\t')
traceThis :: (Show a) => a -> a
traceThis a = trace ("DEBUG: " ++ show a) a
data Token
= Prop
| Mesh
| Module
| Import
| Where
| Var String
| BracO String
| BracC String
| Comma
| Eq
| PatternGuard
| Or
| ScopedVar String
| Omni
| Uni
| PortSpecS String
| DeepPortSpecS String
| DefinedByCol -- ':' after definitions
| Indent Int
| PatternMatchEnd -- '->' after PM
| Negate
| Let
| LetAssOp -- '=' in let x = ...
| Wire
| AccessPort
| Using
| Lam
| StringLit String
| IntL Int
| DoubleL Double
| BoolL Bool
| EOF
deriving (Eq,Show,Data)
getTokenOf :: String -> Token
getTokenOf s = fromConstr
$ head $ filter ((==s) . map toLower . showConstr)
$ dataTypeConstrs $ dataTypeOf $ Prop
}
I think it has something to do with how i match for the \
token.
However, I have tried matching it like
'\'
'\\'
"\"
"\\"
\\
\
and also with a regular expression, but nothing seems to work.
Is there some weird behavior regarding \
in alex? or is it some other trivial error that I am unable to see?
Update
I tried changing the @identifier
to this now:
@identifier = (. # [ : ! = \\ \ " $brackets])+
to do "anything except for x" matching in an alexy way but this did not change anything in the output.