My lex needs to identify if the input is a Bin, Hex, Float, Int or ID. It identifies all correctly but if the input is, for example, 2i, it says that it is an Int and doesn't say its an error. Can I have some advice, please?
Here is my code:
# ------------------------------------------------------------
# lex.py
#
# Lex
#
# ------------------------------------------------------------
import sys
sys.path.insert(0, "../..")
import ply.lex as lex
#------------------------------RESERVED--------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------
reserved = {
'if' : 'IF',
'then' : 'THEN',
'else' : 'ELSE',
'while' : 'WHILE',
'true' : 'TRUE',
'false' : 'FALSE',
'print' : 'PRINT',
'input' : 'INPUT',
'output' : 'OUTPUT',
'end' : 'END',
#---------------TYPES----------------
#---------------------------------------------
'void' : 'VOID',
'char' : 'CHAR',
'unsigned' : 'UNSIGNED',
'bool' : 'BOOL',}
#-----------------------TOKENS-------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------
tokens = [
'ID',
'INT',
'FLOAT',
'HEX',
'BIN',
#------------------OP-----------------
#---------------------------------------------
'PLUS',
'MINUS',
'TIMES',
'DIVIDE',
'OR',
'AND',
'NOT',
'XOR',
#------------------DEL--------------
#---------------------------------------------
'LPAREN', 'RPAREN',
'LBRACKET', 'RBRACKET',
'LBRACE', 'RBRACE',
'COMMA', 'PERIOD', 'SEMI', 'COLON',
'COMMENT',
#----------------ASSIGN------------------
#---------------------------------------------
'EQUALS'
] + list(reserved.values())
#-----------------------------DEFINITIONS-------------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------
#------------------OP-----------------
#---------------------------------------------
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
#----------------DEL----------------
#---------------------------------------------
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
#----------------ASSIGN------------------
#---------------------------------------------
t_EQUALS = r'='
reserved_map = { }
for r in reserved:
reserved_map[r.lower()] = r
#-------------BIN-----------
#---------------------------------------------
def t_BIN(t):
r'0b\d+'
t.value = int(t.value,2)
return t
#-----------HEX---------
#---------------------------------------------
def t_HEX(t):
r'0x([abcdef]|\d)+'
t.value = int(t.value,16)
return t
#-----------FLOAT------------
#---------------------------------------------
def t_FLOAT(t):
r'\d+\.\d+'
t.value = float(t.value)
return t
#-----------INT--------------
#---------------------------------------------
def t_INT(t):
r'\d+'
t.value = int(t.value)
return t
#-------------ID----------------
#---------------------------------------------
def t_ID(t):
r'[a-zA-Z_][a-zA-Z_0-9]*'
t.type = reserved.get(t.value,'ID') # Check for reserved words
return t
#-----------LINES-----------
#---------------------------------------------
def t_newline(t):
r'\n+'
t.lexer.lineno += len(t.value)
#-------------COMMENT-----------
#---------------------------------------------
def t_COMMENT(t):
r'\#.*'
pass
#---------IGNORE--------
#---------------------------------------------
t_ignore = ' \t'
#--------------ERROR--------------
#---------------------------------------------
def t_error(t):
print("Wrong Character: '%s'" % t.value[0])
t.lexer.skip(1)
# Lexer
lexer = lex.lex()
#lex.lex(debug=0)
while True:
s = input('input > ')
lexer.input(s)
tok = lexer.token()
if not tok:
break
print(tok)