-1

I am writting a lexer speficications for Jflex, which is supposed to read an input file and return a token. I do successfully complete this part as shown below:

    /*Super-FORTAN language lexer specification*/
%%
%public
%class LexicalAnalyser
%unicode
%line
%column
%type Symbol
%standalone

%{
  private ArrayList<Symbol> tokens = new ArrayList<Symbol>();

  public LexicalAnalyser() {
    //ArrayList<Symbol> tokens = new ArrayList<Symbol>();
  }

  /**
  * This method will be called as action to be taken in the rules and actions section
  * of the lexer.
  * @para  m unit the lexical unit of the lexer
  * @param value the matched input characters
  * @return return an object of type symbol
  *
  */

  public Symbol symbol(LexicalUnit unit, Object value){
    Symbol token = new Symbol(unit, yyline+1, yycolumn+1, value);

    if(token != null) {
      tokens.add(token);  //add a token to the token list
      //System.out.println("Token added && the size is: " + tokens.size()); //Checking whether a token has been successfully added
    } else{
      System.out.println("Failed to add token");
    }

    System.out.println(token); //print out the list of token to standard output
    return  token;
  }

  public ArrayList<Symbol> getTokens(){
    System.out.println("In total " + tokens.size() + " tokens have been found");
    return tokens;
  }

  public boolean isZzAtEOF() {
    return zzAtEOF;
  }
  %}

  %eofval{
    return new Symbol(LexicalUnit.EOS, yyline, yycolumn);
    %eofval}

    /*Program Name */
    ProgramName = [:uppercase:][:jletterdigit:]*[:lowercase:][:jletterdigit:]*

    /*Variables names*/
    VarName = [:lowercase:][a-z0-9]*

    /*Carriage Return*/
    EndLine = \r|\n|\r\n

    /*Number*/
    Number = [1-9][0-9]*

    EOS = {EndLine} | [\t\f\b]

    /*Input character*/
    InputCharacter = [^\r|\n]

    /*Comments*/
    Shortcomment ="//"{InputCharacter}*{EndLine}?
    Longcomment = "/*"[^'*']~"*/"
    Comment = {Shortcomment}|{Longcomment}
    FileMetaData ="rtf1"~"cf0 "

    %state STRING, CHARLITERAL
    %%

    <YYINITIAL> {
      /*Program Name*/
      {ProgramName} {return symbol(LexicalUnit.PROGNAME, yytext());}

      /* keywords */
      "BEGINPROG" {return symbol(LexicalUnit.BEGINPROG, yytext());
      }
      "DO" {return symbol(LexicalUnit.DO, yytext());}
      "ENDPROG"   {return symbol(LexicalUnit.ENDPROG, yytext());}
      "ENDIF"  {return symbol(LexicalUnit.ENDIF, yytext());}
      "ENDFOR" {return symbol(LexicalUnit.ENDFOR, yytext());}
      "ENDWHILE" {return symbol(LexicalUnit.ENDWHILE, yytext());}
      "ELSE" {return symbol(LexicalUnit.ELSE, yytext());}
      "FOR" {return symbol(LexicalUnit.FOR, yytext());}
      "IF" {return symbol(LexicalUnit.IF, yytext());}
      "PRINT" {return symbol(LexicalUnit.PRINT, yytext());}
      "THEN" {return symbol(LexicalUnit.THEN, yytext());}
      "TO" {return symbol(LexicalUnit.TO, yytext());}
      "READ"  {return symbol(LexicalUnit.READ, yytext());}
      "VARIABLES" {return  symbol(LexicalUnit.VARIABLES, yytext());}

      /*Binary operators */
      "AND"   {return symbol(LexicalUnit.AND, yytext());}
      "OR"    {return symbol(LexicalUnit.OR, yytext());}

      /*operators */
      "+"  {return symbol(LexicalUnit.PLUS, yytext());}
      "-"  {return symbol(LexicalUnit.MINUS, yytext());}
      "*"  {return symbol(LexicalUnit.TIMES, yytext());}
      "/"  {return symbol(LexicalUnit.DIVIDE, yytext());}

      /*Comparator */
      "="  {return symbol(LexicalUnit.EQ, yytext());}
      ">=" {return symbol(LexicalUnit.GEQ, yytext());}
      ">"  {return symbol(LexicalUnit.GT, yytext());}
      "<=" {return symbol(LexicalUnit.LEQ, yytext());}
      "<"  {return symbol(LexicalUnit.LT, yytext());}
      "NOT" {return symbol(LexicalUnit.NOT, yytext());}
      "<>" {return symbol(LexicalUnit.NEQ, yytext());}

      /* separators */
      {EndLine} {return new Symbol(LexicalUnit.ENDLINE, yyline, yycolumn);}
      "(" {return symbol(LexicalUnit.LPAREN, yytext());}
      ")" {return symbol(LexicalUnit.RPAREN, yytext());}
      "," {return symbol(LexicalUnit.COMMA, yytext());}

      /*Assignment */
      ":=" {return symbol(LexicalUnit.ASSIGN, yytext());}

      /*identifiers*/
      {VarName} {return symbol(LexicalUnit.VARNAME, yytext());}

      /*numbers */
      {Number} {return symbol(LexicalUnit.NUMBER, yytext());}

      {Comment} {}
        {FileMetaData} {}
    }

The specification above does generated a lexer called when giving it as input to Jflex. The issue is I create an array list and trying to add each symbol that list as show the method below:

public Symbol symbol(LexicalUnit unit, Object value){
    Symbol token = new Symbol(unit, yyline+1, yycolumn+1, value);
    if(token != null) {
      tokens.add(token);  //add a token to the token list
      //System.out.println("Token added && the size is: " +   tokens.size()); //Checking whether a token has been successfully added
    } else{
      System.out.println("Failed to add token");
 }

Using the print statement in that method I confirm that the each object are successfully added. However when I call the getTokens() getter from my main class (as in bellow code). I get an empty list:

public class Main{
    /**
    * Runs the scanner on input files.
    *
    * This is a standalone scanner, it will print any unmatched
    * text to System.out unchanged.
    *
    * @param argv   the command line, contains the filenames to run
    *               the scanner on.
    */

    public static void main(String argv[]){
    LexicalAnalyser lexer = new LexicalAnalyser();

    lexer.main(argv);

    System.out.println("\nIdentifiers");

    ArrayList<Symbol> tokenList = lexer.getTokens(); //Retrieving the token list
    System.out.println("Token added && the size is: " + tokenList.size());

    for(Symbol tk: tokenList){
        if(tk !=null){
            if (tk.getType() == LexicalUnit.VARIABLES){
                System.out.println(tk.getValue() + " " + tk.getLine() );
            }
        }
    }
}

}

Do I missed some point? the only piece of code I don't provide here is the Symbol object class. I would like to know which point I do miss here. Thank you so much.

1 Answers1

1

The size is 0 because you inizialized a new instance of TokenList in your main class and you did a for-loop on that empty instance.
You add the token to the tokenList in your LexicalAnalyser class, which is private, so you should provide a getter method and retrieve it.

Add this in your LexicalAnalyser class:

public ArrayList<Symbol> getTokenList() {
    return tokens.getTokenList();
}

In the main class change tokenList = tokens.getTokenList(); to tokenList = lexer.getTokenList();

RubenDG
  • 1,365
  • 1
  • 13
  • 18