4

My little project is a lexical analysis program in which i have to take every word found in an arbitrary .java file and list every line it appears on in the file. I need to have one look up table dedicated just to the reserved words and another for all additional words found in the document. So for a program like:

    public class xxxx {
    int xyz;
    xyz = 0;
}

The output should be:

Reserved words:
class: 1
int: 2
public: 1

Other words:
xxxx: 1
xyz: 2, 3

But there are a lot of problems with my current program and so i have no idea whats going on, so amendments to my program or a complete rewrite is welcome. I'm just trying to get a hang of the java language as a hobby so all help is welcome as long as i can understand whats going on. I'm sure there is a simple solution to this problem but my attempt didn't work :( Thanks for any help ^^

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

public class LexicalAnalysis {

    private String[] keywords = { "abstract", "boolean", "byte", "case",
            "catch", "char", "class", "continue", "default", "do", "double",
            "else", "extends", "final", "finally", "float", "for", "if",
            "implements", "import", "instanceof", "int", "interface", "long",
            "native", "new", "package", "private", "protected", "public",
            "return", "short", "static", "super", "switch", "synchronized",
            "this", "throw", "throws", "transient", "try", "void", "volatile",
            "while", "false", "true", "null" };
    HashMap<String, ArrayList<Integer>> keywordsTable;

    HashMap<String, ArrayList<Integer>> otherWords = new HashMap<String, ArrayList<Integer>>();

    public LexicalAnalysis(String fileName){

        Scanner kb = null;
        int lineNumber = 0;

        try {
            kb = new Scanner(new File(fileName));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }

          keywordsTable = new HashMap<String, ArrayList<Integer>>();
          for(int i = 0; i < 47; i++){
              keywordsTable.put(keywords[i], new ArrayList<Integer>());
          }

        while(kb.hasNextLine()){

            lineNumber++;

            String line = kb.nextLine();

            String[] lineparts = line.split("\\s+|\\.+|\\;+|\\(+|\\)+|\\\"+|\\:+|\\[+|\\]+");

            for(String x: lineparts){

                ArrayList<Integer> list = keywordsTable.get(x);
                if(list == null){
                    list = otherWords.get(x);
                    if(list == null){
                        ArrayList<Integer> temp = new ArrayList<Integer>();
                        temp.add(lineNumber);
                        otherWords.put(x,temp);
                    }else{
                        otherWords.remove(x);
                        ArrayList<Integer> temp = new ArrayList<Integer>();
                        temp.add(lineNumber);
                        otherWords.put(x, temp);
                    }
                }else{
                    keywordsTable.remove(x);
                    ArrayList<Integer> temp = new ArrayList<Integer>();
                    temp.add(lineNumber);
                    keywordsTable.put(x, temp);
                }
            }
        }
        System.out.println("Keywords:");
        printMap(keywordsTable);
        System.out.println();
        System.out.println("Other Words:");
        printMap(otherWords);

    }
    public static void printMap(Map<String, ArrayList<Integer>> mp) {    
        Iterator<Map.Entry<String, ArrayList<Integer>>> it = mp.entrySet().iterator();    
        while (it.hasNext()) {        
            Map.Entry<String, ArrayList<Integer>> pairs = (Map.Entry<String, ArrayList<Integer>>)it.next();    
            System.out.print(pairs.getKey() + " = ");
            printList(pairs.getValue());
            System.out.println();
            it.remove();
        }
    }
    public static void printList(List x){

        for(Object m : x){
            System.out.print(m + ", ");
        }

    }
    public static void main(String args[]){
        new LexicalAnalysis("lexitest.txt");
    }


}
Uphill_ What '1
  • 683
  • 6
  • 15
user1152918
  • 71
  • 1
  • 1
  • 3
  • What is the problem? Please give an example – calebds Jan 17 '12 at 00:58
  • If you run it for the example above it does the spaces and brackets as well, also it won't add the position of each place the word appears so if null appears 8 times it will only show it once. =( – user1152918 Jan 17 '12 at 01:40
  • It would seem that i need a regular expression that only gets words that have no puncuation characters in them. So it will ignore ALL words with punctuation. – user1152918 Jan 17 '12 at 03:26

2 Answers2

1

The simplest way of doing this is by using JFlex with correct lex file defining keywords. Once you have that, counting identifiers and keywords is trivial.

RokL
  • 2,663
  • 3
  • 22
  • 26
0

I found one bug that i think fixed everything. You need to state the directory of the file you are recovering in your main. For example what you have now is new LexicalAnalysis("lexitest.txt");

For my example I'm using my flashdrive so it would be new LexicalAnalysis("F"\lexitest.txt");