1

How to retrieve a random word of a given length from a Trie

The answer above explains how to select the first character, but I am confused about how to proceed after that. I want words of length L, but when I start traversing the tree, I don't know whether the branch being traversed has depth L.

Dictionary

package com.FastDictionary;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;

import sun.rmi.runtime.Log;

/**
 * Dictionary implementation.
 * Uses Trie Data Structure.
 *
 * A single shared instance is handed out by {@link #getInstance()}; the
 * constructor is private. The lazy creation in getInstance() is not
 * synchronised.
 */
public class FastDictionary {

    // Number of nine-character lines loaded by processFile
    private int nineWordCount;

    // Number of non-blank lines loaded by processFile
    private int totalWordCount;

    // Root Node
    private DictionaryNode root;

    // Singleton object
    private static FastDictionary fastDictionary;

    // Flag; True once a word list has been read to the end without an error
    private boolean isProcessed;

    private FastDictionary() {

        this.root = new DictionaryNode();
        this.isProcessed = false;
        this.nineWordCount = 0;
        this.totalWordCount = 0;
    }

    /**
     * Checks that a search string is made up of ASCII letters only.
     * @param text String to validate
     * @return True if text is non-null, non-empty and contains only a-z / A-Z
     */
    private boolean sanitiseSearch(String text) {

        if (text == null) {
            return false;
        }
        // String.matches tests the WHOLE string, so the quantifier is required;
        // a bare "[a-zA-Z]" would accept one-letter input only.
        return text.matches("[a-zA-Z]+");
    }

    /**
     * Add a word to Dictionary
     * @param word word to be added
     * @throws IllegalArgumentException if word is null or contains a space
     */
    public void addWord(String word) {

        if (word == null) {

            throw new IllegalArgumentException("Word to be added to Dictionary can't be null");
        }

        // Sanitise input
        if (word.contains(" ")) {

            throw new IllegalArgumentException(
                    "Word to be added to Dictionary can't contain white spaces");
        }

        DictionaryNode currentNode = this.root;

        // Walk down the trie one character at a time, creating missing nodes
        for (char c: word.toCharArray()) {

            DictionaryNode child = currentNode.getChild(c);

            if (child == null) {

                currentNode = currentNode.addChild(c);
            }
            else {

                currentNode = child;
            }
        }
        // Last node contains last character of valid word
        // Set that node as Leaf Node for valid word
        currentNode.setLeaf();
    }

    /**
     * Checks whether a string is stored in the Dictionary as a complete word.
     * @param word String to be checked if it is a valid word
     * @return True if valid word
     * @throws IllegalArgumentException if word is null or contains a space
     */
    public boolean isWord(String word) {

        if (word == null) {

            throw new IllegalArgumentException("Word to be checked in Dictionary can't be null");
        }

        // Sanitise input
        if (word.contains(" ")) {

            throw new IllegalArgumentException(
                    "Word to be checked in Dictionary can't contain white spaces");
        }

        DictionaryNode currentNode = this.root;
        for (char c: word.toCharArray()) {

            DictionaryNode child = currentNode.getChild(c);

            if (child == null) {

                // Path breaks off: no stored word starts with these characters
                return false;
            }
            currentNode = child;
        }

        // Returns true if Last Character was leaf
        return currentNode.isLeaf();
    }

    /**
     * Collects every prefix of text that is itself a valid word.
     * @param text String that needs to be searched
     * @return List of Strings which are valid words searched using 'text',
     *         shortest first; empty if text is null or no prefix is a word
     */
    public ArrayList<String> getWords(String text) {

        ArrayList<String> words = new ArrayList<String>();

        // Nothing to search for; avoids a NullPointerException on text.length()
        if (text == null) {

            return words;
        }

        DictionaryNode currentNode = this.root;

        for (int i = 0; i < text.length() ; i++) {

            DictionaryNode child = currentNode.getChild(text.charAt(i));

            if (child == null) {

                // No stored word continues along this path
                return words;
            }

            if (child.isLeaf()) {
                words.add(text.substring(0,i+1));
            }

            currentNode = child;

        }
        return words;
    }

    /**
     * Loads a word list, one word per line, into the Dictionary.
     * Each line is trimmed and blank lines are skipped. The stream is read
     * until its end and is not closed by this method.
     *
     * Loading is best effort: if reading fails or a line is rejected by
     * addWord, the problem is reported on System.err, the flag isProcessed
     * and both counters are left as they were, and words inserted before the
     * failure stay in the Dictionary.
     *
     * @param inputFileStream Text file containing list of valid words
     * Switches Flag isProcessed to True after a complete, error-free read
     */
    public void processFile(InputStream inputFileStream) {

        // Tallies stay local and are committed only after a clean read, so a
        // failed load followed by a retry does not count lines twice.
        int nineLetterLines = 0;
        int loadedLines = 0;

        try {

            BufferedReader br = new BufferedReader(new InputStreamReader(inputFileStream));
            String line;

            while((line = br.readLine()) != null) {
                line = line.trim();

                // A blank line would be stored as the empty word, which marks
                // the root node as a word end and inflates the counters.
                if (line.isEmpty()) {
                    continue;
                }

                this.addWord(line);

                // Nine Word test
                if (line.length() == 9) {
                    nineLetterLines++;
                }
                loadedLines++;
            }

        }
        catch(Exception e){
            System.err.println("Failed to load dictionary word list: " + e);
            return;
        }

        this.nineWordCount += nineLetterLines;
        this.totalWordCount += loadedLines;
        this.isProcessed = true;
    }

    /**
     *
     * @return True if valid words text file has been processed
     * Word file needs to be processed just once
     */
    public boolean isProcessed() {

        return this.isProcessed;
    }

    /**
     * Factory method to create Singleton Object
     * @return Singleton object
     */
    public static FastDictionary getInstance() {

        // Created lazily on first use
        if (fastDictionary == null) {

            fastDictionary = new FastDictionary();
        }

        return fastDictionary;
    }

    /**
     * @return Number of nine-character lines loaded by processFile
     */
    public int getNineWordCount() {
        return this.nineWordCount;
    }
}

**Node**

package com.FastDictionary;

import java.util.HashMap;

/**
 * Node of the Trie Data Structure used for FastDictionary
 */
public class DictionaryNode {

    // Character which the Node represents
    private char nodeChar;

    // Points to children
    private HashMap<Character, DictionaryNode> children = new HashMap<Character,DictionaryNode>();

    // Is Node the last character for a valid word
    private boolean isLeaf;

    /**
     * To create Root Node
     */
    DictionaryNode() {

        this.nodeChar = '.';
        this.isLeaf   = false;

    }

    /**
     * To create Child Node
     * @param c Character that Node represents
     */
    DictionaryNode(char c) {

        this.nodeChar = c;
        isLeaf        = false;
    }

    /**
     *
     * @param c Character that Node represents
     * @return Child Node which was created
     */
    public DictionaryNode addChild(char c) {

        DictionaryNode child = new DictionaryNode(c);
        this.children.put(c, child);
        return child;
    }

    /**
     *
     * @return true if Node is the last character for a valid word; default is false
     */
    public boolean isLeaf() {

        return this.isLeaf;
    }

    /**
     * Set Node as Leaf Node for a valid word
     */
    public void setLeaf() {

        this.isLeaf = true;
    }

    /**
     *
     * @param c the character which the Child Node represnts
     * @return Child Node representing character c; null if no such Child exists
     */
    public DictionaryNode getChild(char c) {

        DictionaryNode child = this.children.get(c);

        return child;
    }
}
zeko
  • 13
  • 4
  • Please add what you have tried so far! – Joe Iddon Oct 13 '17 at 19:43
  • Till now, I have implemented the Trie for the dictionary and it works fine for searching. Should I add the code here or should I just explain how it is working? Apologies if I am not asking correctly. This is my first time on SO. – zeko Oct 13 '17 at 19:47
  • Definitely add the code here, adding code of what you have tried so far is very important on stack overflow! – Joe Iddon Oct 13 '17 at 19:48
  • I've added the code. – zeko Oct 13 '17 at 20:07

1 Answers1

0

Yes, he only shows how to choose the first character from the root node. However, after you update your currentNode by following that character, you can apply the exact same principle to find the next character from the new node. Another way of viewing what his algorithm does is: given a node and an integer L (5 in his example), it finds the i'th (1234 in his example) word that is in the subtree of that node and is exactly L levels below it.

So after you have made your first move, you can recursively call that algorithm from the new node, with L-1 as the depth. This is the basic idea. Of course, some details need to be filled in.

Firstly, update i before the next recursive call. Say the algorithm chose the first character to be d, and the first 3 letters, i.e. a, b and c, together had 1000 5-letter words. So now you need to find the (1234-1000) = 234th word from this new node.

Secondly, instead of having lengthFrequencyByLetter and totalLengthFrequency for the entire tree, you now need to have them for every single node, which will require a lot of RAM. (You can optimize that by using a HashMap, though.)

A very high level implementation could be:

// Pseudocode sketch: returns the index'th word lying exactly L levels below
// currentNode. find_next_character, wordContainedWithin and the per-node
// lengthFrequencyByLetter table are assumed helpers, not shown here.
String randomWord(Node currentNode, int L, int index) {
    // Depth exhausted: currentNode itself ends the chosen word
    if (L == 0) return currentNode.wordContainedWithin();
    char ch = find_next_character(currentNode, L, index); //'d' in our example
    Node newNode = currentNode.getChild(ch); //node following d
    //following example, words_before = 1000
    int words_before = sum(lengthFrequencyByLetter[x][L] of all x before ch);
    // Re-base the index so it counts words inside the chosen child's subtree
    int new_index = index - words_before;
    return randomWord(newNode, L - 1, new_index);
}

Now to get a random L-letter word, look up the root's totalLengthFrequency[L], generate a number i (1234 here) between 0 and that value, and call randomWord as:

randomWord(tree.root,L,i)
Shihab Shahriar Khan
  • 4,930
  • 1
  • 18
  • 26