1

I want to create a program that recognizes speech from a .wav file. I tried the code below, but it throws this exception:

Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded

This happens even though my "eclipse.ini" has these settings:

--launcher.XXMaxPermSize 2048M 
--launcher.XXMaxPermSize 2048m 
-Xms2048m 
-Xmx2048m

How can I fix this exception?

Java code:

import java.net.MalformedURLException;
import java.net.URL;

import edu.cmu.sphinx.frontend.util.AudioFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;

public class TestRecognizer {

    public static void main(String[] args) {
        ConfigurationManager cm;

        if (args.length > 0) {
            cm = new ConfigurationManager(args[0]);
        } else {
            cm = new ConfigurationManager("english_use_LexTreeLinguist.xml");
        }

        URL audioURL = null;
        try {
            audioURL = new URL("file:./10001-90210-01803.wav");
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        if(audioURL == null)
            throw new IllegalArgumentException("Given audio file doesn't exist.");

        // allocate the recognizer
        System.out.println("Loading recognizer");
        Recognizer recognizer = (Recognizer) cm.lookup("recognizer");
        recognizer.allocate();
        System.out.println("Loading audio");
        AudioFileDataSource dataSource = (AudioFileDataSource) cm.lookup("audioFileDataSource");
        dataSource.setAudioFile(audioURL, null);

        // loop the recognition until the programm exits.
        Result result;
        System.out.println("recognizing");`enter code here`
        while ((result = recognizer.recognize())!= null) {
            String resultText = result.getBestResultNoFiller();
            System.out.println(resultText);
        }
    }

}

XML file configuration:

<config>
    <!-- ******************************************************** -->
    <!-- frequently tuned properties                              -->
    <!-- ******************************************************** -->

    <!-- Beam widths bound the decoder's active list. An absoluteBeamWidth of -1
         leaves the list unbounded; with a large-vocabulary trigram model the
         search can then grow until the JVM fails with
         "OutOfMemoryError: GC overhead limit exceeded". The values below are a
         bounded starting point; tune them for your accuracy/memory trade-off. -->
    <property name="absoluteBeamWidth"           value="20000"/>
    <property name="relativeBeamWidth"           value="1E-60"/>
    <property name="wordInsertionProbability"    value=".1"/>
    <property name="languageWeight"              value="8"/>
    <property name="silenceInsertionProbability" value="1"/>
    <property name="fillerInsertionProbability" value="1E-10"/>
    <property name="logLevel"                    value="WARNING"/>

    <!-- Names of the components selected for this run; referenced below as
         ${linguist} and ${frontend}. -->
    <property name="recognizer" value="recognizer"/>
    <property name="linguist"   value="lexTreeLinguist"/>
    <property name="frontend"   value="mfcFrontEnd"/>

    <!-- ******************************************************** -->
    <!-- The Recognizer configuration               -->
    <!-- ******************************************************** -->

    <!-- Entry-point component looked up by the Java code; it is wired to the
         "decoder" component and has an empty monitor list. -->
    <component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors"></propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The Decoder configuration                              -->
    <!-- ******************************************************** -->

    <!-- Decoder wired to the "searchManager" component defined below. -->
    <component name="decoder"
               type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="searchManager"/>
    </component>

    <!-- Search manager wiring: linguist comes from the ${linguist} global
         property; pruner, scorer and active-list factory are the components
         defined below.
         NOTE(review): this pairs SimpleBreadthFirstSearchManager with a
         lex-tree linguist; confirm against the Sphinx-4 documentation that
         this combination is intended. -->
    <component name="searchManager" type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
        <property name="logMath"           value="logMath"/>
        <property name="linguist"          value="${linguist}"/>
        <property name="pruner"            value="trivialPruner"/>
        <property name="scorer"            value="threadedScorer"/>
        <property name="activeListFactory" value="activeList"/>
    </component>

    <!-- Active-list factory; both beam widths are taken from the global
         properties of the same name. -->
    <component name="activeList" type="edu.cmu.sphinx.decoder.search.SortingActiveListFactory">
        <property name="logMath"           value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <!-- Pruner referenced by searchManager; declared with no properties. -->
    <component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <!-- Acoustic scorer referenced by searchManager; it reads features from
         the front end selected by the ${frontend} global property. -->
    <component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend"               value="${frontend}"/>
        <property name="isCpuRelative"          value="true"/>
        <property name="numThreads"             value="0"/>
        <property name="minScoreablesPerThread" value="10"/>
        <property name="scoreablesKeepFeature"  value="true"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The linguist  configuration                              -->
    <!-- ******************************************************** -->

    <!-- Linguist combining the "wsj" acoustic model, the "trigramModel"
         language model and the "englishDict" dictionary. Insertion
         probabilities and the language weight come from the global
         properties at the top of the file. -->
    <component name="lexTreeLinguist" type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
        <property name="logMath"                     value="logMath"/>
        <property name="acousticModel"               value="wsj"/>
        <property name="languageModel"               value="trigramModel"/>
        <property name="dictionary"                  value="englishDict"/>
        <property name="addFillerWords"              value="false"/>
        <property name="fillerInsertionProbability"  value="${fillerInsertionProbability}"/>
        <property name="generateUnitStates"          value="false"/>
        <property name="wantUnigramSmear"            value="true"/>
        <property name="unigramSmearWeight"          value="1"/>
        <property name="wordInsertionProbability"    value="${wordInsertionProbability}"/>
        <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
        <property name="languageWeight"              value="${languageWeight}"/>
        <property name="unitManager"                 value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Language Model configuration                         -->
    <!-- ******************************************************** -->
    <!-- N-gram language model (maxDepth 3) loaded from the en-us.lm.dmp
         classpath resource; uses the "englishDict" dictionary. -->
    <component name="trigramModel" type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
        <property name="unigramWeight" value=".5"/>
        <property name="maxDepth"      value="3"/>
        <property name="logMath"       value="logMath"/>
        <property name="dictionary"    value="englishDict"/>
        <property name="location"      value="resource:/edu/cmu/sphinx/models/language/en-us.lm.dmp"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Dictionary configuration                            -->
    <!-- ******************************************************** -->
    <!-- Pronunciation dictionary; both the word dictionary and the filler
         dictionary are read from the WSJ 8 kHz model's classpath resources. -->
    <component name="englishDict" type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath" value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/dict/cmudict.0.6d"/>
        <property name="fillerPath" value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/noisedict"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="wordReplacement" value="&lt;sil&gt;"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The acoustic model configuration                         -->
    <!-- ******************************************************** -->

    <!-- Acoustic model referenced by lexTreeLinguist; its data is supplied by
         the "wsjLoader" component. -->
    <component name="wsj" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader"      value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- Loader for the acoustic model stored under the
         WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz classpath resource. -->
    <component name="wsjLoader"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="logMath"     value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="location"    value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The unit manager configuration                           -->
    <!-- ******************************************************** -->

    <!-- Unit manager shared by the linguist, dictionary and acoustic model. -->
    <component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

    <!-- ******************************************************** -->
    <!-- The frontend configuration                               -->
    <!-- ******************************************************** -->

    <!-- Front-end pipeline, listed from the audio source through to feature
         extraction. Input comes from audioFileDataSource; the streamDataSource
         entry is commented out. -->
    <component name="mfcFrontEnd"
               type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <!--item>streamDataSource </item-->
            <item>audioFileDataSource </item>
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>batchCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <!-- Raw-stream data source; not part of the active pipeline (its item in
         mfcFrontEnd is commented out). -->
    <component name="streamDataSource" type="edu.cmu.sphinx.frontend.util.StreamDataSource">
        <property name="sampleRate"    value="16000"/>
        <property name="bitsPerSample" value="16"/>
        <property name="bigEndianData" value="false"/>
        <property name="signedData"    value="true"/>
    </component>

    <!-- First four pipeline stages, each declared with no explicit properties. -->

    <!-- Audio source; the Java code looks this up and hands it the file URL. -->
    <component name="audioFileDataSource"
               type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>

    <component name="preemphasizer" type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower" type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

    <component name="fft" type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

    <!-- Mel filter bank configured to match the acoustic model loaded by
         wsjLoader (WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz): the model name
         indicates 31 mel filters spanning 200 Hz to 3500 Hz. Declaring the
         component with no properties leaves it on its built-in defaults
         instead of these values. -->
    <component name="melFilterBank" 
          type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">
        <property name="numberFilters" value="31"/>
        <property name="minimumFrequency" value="200"/>
        <property name="maximumFrequency" value="3500"/>
    </component>

    <!-- Last three pipeline stages, each declared with no explicit properties. -->

    <component name="dct" type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="batchCMN" type="edu.cmu.sphinx.frontend.feature.BatchCMN"/>

    <component name="featureExtraction" type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <!-- ******************************************************* -->
    <!--  Miscellaneous components                               -->
    <!-- ******************************************************* -->

    <!-- Shared log-math component referenced by the search, linguist,
         language-model and loader components. -->
    <component name="logMath"
               type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase"     value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>

</config>
Nikolay Shmyrev
  • 24,897
  • 5
  • 43
  • 87
Dorin
  • 2,167
  • 4
  • 20
  • 32

1 Answer

0

Your configuration is wrong: the beams are too wide, and the frontend is not configured correctly.

If you want to make modifications, start from the default configuration, default.config.xml; otherwise, just use the high-level API, which needs no XML files. For the best decoding accuracy, use the en-us-8khz acoustic model available in the downloads.

If you want to transcribe 8 kHz audio, you also need to call recognizer.setSampleRate(8000);

Nikolay Shmyrev
  • 24,897
  • 5
  • 43
  • 87