0

I finally managed to get my program, which uses Sphinx4 with an XML configuration file, to compile.

My problem is that when I run it, execution stays blocked in the recognizer.recognize() method while I talk, and nothing happens.

I think my problem comes from my configuration file, but I don't know where.

I tested my microphone, it's working fine.

Here is my Java code:

import edu.cmu.sphinx.frontend.util.Microphone;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;
import java.io.IOException;


/**
 * Live speech-recognition driver built on a Sphinx4 XML configuration.
 *
 * <p>The constructor loads the configuration, allocates the recognizer and
 * starts the microphone; {@link #main(String[])} then prints the best
 * hypothesis of every result returned by the recognizer.
 */
public class Main {

    private ConfigurationManager configManager;
    private static Recognizer recognizer;

    private boolean debug = true;
    private static Microphone microphone;

    /**
     * Loads the Sphinx4 configuration, allocates the recognizer and starts
     * recording from the microphone. Exits the JVM with status 1 when the
     * microphone cannot be started.
     */
    public Main() {
        // A forward slash is accepted as a path separator on Windows as well
        // as on Unix-like systems, unlike the original "conf\\conf.xml".
        configManager = new ConfigurationManager("conf/conf.xml");
        recognizer = (Recognizer) configManager.lookup("recognizer");
        recognizer.allocate();

        microphone = (Microphone) configManager.lookup("microphone");
        if (!microphone.startRecording()) {
            System.out.println("Cannot start microphone.");
            recognizer.deallocate();
            System.exit(1);
        }
    }

    /**
     * Runs the recognition loop until an exception ends it.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared by the original signature
     */
    public static void main(String[] args) throws IOException {

        Main m = new Main();

        try {
            System.out.println("starting live recognition");
            System.out.println("microphone.isRecording(): " + microphone.isRecording());
            System.out.println("recognizer.getState(): " + recognizer.getState());

            while (true) {
                // Execution blocks here: this is the symptom reported in the question.
                Result result = recognizer.recognize();
                if (result != null) {
                    System.out.println(result.getBestFinalResultNoFiller());
                }
                System.out.println();
            }
        } finally {
            try {
                // NOTE(review): isTalkToLRS() and closeConnexion() are not defined in
                // the excerpt shown; presumably they live in the full class.
                if (m.isTalkToLRS()) {
                    m.closeConnexion();
                }
            } finally {
                // Always stop capturing audio and free the recognizer, even when
                // closing the connection fails.
                microphone.stopRecording();
                recognizer.deallocate();
            }
        }
    }
}

My configuration file:

<?xml version="1.0" encoding="UTF-8"?>

<config>

<component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

 <component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="frontEnd"/>
        </component>

  <!-- ******************************************************** -->
  <!-- The Grammar  configuration                               -->
  <!-- ******************************************************** -->

 <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>


  <component name="jsgfGrammar" type="edu.cmu.sphinx.jsgf.JSGFGrammar">
    <property name="dictionary" value="dictionary"/>
    <property name="grammarLocation" value="models/grammar"/>
    <property name="grammarName" value="simpleGrammar"/>
    <property name="logMath" value="logMath"/>
  </component>

<!-- ******************************************************** -->
    <!-- The unit manager configuration                           -->
    <!-- ******************************************************** -->

    <component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

<!-- ******************************************************** -->
    <!-- The frontend pipelines                                   -->
    <!-- ******************************************************** -->

    <component name="dataBlocker" type="edu.cmu.sphinx.frontend.DataBlocker">
        <!--<property name="blockSizeMs" value="10"/>-->
    </component>

    <component name="speechClassifier"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
        <property name="threshold" value="13"/>
    </component>

    <component name="nonSpeechDataFilter"
               type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

    <component name="speechMarker"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker" >
        <property name="speechTrailer" value="50"/>
    </component>


    <component name="preemphasizer"
               type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower"
               type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower">
    </component>

    <component name="fft"
            type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform">
    </component>

    <component name="melFilterBank"
        type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">
    </component>

    <component name="dct"
            type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="liveCMN"
               type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>

    <component name="featureExtraction"
               type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <component name="microphone"
               type="edu.cmu.sphinx.frontend.util.Microphone">
               <property name="bytesPerRead" value="320"/>
        <property name="closeBetweenUtterances" value="false"/>
    </component>
<!-- ******************************************************** -->
    <!-- The frontend configuration                               -->
    <!-- ******************************************************** -->
    <!-- Live-microphone pipeline.
         The endpointing stages (dataBlocker, speechClassifier, speechMarker,
         nonSpeechDataFilter) are declared earlier in this file but were not
         part of the pipeline. Without them the audio stream carries no
         speech-start/speech-end markers, so on continuous microphone input the
         decoder never sees the end of an utterance and recognize() does not
         return. They must come right after the microphone, before feature
         extraction. -->
    <component name="frontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <item>microphone</item>
            <item>dataBlocker</item>
            <item>speechClassifier</item>
            <item>speechMarker</item>
            <item>nonSpeechDataFilter</item>
            <item>preemphasizer</item>
            <item>windower</item>
            <item>fft</item>
            <item>melFilterBank</item>
            <item>dct</item>
            <item>liveCMN</item>
            <item>featureExtraction</item>
        </propertylist>
    </component>
  <!-- ******************************************************** -->
  <!-- The Dictionary configuration                            -->
  <!-- ******************************************************** -->
  <component name="dictionary" type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
    <property name="dictionaryPath" value="models/DictionnaireMyRoom.dic"/>
    <property name="fillerPath" value="models/lium_french_f2/noisedict"/>
    <property name="addSilEndingPronunciation" value="false"/>
    <property name="wordReplacement" value="&lt;sil&gt;"/>
    <property name="unitManager" value="unitManager"/>
</component>
  <!-- ******************************************************** -->
  <!-- Recognizer                                               -->
  <!-- ******************************************************** -->
<component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
  <property name="decoder" value="decoder" /> 
  </component>

  <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="searchManager"/>
    </component>

    <component name="searchManager"
        type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="flatLinguist"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListFactory" value="activeList"/>
    </component>

     <component name="activeList" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="-1"/>
        <property name="relativeBeamWidth" value="1E-80"/>
    </component>

   <component name="flatLinguist" type="edu.cmu.sphinx.linguist.flat.FlatLinguist">
    <property name="addOutOfGrammarBranch" value="true"/>
    <property name="outOfGrammarProbability" value="1E-20"/>
    <property name="phoneInsertionProbability" value="1E-10"/>
    <property name="acousticModel" value="acousticModel"/>
    <property name="grammar" value="jsgfGrammar"/>
    <property name="unitManager" value="unitManager"/>
    <property name="phoneLoopAcousticModel" value="acousticModel"/>
   </component>

<!-- ******************************************************** -->
    <!-- The acoustic model configuration                         -->
    <!-- ******************************************************** -->
    <component name="wsj" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="dataLocation" value="french/model_parameters/french_f0.cd_cont_5725_22/"/>
        <property name="modelDefinition" value="french/model_architecture/french_f0.5725.mdef"/>
         <property name="location" value="models"/>
  </component>

  <component name="acousticModel" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
    <property name="loader" value="wsj"/>
    <property name="unitManager" value="unitManager"/>
  </component>
   </config>

My grammar:

#JSGF V1.0;

/*
 * Command grammar: accepts exactly one of the listed French words.
 * The declared grammar name is kept identical to the "grammarName" property
 * of the jsgfGrammar component (simpleGrammar); presumably the file is
 * models/grammar/simpleGrammar.gram - verify against the project layout.
 */
grammar simpleGrammar;

public <command> = (bonjour | merci) ;

My dictionary:

bonjour bb on jj ou rr

merci mm ai rr ss ii

My output:

starting live recognition
microphone.isRecording(): true
recognizer.getState(): READY

Thanks a lot for your help.

EDIT: To get French recognition, I created my configuration file by following this link: http://code.google.com/p/voicecmdr/wiki/VoiceRecognitionFR (in French, sorry)

  • Your configuration is wrong and doesn't match proper configuration for a French model. For proper results use the latest code as described at: http://cmusphinx.sourceforge.net/wiki/tutorialsphinx4 – Nikolay Shmyrev Apr 28 '14 at 11:28
  • Thanks for your answer. I already made an implementation with the Configuration object and methods like configuration.setAcousticModelPath etc., and it worked great. But now I need to use the outOfGrammarProbability property of the flatLinguist component, as explained in this post: http://stackoverflow.com/questions/16994980/keyword-or-keyphrase-spotting-with-sphinx4/16996733#16996733 Can I do that without a configuration file? – JackylRadis Apr 28 '14 at 11:38
  • You can edit default configuration file default.config.xml from sphinx4 sources, try to keep the rest of the file as is. – Nikolay Shmyrev Apr 28 '14 at 12:30
  • Thanks for your help, can I just add the outOfGrammarProbability property to the flatlinguist component? After I add it, can I use LiveSpeechRecognizer class? – JackylRadis Apr 28 '14 at 12:55
  • Thanks, I have results now! Maybe too often, but that is another problem. – JackylRadis Apr 28 '14 at 13:02
  • Ok, you can tune phoneInsertionProbability to get best detection/false alarm rate – Nikolay Shmyrev Apr 28 '14 at 13:41

0 Answers0