1

I had implemented the sphinx4 speech recognition in java successfully..now i just wanna use that sphinx 4 speech recognition with the ready made .wave file. right now i just speak and its recognize the words as per ma grammar.. but how can i just give the .wave file as input and its been recognize the word of that .wave file play.. please ..can u help me out from this...

this is my code for recognize with the use of microphone

My .java file

/*
 * Copyright 1999-2004 Carnegie Mellon University.
 * Portions Copyright 2004 Sun Microsystems, Inc.
 * Portions Copyright 2004 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.demo.helloworld;

import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;

import javax.swing.ButtonModel;
import javax.swing.JButton;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.Timer;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;

import edu.cmu.sphinx.frontend.util.Microphone;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.util.props.ConfigurationManager;


/**
 * A simple HelloWorld demo showing a simple speech application built using Sphinx-4. This application uses the Sphinx-4
 * endpointer, which automatically segments incoming audio into utterances and silences.
 */
public class HelloWorld {

    static Recognizer recognizer;


    public static void main(String[] args) {


        ConfigurationManager cm;

        if (args.length > 0) {
            cm = new ConfigurationManager(args[0]);
        } else {
            cm = new ConfigurationManager(HelloWorld.class.getResource("helloworld.config.xml"));
        }

       recognizer = (Recognizer) cm.lookup("recognizer");
        recognizer.allocate();




        // start the microphone or exit if the programm if this is not possible
        Microphone microphone = (Microphone) cm.lookup("microphone");
        if (!microphone.startRecording()) {
            System.out.println("Cannot start microphone.");
            recognizer.deallocate();
            System.exit(1);
        }

        System.out.println("Say: (Good morning | Hello) ( Bhiksha | Evandro | Paul | Philip | Rita | Will )");


        int timerDelay = 100;
        final Timer timer = new Timer(timerDelay , new ActionListener() {

           @Override
           public void actionPerformed(ActionEvent e) {
             // System.out.println("Button Pressed!");

              System.out.println("Start speaking..\n");

              Result result = recognizer.recognize();

              if (result != null) {
                  String resultText = result.getBestFinalResultNoFiller();
                  System.out.println("You said: " + resultText + '\n');
              } else {
                  System.out.println("I can't hear what you said.\n");
              }


           }
        });

        JButton button = new JButton("Press Me!");
        final ButtonModel bModel = button.getModel();
        bModel.addChangeListener(new ChangeListener() {

           @Override
           public void stateChanged(ChangeEvent cEvt) {
              if (bModel.isPressed() && !timer.isRunning()) {
                 timer.start();
                // System.out.println("Process under construction");

              } else if (!bModel.isPressed() && timer.isRunning()) {
                 timer.stop();
              }
           }
        });

        JPanel panel = new JPanel();
        panel.add(button);


        JOptionPane.showMessageDialog(null, panel);

    }


}

______here goes my config.xml file____________________

<?xml version="1.0" encoding="UTF-8"?>

<!--
   Sphinx-4 Configuration file
-->

<!-- ******************************************************** -->
<!--  an4 configuration file                             -->
<!-- ******************************************************** -->

<config>

    <!-- ******************************************************** -->
    <!-- frequently tuned properties                              -->
    <!-- ******************************************************** -->

    <property name="logLevel" value="WARNING"/>

    <property name="absoluteBeamWidth"  value="-1"/>
    <property name="relativeBeamWidth"  value="1E-80"/>
    <property name="wordInsertionProbability" value="1E-36"/>
    <property name="languageWeight"     value="8"/>

    <property name="frontend" value="epFrontEnd"/>
    <property name="recognizer" value="recognizer"/>
    <property name="showCreations" value="false"/>


    <!-- ******************************************************** -->
    <!-- word recognizer configuration                            -->
    <!-- ******************************************************** -->

    <component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
            <item>accuracyTracker </item>
            <item>speedTracker </item>
            <item>memoryTracker </item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The Decoder   configuration                              -->
    <!-- ******************************************************** -->

    <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="searchManager"/>
    </component>

    <component name="searchManager"
        type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="flatLinguist"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListFactory" value="activeList"/>
    </component>


    <component name="activeList"
             type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <component name="trivialPruner"
                type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <component name="threadedScorer"
                type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The linguist  configuration                              -->
    <!-- ******************************************************** -->

    <component name="flatLinguist"
                type="edu.cmu.sphinx.linguist.flat.FlatLinguist">
        <property name="logMath" value="logMath"/>
        <property name="grammar" value="jsgfGrammar"/>
        <property name="acousticModel" value="wsj"/>
        <property name="wordInsertionProbability"
                value="${wordInsertionProbability}"/>
        <property name="languageWeight" value="${languageWeight}"/>
        <property name="unitManager" value="unitManager"/>
    </component>


    <!-- ******************************************************** -->
    <!-- The Grammar  configuration                               -->
    <!-- ******************************************************** -->

    <component name="jsgfGrammar" type="edu.cmu.sphinx.jsgf.JSGFGrammar">
        <property name="dictionary" value="dictionary"/>
        <property name="grammarLocation"
             value="resource:/edu/cmu/sphinx/demo/helloworld/"/>
        <property name="grammarName" value="hello"/>
    <property name="logMath" value="logMath"/>
    </component>


    <!-- ******************************************************** -->
    <!-- The Dictionary configuration                            -->
    <!-- ******************************************************** -->

    <component name="dictionary"
        type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath"
     value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/cmudict.0.6d"/>
        <property name="fillerPath"
     value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/noisedict"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="allowMissingWords" value="false"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The acoustic model configuration                         -->
    <!-- ******************************************************** -->
    <component name="wsj"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="location" value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz"/>
    </component>


    <!-- ******************************************************** -->
    <!-- The unit manager configuration                           -->
    <!-- ******************************************************** -->

    <component name="unitManager"
        type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

    <!-- ******************************************************** -->
    <!-- The frontend configuration                               -->
    <!-- ******************************************************** -->

    <component name="frontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <item>microphone </item>
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>liveCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The live frontend configuration                          -->
    <!-- ******************************************************** -->
    <component name="epFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <item>microphone </item>
            <item>dataBlocker </item>
            <item>speechClassifier </item>
            <item>speechMarker </item>
            <item>nonSpeechDataFilter </item>
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>liveCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- The frontend pipelines                                   -->
    <!-- ******************************************************** -->

    <component name="dataBlocker" type="edu.cmu.sphinx.frontend.DataBlocker">
        <!--<property name="blockSizeMs" value="10"/>-->
    </component>

    <component name="speechClassifier"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier">
        <property name="threshold" value="13"/>
    </component>

    <component name="nonSpeechDataFilter"
               type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

    <component name="speechMarker"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker" >
        <property name="speechTrailer" value="50"/>
    </component>


    <component name="preemphasizer"
               type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower"
               type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower">
    </component>

    <component name="fft"
            type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform">
    </component>

    <component name="melFilterBank"
        type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">
    </component>

    <component name="dct"
            type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="liveCMN"
               type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>

    <component name="featureExtraction"
               type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>

    <component name="microphone"
               type="edu.cmu.sphinx.frontend.util.Microphone">
        <property name="closeBetweenUtterances" value="false"/>
    </component>


    <!-- ******************************************************* -->
    <!--  monitors                                               -->
    <!-- ******************************************************* -->

    <component name="accuracyTracker"
                type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showAlignedResults" value="true"/>
        <property name="showRawResults" value="true"/>
    </component>




    <component name="memoryTracker"
                type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer" value="${recognizer}"/>
    <property name="showSummary" value="false"/>
    <property name="showDetails" value="false"/>
    </component>

    <component name="speedTracker"
                type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="frontend" value="${frontend}"/>
    <property name="showSummary" value="true"/>
    <property name="showDetails" value="false"/>
    </component>


    <!-- ******************************************************* -->
    <!--  Miscellaneous components                               -->
    <!-- ******************************************************* -->

    <component name="logMath" type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>

</config>
barryhunter
  • 20,886
  • 3
  • 30
  • 43
BhavikKama
  • 8,566
  • 12
  • 94
  • 164
  • Hey, i need to talk to you; i am a 15 year old newbie to java. I have installed sphinx and my helloworld works, but nothing else... if i do not include the helloworld.jar i cannot run a java file but if i include that jar, no matter what i type in the java file the output does not change. What to do? – Daksh Shah Jan 06 '14 at 22:54
  • there are some predefine words they put in dictionary file if you want to include some other words then u need to change the dictionary file you including in the config.xml – BhavikKama Jan 07 '14 at 05:23
  • Actually the thing is that what i am trying to edit at first is just the sentence it outputs, i found where that code was in my HelloWorld.java and i changed it but the output is still the same... can you please have a chat with me? – Daksh Shah Jan 07 '14 at 05:57
  • okie you can contact me on my fb Id...just give me ping after 9pm IST – BhavikKama Jan 07 '14 at 10:16
  • I am not on facebook; any other way to contact? – Daksh Shah Jan 07 '14 at 15:00
  • give me chat request here at tomorrow 2 pm... – BhavikKama Jan 08 '14 at 05:38
  • Can we chat now?? sorry got 1 hour late – Daksh Shah Jan 09 '14 at 09:38
  • http://chat.stackoverflow.com/rooms/44871/sphinx @Bhavik Kama – Daksh Shah Jan 09 '14 at 09:39

1 Answers1

2

Done!!Got Succeeded with the transcriber class of the sphinx4..

BhavikKama
  • 8,566
  • 12
  • 94
  • 164
  • if you have implemented the sphinx hello world demo for speech recognition.there is one another sample program sphinx provides name is transcriber..just got to this link and you will find it http://cmusphinx.sourceforge.net/sphinx4/#demos – BhavikKama Sep 24 '12 at 04:54