1

I'm in the process of creating a subtitle generator for generic videos. One of the major blockers is getting the timestamp for each word so that it aligns with the video, which is kind of killing me at the moment. The Result class has a getTimedBestResult() function, which is supposed to return something of the form word(timestamp), e.g. the(0:20-0:22). At the moment, when I call it, I get output along the lines of "WordToken one null", where "one" is the word being recognized and null is where the timestamp is supposed to be. I've read through the documentation and have not arrived at a solution so far.

// Ask the Result for its best hypothesis as a timed string; both boolean flags are off.
// NOTE(review): the flags presumably control filler words and token ordering - confirm against the Result Javadoc.
String resultText = result.getTimedBestResult(false,false);

I use the hub4 models and CMUdict 0.6, if that's of any importance, with WordPruningBreadthFirstSearchManager (lexTree linguist). I don't know if I'm doing anything wrong in the config.xml:

<?xml version="1.0" encoding="UTF-8"?>
<!-- ********************************************************
     Sphinx-4 Configuration file
     ******************************************************** 
-->
<config>
    <!-- ******************************************************** -->
    <!-- Frequently tuned global properties.                      -->
    <!-- Components in this file pick these up as ${name}.        -->
    <!-- ******************************************************** -->
    <property name="absoluteBeamWidth" value="10000"/>
    <property name="relativeBeamWidth" value="1E-80"/>
    <property name="absoluteWordBeamWidth" value="20"/>
    <property name="relativeWordBeamWidth" value="1E-60"/>
    <property name="wordInsertionProbability" value="0.2"/>
    <property name="languageWeight" value="10.5"/>
    <property name="silenceInsertionProbability" value=".05"/>
    <property name="frontend" value="epFrontEnd"/>
    <property name="recognizer" value="recognizer"/>
    <property name="showCreations" value="false"/>
    <!-- logLevel is a global property like the ones above; it is declared
         directly under the root instead of inside a nested <config> wrapper. -->
    <property name="logLevel" value="SEVERE"/>

    <!-- ************************************************** -->
    <!-- Batch mode: drives the recognizer named by the     -->
    <!-- global "recognizer" property.                      -->
    <!-- ************************************************** -->
    <component name="batch"
               type="edu.cmu.sphinx.tools.batch.BatchModeRecognizer">
        <property name="recognizer" value="${recognizer}"/>
        <property name="skip" value="0"/>
        <propertylist name="inputDataProcessors">
            <item>audioFileDataSource</item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- Word recognizer: binds the decoder and the monitors.     -->
    <!-- Item values are component names and are written without  -->
    <!-- surrounding whitespace so they match those names exactly. -->
    <!-- ******************************************************** -->
    <component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
            <item>accuracyTracker</item>
            <item>speedTracker</item>
            <item>memoryTracker</item>
            <item>recognizerMonitor</item>
        </propertylist>
    </component>

    <!-- ******************************************************** -->
    <!-- Decoder: uses the wordPruningSearchManager component as  -->
    <!-- its search manager.                                      -->
    <!-- ******************************************************** -->
    <component name="decoder"
               type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="wordPruningSearchManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Search Manager                                       -->
    <!-- keepAllTokens is enabled so the best path keeps its      -->
    <!-- scored (emitting) tokens rather than word tokens only;   -->
    <!-- timed results are read from those tokens.                -->
    <!-- Word timing also needs scoreablesKeepFeature=true on the -->
    <!-- acoustic scorer.                                         -->
    <!-- NOTE(review): keeping every token raises memory use;     -->
    <!-- confirm it is acceptable for long recordings.            -->
    <!-- ******************************************************** -->
    <component name="wordPruningSearchManager" type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
        <property name="logMath" value="logMath"/>
        <property name="linguist" value="lexTreeLinguist"/>
        <property name="pruner" value="trivialPruner"/>
        <property name="scorer" value="threadedScorer"/>
        <property name="activeListManager" value="activeListManager"/>
        <property name="growSkipInterval" value="0"/>
        <property name="checkStateOrder" value="false"/>
        <property name="buildWordLattice" value="false"/>
        <property name="keepAllTokens" value="true"/>
        <property name="maxLatticeEdges" value="3"/>
        <property name="acousticLookaheadFrames" value="1.7"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <!-- ******************************************************** -->
    <!-- Active lists: the manager takes an ordered list of six   -->
    <!-- factory references; two factories are defined below.     -->
    <!-- ******************************************************** -->
    <component name="activeListManager"
               type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
        <propertylist name="activeListFactories">
            <item>standardActiveListFactory</item>
            <item>wordActiveListFactory</item>
            <item>wordActiveListFactory</item>
            <item>standardActiveListFactory</item>
            <item>standardActiveListFactory</item>
            <item>standardActiveListFactory</item>
        </propertylist>
    </component>

    <!-- Factory configured with the general beam widths. -->
    <component name="standardActiveListFactory"
               type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <!-- Factory configured with the word beam widths. -->
    <component name="wordActiveListFactory"
               type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath" value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteWordBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeWordBeamWidth}"/>
    </component>

    <!-- ******************************************************** -->
    <!-- The Pruner (no properties set; referenced by the search  -->
    <!-- manager as "trivialPruner").                             -->
    <!-- ******************************************************** -->
    <component name="trivialPruner"
               type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <!-- ******************************************************** -->
    <!-- The Scorer                                               -->
    <!-- scoreablesKeepFeature is true so that scored tokens keep -->
    <!-- the feature frame they were scored against; timed        -->
    <!-- results take word start/end times from those frames.     -->
    <!-- With it set to false the timestamp comes back as null.   -->
    <!-- Word timing also needs keepAllTokens=true on the search  -->
    <!-- manager.                                                 -->
    <!-- ******************************************************** -->
    <component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
        <property name="isCpuRelative" value="false"/>
        <property name="numThreads" value="10"/>
        <property name="minScoreablesPerThread" value="10"/>
        <property name="scoreablesKeepFeature" value="true"/>
    </component>
    <!-- ******************************************************** -->
    <!-- Linguist: lex-tree linguist wired to the acoustic model, -->
    <!-- language model, dictionary and unit manager components.  -->
    <!-- ******************************************************** -->
    <component name="lexTreeLinguist"
               type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
        <!-- component references -->
        <property name="logMath" value="logMath"/>
        <property name="acousticModel" value="wsj"/>
        <property name="languageModel" value="language"/>
        <property name="dictionary" value="dict"/>
        <property name="unitManager" value="unitManager"/>

        <!-- values taken from the global properties -->
        <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
        <property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
        <property name="languageWeight" value="${languageWeight}"/>

        <!-- values fixed here -->
        <property name="addFillerWords" value="false"/>
        <property name="fillerInsertionProbability" value="1E-10"/>
        <property name="generateUnitStates" value="true"/>
        <property name="wantUnigramSmear" value="true"/>
        <property name="unigramSmearWeight" value="1"/>
    </component>

    <!-- ******************************************************** -->
    <!-- Dictionary: word and filler dictionaries are loaded from -->
    <!-- absolute file URLs on the local machine.                 -->
    <!-- ******************************************************** -->
    <component name="dict"
               type="edu.cmu.sphinx.linguist.dictionary.FullDictionary">
        <property name="dictionaryPath"
                  value="file:///C:/Users/Asgard/Documents/workspace/AsgardGit/asgard/dict/cmudict.06d.dict"/>
        <property name="fillerPath"
                  value="file:///C:/Users/Asgard/Documents/workspace/AsgardGit/asgard/dict/filler.filler"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="wordReplacement" value="&lt;sil&gt;"/>
        <property name="allowMissingWords" value="false"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- ******************************************************** -->
    <!-- Language model: trigram model (maxDepth 3) read from a   -->
    <!-- local DMP file.                                          -->
    <!-- ******************************************************** -->
    <component name="language"
               type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
        <property name="location"
                  value="file:///C:/Users/Asgard/Documents/workspace/lang/language_model.arpaformat.DMP"/>
        <property name="dictionary" value="dict"/>
        <property name="logMath" value="logMath"/>
        <property name="maxDepth" value="3"/>
        <property name="unigramWeight" value=".5"/>
    </component>


    <!-- ******************************************************** -->
    <!-- Acoustic model. The components are named "wsj" and       -->
    <!-- "wsjLoader", but the loader location points at a hub4    -->
    <!-- model directory.                                         -->
    <!-- ******************************************************** -->
    <component name="wsj"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader" value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <component name="wsjLoader"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="location"
                  value="file:///C:/Users/Asgard/Documents/workspace/acc/hub4_cd_continuous_8gau_1s_c_d_dd/"/>
        <property name="logMath" value="logMath"/>
        <property name="unitManager" value="unitManager"/>
    </component>


    <!-- ******************************************************** -->
    <!-- Unit manager: shared by the linguist, the dictionary and -->
    <!-- the acoustic model components.                           -->
    <!-- ******************************************************** -->
    <component name="unitManager"
               type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>
    <!-- ******************************************************** -->
    <!-- The frontend configuration                               -->
    <!-- The pipeline lists processor component names in the      -->
    <!-- order given here. Names are written without surrounding  -->
    <!-- whitespace so they match the component names exactly.    -->
    <!-- ******************************************************** -->
    <component name="epFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <item>audioFileDataSource</item>
            <item>dataBlocker</item>
            <item>speechClassifier</item>
            <item>speechMarker</item>
            <item>nonSpeechDataFilter</item>
            <item>preemphasizer</item>
            <item>windower</item>
            <item>fft</item>
            <item>melFilterBank</item>
            <item>dct</item>
            <item>liveCMN</item>
            <item>featureExtraction</item>
        </propertylist>
    </component>
    <!-- Front-end processors, declared in the order the epFrontEnd pipeline
         lists them. None of them sets any property here. -->
    <component name="audioFileDataSource"
               type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>

    <component name="dataBlocker"
               type="edu.cmu.sphinx.frontend.DataBlocker"/>

    <component name="speechClassifier"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier"/>

    <component name="speechMarker"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker"/>

    <component name="nonSpeechDataFilter"
               type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

    <component name="preemphasizer"
               type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower"
               type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

    <component name="fft"
               type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

    <component name="melFilterBank"
               type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>

    <component name="dct"
               type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="liveCMN"
               type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>

    <component name="featureExtraction"
               type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>


    <!-- ******************************************************* -->
    <!-- Monitors: the four components listed in the recognizer's -->
    <!-- "monitors" list, plus the helpers they reference.        -->
    <!-- ******************************************************* -->
    <component name="accuracyTracker"
               type="edu.cmu.sphinx.instrumentation.BestConfidenceAccuracyTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="confidenceScorer" value="confidenceScorer"/>
        <property name="showRawResults" value="true"/>
        <property name="showAlignedResults" value="true"/>
    </component>

    <!-- Referenced by accuracyTracker. -->
    <component name="confidenceScorer"
               type="edu.cmu.sphinx.result.SausageMaker"/>

    <component name="memoryTracker"
               type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="showDetails" value="false"/>
        <property name="showSummary" value="false"/>
    </component>

    <component name="speedTracker"
               type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="recognizer" value="${recognizer}"/>
        <property name="frontend" value="${frontend}"/>
        <property name="showDetails" value="false"/>
    </component>

    <component name="recognizerMonitor"
               type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
        <property name="recognizer" value="${recognizer}"/>
        <propertylist name="allocatedMonitors">
            <item>configMonitor</item>
        </propertylist>
    </component>

    <!-- Referenced by recognizerMonitor. -->
    <component name="configMonitor"
               type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
        <property name="showConfig" value="false"/>
    </component>


    <!-- ******************************************************* -->
    <!-- Miscellaneous: the logMath component referenced by the  -->
    <!-- search, linguist and model components in this file.     -->
    <!-- ******************************************************* -->
    <component name="logMath"
               type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase" value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>
</config>

Thank you for your replies

  • I answered you already on forum, it's hard to answer same questions in multiple places so it's better to avoid crossposting. https://sourceforge.net/p/cmusphinx/discussion/sphinx4/thread/0a44039c/?limit=25#f5be – Nikolay Shmyrev Feb 15 '14 at 09:49
  • yea, i know, niko :) i had asked it here before posting it on the forums. ikr what was i thinking – Navaneeth Krishnan Feb 16 '14 at 12:57

0 Answers0