0

Attempted: Transcribing a small .wav file

Problem: I am running into an error (details and code below). The code is a copy of the sample at https://github.com/cmusphinx/sphinx4/blob/master/sphinx4-samples/src/main/java/edu/cmu/sphinx/demo/transcriber/TranscriberDemo.java

I am not sure what is incorrect here and am totally stumped. Please help.

Error:

Exception in thread "main" java.lang.NullPointerException
at edu.cmu.sphinx.result.Lattice.<init>(Lattice.java:171)
at edu.cmu.sphinx.api.SpeechResult.<init>(SpeechResult.java:38)
at edu.cmu.sphinx.api.AbstractSpeechRecognizer.getResult(AbstractSpeechRecognizer.java:61)
at com.microsoft.SpeechRecognitionSamples.TranscriberDemo.main(TranscriberDemo.java:43)

Source code:

package com.microsoft.SpeechRecognitionSamples;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.logging.Level;
import java.util.logging.Logger;

import edu.cmu.sphinx.api.Configuration;
import edu.cmu.sphinx.api.SpeechResult;
import edu.cmu.sphinx.api.StreamSpeechRecognizer;
import edu.cmu.sphinx.result.WordResult;



/**
 * Command-line demo that feeds an audio file to a Sphinx-4 {@code StreamSpeechRecognizer}
 * and prints, for every result returned, the hypothesis, the recognized words and the
 * three best hypotheses.
 */
public class TranscriberDemo {

  /** Audio file used when no path is supplied on the command line. */
  private static final String DEFAULT_AUDIO_PATH =
      "C:\\Users\\vajra\\Desktop\\Projects\\whatstheweatherlike.wav";

  /**
   * Number of leading bytes discarded before recognition starts.
   * NOTE(review): presumably the size of a canonical WAV header; confirm for the input file.
   */
  private static final long HEADER_BYTES_TO_SKIP = 44;

  /**
   * Loads the en-us models bundled with the Sphinx-4 data jar, transcribes the audio file
   * and prints every recognition result to standard output.
   *
   * @param args optional; {@code args[0]} is the path of the audio file to transcribe.
   *             When absent, {@link #DEFAULT_AUDIO_PATH} is used.
   * @throws Exception if anything other than an {@link IOException} fails; I/O failures
   *                   are logged and printed instead of being propagated
   */
  public static void main(String[] args) throws Exception {
    String audioPath = (args != null && args.length > 0) ? args[0] : DEFAULT_AUDIO_PATH;

    try {
      System.out.println("Loading models...");

      Configuration configuration = new Configuration();

      // Load model from the jar
      configuration.setAcousticModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us");

      // You can also load model from folder
      // configuration.setAcousticModelPath("file:en-us");

      configuration.setDictionaryPath("resource:/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict");
      configuration.setLanguageModelPath("resource:/edu/cmu/sphinx/models/en-us/en-us.lm.bin");

      StreamSpeechRecognizer recognizer = new StreamSpeechRecognizer(configuration);

      // try-with-resources guarantees the file handle is released on every exit path.
      try (InputStream stream = new FileInputStream(new File(audioPath))) {
        skipFully(stream, HEADER_BYTES_TO_SKIP);

        // Simple recognition with generic model
        recognizer.startRecognition(stream);
        try {
          SpeechResult result;
          while ((result = recognizer.getResult()) != null) {
            printResult(result);
          }
        } finally {
          // Stop the recognizer even when getResult() fails part-way through.
          recognizer.stopRecognition();
        }
      }
    } catch (IOException ex) {
      Logger.getLogger(TranscriberDemo.class.getName()).log(Level.SEVERE, null, ex);
      System.out.println(ex);
    }
  }

  /** Prints the hypothesis, the word results and the three best hypotheses of one result. */
  private static void printResult(SpeechResult result) {
    System.out.format("Hypothesis: %s\n", result.getHypothesis());

    System.out.println("List of recognized words and their times:");
    for (WordResult word : result.getWords()) {
      System.out.println(word);
    }

    System.out.println("Best 3 hypothesis:");
    for (String hypothesis : result.getNbest(3)) {
      System.out.println(hypothesis);
    }
  }

  /**
   * Skips exactly {@code count} bytes of {@code in}.
   *
   * <p>{@link InputStream#skip(long)} is allowed to skip fewer bytes than requested, so a
   * single call is not sufficient; this method keeps going until the full count is consumed.
   *
   * @param in    stream to advance
   * @param count number of bytes to discard
   * @throws IOException if the stream ends before {@code count} bytes were skipped, or if
   *                     the underlying stream fails
   */
  private static void skipFully(InputStream in, long count) throws IOException {
    long remaining = count;
    while (remaining > 0) {
      long skipped = in.skip(remaining);
      if (skipped > 0) {
        remaining -= skipped;
      } else if (in.read() >= 0) {
        // skip() made no progress; consuming one byte distinguishes "slow" from end of stream.
        remaining--;
      } else {
        throw new IOException(
            "Audio stream ended before " + count + " header bytes could be skipped");
      }
    }
  }
}

Output:

Loading models...
18:22:39.036 INFO unitManager          CI Unit: *+NSN+
18:22:39.040 INFO unitManager          CI Unit: *+SPN+
18:22:39.040 INFO unitManager          CI Unit: AA
18:22:39.040 INFO unitManager          CI Unit: AE
18:22:39.040 INFO unitManager          CI Unit: AH
18:22:39.040 INFO unitManager          CI Unit: AO
18:22:39.040 INFO unitManager          CI Unit: AW
18:22:39.041 INFO unitManager          CI Unit: AY
18:22:39.041 INFO unitManager          CI Unit: B
18:22:39.041 INFO unitManager          CI Unit: CH
18:22:39.041 INFO unitManager          CI Unit: D
18:22:39.042 INFO unitManager          CI Unit: DH
18:22:39.042 INFO unitManager          CI Unit: EH
18:22:39.042 INFO unitManager          CI Unit: ER
18:22:39.042 INFO unitManager          CI Unit: EY
18:22:39.042 INFO unitManager          CI Unit: F
18:22:39.042 INFO unitManager          CI Unit: G
18:22:39.043 INFO unitManager          CI Unit: HH
18:22:39.043 INFO unitManager          CI Unit: IH
18:22:39.043 INFO unitManager          CI Unit: IY
18:22:39.043 INFO unitManager          CI Unit: JH
18:22:39.043 INFO unitManager          CI Unit: K
18:22:39.043 INFO unitManager          CI Unit: L
18:22:39.044 INFO unitManager          CI Unit: M
18:22:39.044 INFO unitManager          CI Unit: N
18:22:39.044 INFO unitManager          CI Unit: NG
18:22:39.044 INFO unitManager          CI Unit: OW
18:22:39.044 INFO unitManager          CI Unit: OY
18:22:39.044 INFO unitManager          CI Unit: P
18:22:39.045 INFO unitManager          CI Unit: R
18:22:39.045 INFO unitManager          CI Unit: S
18:22:39.045 INFO unitManager          CI Unit: SH
18:22:39.045 INFO unitManager          CI Unit: T
18:22:39.045 INFO unitManager          CI Unit: TH
18:22:39.045 INFO unitManager          CI Unit: UH
18:22:39.046 INFO unitManager          CI Unit: UW
18:22:39.046 INFO unitManager          CI Unit: V
18:22:39.046 INFO unitManager          CI Unit: W
18:22:39.046 INFO unitManager          CI Unit: Y
18:22:39.046 INFO unitManager          CI Unit: Z
18:22:39.046 INFO unitManager          CI Unit: ZH
18:22:39.917 INFO autoCepstrum         Cepstrum component auto-configured as follows: autoCepstrum {MelFrequencyFilterBank, Denoise, DiscreteCosineTransform2, Lifter}
18:22:39.941 INFO dictionary           Loading dictionary from: jar:file:/C:/Users/vajra/.m2/repository/edu/cmu/sphinx/sphinx4-data/5prealpha-SNAPSHOT/sphinx4-data-5prealpha-SNAPSHOT.jar!/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict
18:22:40.111 INFO dictionary           Loading filler dictionary from: jar:file:/C:/Users/vajra/.m2/repository/edu/cmu/sphinx/sphinx4-data/5prealpha-SNAPSHOT/sphinx4-data-5prealpha-SNAPSHOT.jar!/edu/cmu/sphinx/models/en-us/en-us/noisedict
18:22:40.112 INFO acousticModelLoader  Loading tied-state acoustic model from: jar:file:/C:/Users/vajra/.m2/repository/edu/cmu/sphinx/sphinx4-data/5prealpha-SNAPSHOT/sphinx4-data-5prealpha-SNAPSHOT.jar!/edu/cmu/sphinx/models/en-us/en-us
18:22:40.112 INFO acousticModelLoader  Pool means Entries: 16128
18:22:40.112 INFO acousticModelLoader  Pool variances Entries: 16128
18:22:40.112 INFO acousticModelLoader  Pool transition_matrices Entries: 42
18:22:40.112 INFO acousticModelLoader  Pool senones Entries: 5126
18:22:40.112 INFO acousticModelLoader  Gaussian weights: mixture_weights. Entries: 15378
18:22:40.112 INFO acousticModelLoader  Pool senones Entries: 5126
18:22:40.112 INFO acousticModelLoader  Context Independent Unit Entries: 42
18:22:40.113 INFO acousticModelLoader  HMM Manager: 137095 hmms
18:22:40.113 INFO acousticModel        CompositeSenoneSequences: 0
18:22:40.114 INFO trieNgramModel       Loading n-gram language model from: jar:file:/C:/Users/vajra/.m2/repository/edu/cmu/sphinx/sphinx4-data/5prealpha-SNAPSHOT/sphinx4-data-5prealpha-SNAPSHOT.jar!/edu/cmu/sphinx/models/en-us/en-us.lm.bin
18:22:41.581 INFO dictionary           The dictionary is missing a phonetic transcription for the word '3-d'
18:22:41.582 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word '3-d'
18:22:41.602 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'adjustors'
18:22:41.602 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'adjustors'
18:22:41.604 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'adulyadej'
18:22:41.604 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'adulyadej'
18:22:41.786 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'chloroflourocarbons'
18:22:41.786 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'chloroflourocarbons'
18:22:41.869 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'd�j�'
18:22:41.870 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'd�j�'
18:22:41.937 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'iife'
18:22:41.938 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'iife'
18:22:41.961 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'mm-hm'
18:22:41.961 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'mm-hm'
18:22:41.961 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'mm-hmm'
18:22:41.961 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'mm-hmm'
18:22:41.961 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'mmmm'
18:22:41.961 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'mmmm'
18:22:41.966 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'ngo's'
18:22:41.966 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'ngo's'
18:22:41.968 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'occured'
18:22:41.968 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'occured'
18:22:41.968 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'offical'
18:22:41.968 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'offical'
18:22:41.968 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'officals'
18:22:41.968 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'officals'
18:22:41.988 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'port_au_prince'
18:22:41.988 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'port_au_prince'
18:22:41.989 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'possiblity'
18:22:41.989 WARNING trieNgramModel    The dictionary is missing a phonetic transcription for the word 'possiblity'
18:22:42.032 WARNING trieNgramModel    Dictionary is missing 15 words that are contained in the language model.
18:22:42.992 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'occured'
18:22:42.994 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'iife'
18:22:42.994 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'port_au_prince'
18:22:43.000 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'd�j�'
18:22:43.001 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'mmmm'
18:22:43.002 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'offical'
18:22:43.005 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'possiblity'
18:22:43.011 INFO dictionary           The dictionary is missing a phonetic transcription for the word '3-d'
18:22:43.016 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'adulyadej'
18:22:43.029 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'ngo's'
18:22:43.029 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'mm-hm'
18:22:43.029 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'adjustors'
18:22:43.031 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'officals'
18:22:43.036 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'mm-hmm'
18:22:43.037 INFO dictionary           The dictionary is missing a phonetic transcription for the word 'chloroflourocarbons'
18:22:43.875 INFO lexTreeLinguist      Max CI Units 43
18:22:43.875 INFO lexTreeLinguist      Unit table size 79507
18:22:43.878 INFO speedTracker         # ----------------------------- Timers----------------------------------------
18:22:43.878 INFO speedTracker         # Name               Count   CurTime   MinTime   MaxTime   AvgTime   TotTime   
18:22:43.879 INFO speedTracker         Compile              1       1.8430s   1.8430s   1.8430s   1.8430s   1.8430s   
18:22:43.879 INFO speedTracker         Load AM              1       2.0140s   2.0140s   2.0140s   2.0140s   2.0140s   
18:22:43.879 INFO speedTracker         Load LM              1       1.9190s   1.9190s   1.9190s   1.9190s   1.9190s   
18:22:43.879 INFO speedTracker         Load Dictionary      1       0.1700s   0.1700s   0.1700s   0.1700s   0.1700s   
18:22:46.140 INFO speedTracker            This  Time Audio: 1.22s  Proc: 2.20s  Speed: 1.80 X real time
18:22:46.141 INFO speedTracker            Total Time Audio: 1.22s  Proc: 2.20s 1.80 X real time
18:22:46.141 INFO memoryTracker           Mem  Total: 630.50 Mb  Free: 220.90 Mb
18:22:46.141 INFO memoryTracker           Used: This: 409.60 Mb  Avg: 409.60 Mb  Max: 409.60 Mb
18:22:46.141 INFO trieNgramModel       LM Cache Size: 1730 Hits: 153479 Misses: 1730
Exception in thread "main" java.lang.NullPointerException
at edu.cmu.sphinx.result.Lattice.<init>(Lattice.java:171)
at edu.cmu.sphinx.api.SpeechResult.<init>(SpeechResult.java:38)
at edu.cmu.sphinx.api.AbstractSpeechRecognizer.getResult(AbstractSpeechRecognizer.java:61)
at com.microsoft.SpeechRecognitionSamples.TranscriberDemo.main(TranscriberDemo.java:43)

Maven POM:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0    </modelVersion>

  <groupId>com.microsoft.SpeechRecognitionSamples    </groupId>
  <artifactId>SpeechToTextCMUSphinx    </artifactId>
  <version>0.0.1-SNAPSHOT    </version>
  <packaging>jar    </packaging>

  <name>SpeechToTextCMUSphinx    </name>
  <url>http://maven.apache.org    </url>

  <properties>
    <project.build.sourceEncoding>UTF-8    </project.build.sourceEncoding>
  </properties>

  <repositories>
        <repository>
            <id>snapshots-repo    </id>
            <url>https://oss.sonatype.org/content/repositories/snapshots    </url>
            <releases>    <enabled>false    </enabled>    </releases>
        <snapshots>    <enabled>true    </enabled>    </snapshots>
        </repository>
    </repositories>

  <dependencies>
  <dependency>
  <groupId>edu.cmu.sphinx    </groupId>
  <artifactId>sphinx4-core    </artifactId>
  <version>5prealpha-SNAPSHOT    </version>
</dependency>

<dependency>
  <groupId>edu.cmu.sphinx    </groupId>
  <artifactId>sphinx4-data    </artifactId>
  <version>5prealpha-SNAPSHOT    </version>
</dependency>


    <dependency>
      <groupId>junit    </groupId>
      <artifactId>junit    </artifactId>
      <version>3.8.1    </version>
      <scope>test    </scope>
    </dependency>
  </dependencies>

  <build>
        <sourceDirectory>src/main/    </sourceDirectory>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins    </groupId>
                <artifactId>maven-shade-plugin    </artifactId>
                <version>2.3    </version>
                <executions>
                    <execution>
                        <phase>package    </phase>
                        <goals>
                            <goal>shade    </goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*    </artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF    </exclude>
                                        <exclude>META-INF/*.DSA    </exclude>
                                        <exclude>META-INF/*.RSA    </exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <!-- Additional configuration. -->
                        </configuration>
                    </execution>

                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins    </groupId>
                <artifactId>maven-surefire-plugin    </artifactId>
                <version>2.18.1    </version>
                <configuration>
                    <useFile>false    </useFile>
                    <disableXmlReport>true    </disableXmlReport>
                    <!-- If you have classpath issue like NoDefClassError,... -->
                    <!-- useManifestOnlyJar>false    </useManifestOnlyJar -->
                    <includes>
                        <include>**/*Test.*    </include>
                        <include>**/*Suite.*    </include>
                    </includes>
                </configuration>
            </plugin>

        </plugins>
    </build>

</project>
Vajra
  • 471
  • 1
  • 4
  • 4
  • This issue was fixed yesterday and new jars were published; re-run the build to pick up the updated snapshot and the error will be resolved. You already asked your question here: https://sourceforge.net/p/cmusphinx/discussion/sphinx4/thread/229e578b – Nikolay Shmyrev May 30 '16 at 11:00
  • That was not me, but thanks for the fix, Nikolay Shmyrev! – Vajra Jun 01 '16 at 02:31

0 Answers0