0

I am trying to find the number of multiply–accumulate (MAC) operations needed for an inference task (speech-to-text conversion). I have used thop before, but it only works for PyTorch modules. How do I find the number of operations performed inside the process_file function?

import pvleopard
from thop import  profile
# Create the Leopard engine; replace the placeholder with a real access key.
handle = pvleopard.create(access_key='<your_key>')

audio_file = "audio.wav"
try:
    # process_file returns a (transcript, words) pair, so unpack it rather
    # than binding the whole tuple to `text`.
    text, words = handle.process_file(audio_file)
finally:
    # Release the engine's native resources even if transcription fails.
    handle.delete()

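An access key can be obtained from the Picovoice Console. For reference, this is the implementation of `process_file` in pvleopard: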

def process_file(self, audio_path: str) -> Tuple[str, Sequence[Word]]:
    """
    Processes a given audio file and returns its transcription.

    :param audio_path: Absolute path to the audio file. The file needs to have a sample rate equal to or greater
    than `.sample_rate`. The supported formats are: `FLAC`, `MP3`, `Ogg`, `Opus`, `Vorbis`, `WAV`, and `WebM`.
    :return: Inferred transcription and sequence of transcribed words and their associated metadata.
    :raises LeopardIOError: If nothing exists at `audio_path`.
    :raises Exception: If the underlying call reports a non-success status, the exception type registered for that
    status in `_PICOVOICE_STATUS_TO_EXCEPTION` is raised. For an invalid-argument status on a path whose extension
    is not in `_VALID_EXTENSIONS`, the exception carries a message naming the unsupported extension.
    """

    if not os.path.exists(audio_path):
        raise LeopardIOError("Could not find the audio file at `%s`" % audio_path)

    # Output slots; they are handed to the underlying call by reference and filled in by it.
    c_transcript = c_char_p()
    num_words = c_int32()
    c_words = POINTER(self.CWord)()
    status = self._process_file_func(
        self._handle,
        audio_path.encode(),
        byref(c_transcript),
        byref(num_words),
        byref(c_words))
    if status is not self.PicovoiceStatuses.SUCCESS:
        if status is self.PicovoiceStatuses.INVALID_ARGUMENT:
            # Give a more helpful message when the likely cause is an unsupported file type.
            if not audio_path.lower().endswith(self._VALID_EXTENSIONS):
                raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
                    "Specified file with extension '%s' is not supported" % pathlib.Path(audio_path).suffix
                )
        raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()

    # Copy everything into Python objects first, and hand both buffers back to their delete
    # functions in `finally` so they are released even if decoding raises part-way through.
    try:
        transcript = c_transcript.value.decode('utf-8')

        words = list()
        for i in range(num_words.value):
            c_word = c_words[i]  # index once instead of once per field
            words.append(
                self.Word(
                    word=c_word.word.decode('utf-8'),
                    start_sec=c_word.start_sec,
                    end_sec=c_word.end_sec,
                    confidence=c_word.confidence))
    finally:
        try:
            self._transcript_delete_func(c_transcript)
        finally:
            # Runs even if releasing the transcript itself fails.
            self._words_delete_func(c_words)

    return transcript, words
afsara_ben
  • 542
  • 1
  • 11
  • 30

0 Answers