I am trying to find the number of MAC (multiply-accumulate) operations needed for an inference task (speech-to-text conversion).
I have used `thop` before, but it only works with PyTorch modules.
How do I find the number of operations performed inside the `process_file` function?
import pvleopard
from thop import profile
# Create the Leopard speech-to-text engine; replace '<your_key>' with a real access key.
handle = pvleopard.create(access_key='<your_key>')
audio_file = "audio.wav"
try:
    # `process_file` returns a `(transcript, words)` tuple, so unpack it instead of
    # binding the whole tuple to `text`.
    text, words = handle.process_file(audio_file)
finally:
    # Release the native resources held by the engine once transcription is done.
    handle.delete()
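An access key can be obtained from the Picovoice Console. For reference, this is the implementation of `process_file`: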
def process_file(self, audio_path: str) -> Tuple[str, Sequence[Word]]:
    """
    Processes a given audio file and returns its transcription.

    :param audio_path: Absolute path to the audio file. The file needs to have a sample rate equal to or greater
    than `.sample_rate`. The supported formats are: `FLAC`, `MP3`, `Ogg`, `Opus`, `Vorbis`, `WAV`, and `WebM`.
    :return: Inferred transcription and sequence of transcribed words and their associated metadata.
    :raises LeopardIOError: If nothing exists at `audio_path`.
    :raises: The exception that `_PICOVOICE_STATUS_TO_EXCEPTION` maps the returned status to, whenever the native
    call does not report `SUCCESS`.
    """

    if not os.path.exists(audio_path):
        raise LeopardIOError("Could not find the audio file at `%s`" % audio_path)

    # Output slots that the native call fills in by reference.
    c_transcript = c_char_p()
    num_words = c_int32()
    c_words = POINTER(self.CWord)()

    status = self._process_file_func(
        self._handle,
        audio_path.encode(),
        byref(c_transcript),
        byref(num_words),
        byref(c_words))

    if status is not self.PicovoiceStatuses.SUCCESS:
        # Give a more specific message when the failure coincides with an unsupported file extension.
        unsupported_extension = (
            status is self.PicovoiceStatuses.INVALID_ARGUMENT
            and not audio_path.lower().endswith(self._VALID_EXTENSIONS)
        )
        if unsupported_extension:
            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status](
                "Specified file with extension '%s' is not supported" % pathlib.Path(audio_path).suffix
            )
        raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]()

    # From here on both native buffers have been handed to us; hand them back to the
    # library's delete functions even if decoding fails, so that nothing is leaked.
    try:
        try:
            transcript = c_transcript.value.decode('utf-8')
        finally:
            self._transcript_delete_func(c_transcript)

        words = list()
        for i in range(num_words.value):
            c_word = c_words[i]
            words.append(
                self.Word(
                    word=c_word.word.decode('utf-8'),
                    start_sec=c_word.start_sec,
                    end_sec=c_word.end_sec,
                    confidence=c_word.confidence))
    finally:
        self._words_delete_func(c_words)

    return transcript, words