I am trying to create a simple PyQt5 GUI for Windows 10 that uses OpenAI's Whisper model to transcribe a sound file and write the results to an Excel file. It works on my own computer, where I have installed the dependencies Whisper needs as stated on its GitHub page, i.e. FFmpeg. I provide a minimal example of my code below:
# Import library
import whisper
import os
from PyQt5 import QtCore, QtGui, QtWidgets
import pandas as pd
import xlsxwriter
class Ui_Dialog(QtWidgets.QDialog):
    """Small GUI that transcribes sound files with Whisper into an Excel file.

    Workflow: pick sound files (``browsefiles``), pick a model size in the
    combo box (``model_load``), then press the run button (``run``). The
    result is written next to the first selected file as
    ``<name>_OUTPUT.xlsx``.

    The three slot methods keep a positional ``Dialog`` parameter because they
    are connected directly to Qt signals, which pass their payload (the
    button's ``checked`` flag or the combo box index) in that position. The
    value is not used.
    """

    # Directory holding the Whisper checkpoints (``<size>.pt``).
    # The path is set to where the models are on the other machine.
    MODEL_DIR = r'C:\Users\Søren\Downloads\Whisper_gui\models'

    # First combo box entry; it is a prompt, not a loadable model.
    _PLACEHOLDER = 'Chose model'

    # Define functions to use in GUI
    def browsefiles(self, Dialog):
        """Ask the user for sound files and remember them in ``self.liste``.

        ``self.liste`` becomes an empty list when the dialog is cancelled.
        """
        # getOpenFileNames returns (list_of_paths, selected_filter).
        paths, _selected_filter = QtWidgets.QFileDialog.getOpenFileNames(
            self,
            "Select soundfiles",
            os.getcwd(),
            "lyd(*.mp2 *.mp3 *.mp4 *.m4a *.wma *.wav)",
        )
        self.liste = list(paths)

    def model_load(self, Dialog):
        """Load the Whisper checkpoint matching the combo box selection.

        Selecting the placeholder entry (or an empty combo box) unloads the
        model instead of trying to open a non-existent checkpoint file.
        """
        choice = self.combo_modelSize.currentText()
        if not choice or choice == self._PLACEHOLDER:
            self.model = None
            return
        # Load picked model
        self.model = whisper.load_model(os.path.join(self.MODEL_DIR, choice + ".pt"))

    def run(self, Dialog):
        """Transcribe every selected file and write one combined Excel file.

        Shows a warning and returns without doing anything when no files or
        no model have been selected yet.
        """
        sound_files = getattr(self, "liste", None)
        if not sound_files:
            QtWidgets.QMessageBox.warning(
                self, "Whisper", "Select at least one sound file first.")
            return
        model = getattr(self, "model", None)
        if model is None:
            QtWidgets.QMessageBox.warning(
                self, "Whisper", "Choose a model size first.")
            return

        # One dataframe per sound file
        frames = []
        for path in sound_files:
            # Run model
            result = model.transcribe(path)
            # Extract results; collecting plain dicts and building the frame
            # once replaces the per-row DataFrame.append removed in pandas 2.0.
            filename = os.path.basename(path)
            rows = [
                {
                    "filename": filename,
                    "start": segment["start"],
                    "end": segment["end"],
                    "text": segment["text"],
                }
                for segment in result["segments"]
            ]
            df = pd.DataFrame(rows, columns=["filename", "start", "end", "text"])
            # Add detected language to dataframe
            df["sprog"] = result["language"]
            frames.append(df)

        # Concatenate list of dfs
        dataframe = pd.concat(frames)
        # splitext only strips the file extension, so dots elsewhere in the
        # path (e.g. in a folder name) no longer truncate the output path.
        output_path = os.path.splitext(sound_files[0])[0] + '_OUTPUT.xlsx'
        self._write_excel(dataframe, output_path)

    def _write_excel(self, dataframe, output_path):
        """Write ``dataframe`` to ``output_path`` as a formatted Excel table."""
        # The context manager saves and closes the workbook exactly once,
        # also when an exception is raised while writing.
        with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
            wrap_format = writer.book.add_format({"text_wrap": True, 'num_format': '@'})
            # Write the dataframe data to XlsxWriter. Turn off the default
            # header and index and skip one row to allow us to insert a user
            # defined header.
            dataframe.to_excel(writer, sheet_name="Output", startrow=1,
                               header=False, index=False)
            worksheet = writer.sheets["Output"]
            # Get the dimensions of the dataframe.
            max_row, max_col = dataframe.shape
            # Create a list of column headers, to use in add_table().
            column_settings = [{'header': column} for column in dataframe.columns]
            # Add the Excel table structure. Pandas will add the data.
            # A table needs at least one data row, hence max(..., 1) for the
            # case where no segments were transcribed.
            worksheet.add_table(0, 0, max(max_row, 1), max_col - 1,
                                {'columns': column_settings})
            # Make the columns wider for clarity.
            worksheet.set_column(0, max_col - 1, 12)
            text_col = dataframe.columns.get_loc("text")
            worksheet.set_column(text_col, text_col, 30, wrap_format)

    @staticmethod
    def _small_font():
        """Return the 6 pt font shared by all widgets of the dialog."""
        font = QtGui.QFont()
        font.setPointSize(6)
        return font

    ## Design setup
    def setupUi(self, Dialog):
        """Create the widgets on ``Dialog`` and connect them to the slots."""
        Dialog.setObjectName("Dialog")
        Dialog.resize(730, 400)

        self.select_files = QtWidgets.QPushButton(Dialog)
        self.select_files.setGeometry(QtCore.QRect(40, 62, 81, 31))
        self.select_files.setFont(self._small_font())
        self.select_files.setObjectName("select_files")

        self.combo_modelSize = QtWidgets.QComboBox(Dialog)
        self.combo_modelSize.setGeometry(QtCore.QRect(40, 131, 100, 21))
        self.combo_modelSize.setFont(self._small_font())
        self.combo_modelSize.setObjectName("combo_modelSize")

        self.runButton = QtWidgets.QPushButton(Dialog)
        self.runButton.setGeometry(QtCore.QRect(40, 289, 71, 21))
        self.runButton.setFont(self._small_font())
        self.runButton.setObjectName("runButton")

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

        # Fill the combo box before connecting currentIndexChanged so that
        # populating it does not trigger a model load.
        modelSize_options = [self._PLACEHOLDER, 'tiny', 'base', 'small', 'medium', 'large']
        self.combo_modelSize.addItems(modelSize_options)

        # Do an action!
        self.select_files.clicked.connect(self.browsefiles)
        self.combo_modelSize.currentIndexChanged.connect(self.model_load)
        self.runButton.clicked.connect(self.run)

    def retranslateUi(self, Dialog):
        """Set the window title and button captions."""
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Dialog"))
        self.runButton.setText(_translate("Dialog", "Go!"))
        self.select_files.setText(_translate("Dialog", "Select"))
if __name__ == "__main__":
    import sys

    # Whisper decodes audio by launching the ``ffmpeg`` executable as a
    # subprocess, so it has to be findable through PATH. In a PyInstaller
    # build the files added with ``--add-data "./ffmpeg/*;./ffmpeg/"`` end up
    # in an ``ffmpeg`` folder below ``sys._MEIPASS``; when running from source
    # the folder is looked up next to this script instead.
    # NOTE(review): assumes that folder contains ffmpeg.exe itself - confirm.
    bundle_dir = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
    ffmpeg_dir = os.path.join(bundle_dir, "ffmpeg")
    if os.path.isdir(ffmpeg_dir):
        # Prepend so the bundled binary is found even on machines that have
        # no system-wide FFmpeg installation.
        os.environ["PATH"] = ffmpeg_dir + os.pathsep + os.environ.get("PATH", "")

    app = QtWidgets.QApplication(sys.argv)
    Dialog = QtWidgets.QDialog()
    ui = Ui_Dialog()
    ui.setupUi(Dialog)
    Dialog.show()
    sys.exit(app.exec_())
I compile this app with PyInstaller using the following command. I had some issues to begin with, so I looked at what others with similar problems had done and ended up with this:
pyinstaller --onedir --hidden-import=pytorch --collect-data torch --copy-metadata torch --copy-metadata tqdm --copy-metadata tokenizers --copy-metadata importlib_metadata --hidden-import="sklearn.utils._cython_blas" --hidden-import="sklearn.neighbors.typedefs" --hidden-import="sklearn.neighbors.quad_tree" --hidden-import="sklearn.tree" --hidden-import="sklearn.tree._utils" --copy-metadata regex --copy-metadata requests --copy-metadata packaging --copy-metadata filelock --copy-metadata numpy --add-data "./ffmpeg/*;./ffmpeg/" --hidden-import=whisper --copy-metadata whisper --collect-data whisper minimal_example_whisper.py
When I take the resulting dist directory and try to run the app on another Windows machine without FFmpeg installed (or Whisper or anything else), I get the following error in the terminal when I push the "run" button in the app (otherwise the app does run).
C:\Users\Søren>"G:\minimal_example_whisper\minimal_example_whisper.exe"
whisper\transcribe.py:70: UserWarning: FP16 is not supported on CPU; using FP32 instead
Traceback (most recent call last):
File "minimal_example_whisper.py", line 45, in run
File "whisper\transcribe.py", line 76, in transcribe
File "whisper\audio.py", line 111, in log_mel_spectrogram
File "whisper\audio.py", line 42, in load_audio
File "ffmpeg\_run.py", line 313, in run
File "ffmpeg\_run.py", line 284, in run_async
File "subprocess.py", line 951, in __init__
File "subprocess.py", line 1420, in _execute_child
FileNotFoundError: [WinError 2] Den angivne fil blev ikke fundet
I suspect this has something to do with FFmpeg not being installed on the other machine's system. Does anyone have an automatic solution for this when compiling the app, or can it simply only run on machines that have FFmpeg installed?
Thanks in advance!