The code is meant to extract data from Praat: it should read the WAV file and the TextGrid that were used for annotation in Praat, extract the annotated data from them, and then write the extracted information to a CSV file.
It returned the following error. However, after debugging the issue, the cause seems to be that the extractor is not extracting any features, and therefore the arrays are empty.
from praatio import tgio
from praatio import tgio
from os.path import join
from scipy.io import wavfile
import subprocess
import os
import numpy as np
import csv
def extract_features(wavfile):
    """
    Extract features for an audio file.

    Runs the ``features.praat`` script found in the current working
    directory through the Praat executable and parses everything the
    process writes to standard output as whitespace-separated floats.

    :param wavfile: Absolute path to a WAV file. Inside this function the
        name refers to that path, not to the ``scipy.io.wavfile`` module.
    :return: Praat emotion features as a NumPy array; the array is empty
        when the script prints nothing.
    :raises subprocess.CalledProcessError: if Praat exits with a non-zero
        status.
    :raises ValueError: if a printed token cannot be converted to float.
    """
    features_script_path = join(os.getcwd(), 'features.praat')
    # Raw string: '\I' and '\P' are not valid escape sequences, so a normal
    # string literal here triggers an "invalid escape sequence" warning.
    praat_executable = r'C:\Intel\Praat.exe'
    raw_output = subprocess.check_output(
        [praat_executable, '--run', features_script_path, wavfile])
    tokens = raw_output.decode("utf-8").split()
    return np.asarray([float(token) for token in tokens])
def get_snippet_features(wav_file_path, start_time, end_time):
    """
    Extract Praat features for one time span of a WAV file.

    The samples between ``start_time`` and ``end_time`` are written to
    ``<cwd>/data/temp/temp.wav``, that file is handed to
    ``extract_features`` and it is removed again afterwards.

    :param wav_file_path: Path of the source WAV file.
    :param start_time: Start of the span in seconds (multiplied by the
        sample rate to obtain the first sample index).
    :param end_time: End of the span in seconds (exclusive sample index
        after conversion).
    :return: The value returned by ``extract_features`` for the snippet.
    """
    fs, data = wavfile.read(wav_file_path)
    first_sample = int(start_time * fs)
    last_sample = int(end_time * fs)

    # Create the scratch directory on demand so a fresh checkout does not
    # fail with FileNotFoundError when writing the snippet.
    temp_dir = join(os.getcwd(), 'data', 'temp')
    os.makedirs(temp_dir, exist_ok=True)
    temp_file_path = join(temp_dir, 'temp.wav')

    wavfile.write(temp_file_path, rate=fs, data=data[first_sample:last_sample])
    try:
        return extract_features(wavfile=temp_file_path)
    finally:
        # Always clean up, even when the Praat call or the parsing fails.
        os.remove(temp_file_path)
def get_textgrid_features(filename, normalize=True):
    """
    Extract one feature vector per entry of the 'PointOfReference' tier.

    The TextGrid is read from ``<cwd>/data/<filename>``; the audio is taken
    from the file with the same base name and a ``.wav`` extension. The tier
    names and the entries of the 'Orthographic', 'CodeSwitch' and
    'PointOfReference' tiers are printed for inspection.

    :param filename: Name of the TextGrid file inside ``<cwd>/data``.
    :param normalize: When true, each feature column is shifted to zero mean
        and scaled to unit standard deviation across all entries.
    :return: ``(labels, features)``. ``labels`` is a list of entry labels.
        ``features`` is a list of per-entry feature arrays, or a NumPy
        array when ``normalize`` is true.
    :raises KeyError: if one of the three tiers is missing.
    """
    file_path = join(os.getcwd(), 'data', filename)
    tg = tgio.openTextgrid(fnFullPath=file_path)
    # Swap only the extension; str.replace would also rewrite a
    # '.TextGrid' occurring elsewhere in the path.
    wav_file_path = os.path.splitext(file_path)[0] + '.wav'

    print(tg.tierNameList)
    orthographic_tier = tg.tierDict['Orthographic']
    print(orthographic_tier.entryList)
    code_switch_tier = tg.tierDict['CodeSwitch']
    print(code_switch_tier.entryList)
    por_tier = tg.tierDict['PointOfReference']
    print(por_tier.entryList)

    features = []
    labels = []
    for item in por_tier.entryList:
        file_features = get_snippet_features(wav_file_path=wav_file_path,
                                             start_time=item.start,
                                             end_time=item.end)
        labels.append(item.label)
        features.append(file_features)

    # normalization
    if normalize:
        feature_matrix = np.asarray(features, dtype=float)
        if feature_matrix.size:
            mean = feature_matrix.mean(axis=0)
            std = feature_matrix.std(axis=0)
            # A constant column has std == 0; divide by 1 instead so it
            # becomes all zeros rather than NaN.
            std = np.where(std == 0, 1.0, std)
            feature_matrix = (feature_matrix - mean) / std
        features = feature_matrix
    return labels, features
def generate_csv(labels, features, output_path):
    """
    Write labels and their nine features to a CSV file.

    The file at ``output_path`` is created or overwritten. The first row is
    the fixed header; each following row holds one label followed by the
    first nine values of the matching entry in ``features``.

    All rows are assembled before the file is opened, so an invalid
    ``features`` argument does not destroy an existing output file.

    :param labels: Sequence of row labels (written to the CS_TYPE column).
    :param features: Indexable collection with one indexable feature row
        per label, each providing at least nine values.
    :param output_path: Path of the CSV file to write.
    :raises IndexError: if ``features`` has fewer rows than ``labels`` or a
        row holds fewer than nine values (for example because the feature
        extractor returned an empty array).
    """
    header = ['CS_TYPE', 'PITCH_AVG', 'PITCH_DIR', 'PITCH_DYN', 'PITCH_JIT',
              'INT_AVG', 'INT_DYN', 'INT_JIT', 'SPC_SLO', 'SPC_JIT']
    n_features = len(header) - 1

    rows = []
    for index, label in enumerate(labels):
        try:
            values = features[index]
            rows.append([label] + [values[i] for i in range(n_features)])
        except IndexError as err:
            raise IndexError(
                'features for label %d (%r) do not provide the %d values '
                'required by the CSV header' % (index, label, n_features)
            ) from err

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows. Mode 'w' already truncates
    # an existing file, so no explicit removal is needed.
    with open(output_path, 'w', newline='') as csvfile:
        filewriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
        filewriter.writerow(header)
        filewriter.writerows(rows)
# Script driver: read data/Ian.TextGrid (without normalization), echo the
# extracted labels and features, then store them in data/csv/ian.csv.
labels, features = get_textgrid_features('Ian.TextGrid', normalize=False)
print(labels, features)
generate_csv(labels, features, join(os.getcwd(), 'data', 'csv', 'ian.csv'))