-1

The code is meant to extract data from Praat: it should read the data in the WAV file and the TextGrid that were used for annotation in Praat, and then write the extracted information to a CSV file.

It returns the error shown below. However, after debugging the issue, it seems that the extractor is not extracting any features, and therefore the arrays are empty.

            from praatio import tgio
            from praatio import tgio
            from os.path import join
            from scipy.io import wavfile
            import subprocess
            import os
            import numpy as np
            import csv


            def extract_features(wavfile, praat_path=r'C:\Intel\Praat.exe'):
                """
                Extract features for an audio file by running ``features.praat``.

                Praat is started with ``--run`` on the ``features.praat`` script located
                in the current working directory. The script receives ``wavfile`` as its
                only argument, and everything it writes to standard output is parsed as
                whitespace-separated numbers.

                :param wavfile: Absolute path to a WAV file.
                :param praat_path: Path to the Praat executable.
                :return: Praat emotion features as a 1-D float array. The array is
                    empty when the script writes nothing to standard output.
                :raises subprocess.CalledProcessError: if Praat exits with a non-zero
                    status.
                :raises ValueError: if an output token is neither a number nor
                    ``--undefined--``.
                """
                features_script_path = join(os.getcwd(), 'features.praat')

                # The default path is a raw string: '\I' and '\P' are not valid escape
                # sequences and trigger warnings in a normal string literal.
                raw_output = subprocess.check_output([praat_path,
                                                      '--run',
                                                      features_script_path, wavfile])
                tokens = raw_output.decode("utf-8").split()

                # Praat prints '--undefined--' for values it cannot compute (for
                # example pitch statistics of an unvoiced snippet); keep the slot as
                # NaN instead of failing on float().
                return np.asarray([float('nan') if token == '--undefined--' else float(token)
                                   for token in tokens])


            def get_snippet_features(wav_file_path, start_time, end_time):
                """
                Extract features for the part of a WAV file between two time stamps.

                The samples between ``start_time`` and ``end_time`` are written to the
                temporary file ``data/temp/temp.wav`` below the current working
                directory, passed to ``extract_features`` and removed again afterwards.

                :param wav_file_path: Path to the WAV file to cut the snippet from.
                :param start_time: Start of the snippet, multiplied by the sample rate
                    to obtain the first sample index.
                :param end_time: End of the snippet, converted the same way; the sample
                    at this index is excluded.
                :return: Whatever ``extract_features`` returns for the snippet.
                """
                fs, data = wavfile.read(wav_file_path)
                start_time_sample = int(start_time * fs)
                end_time_sample = int(end_time * fs)

                temp_dir = join(os.getcwd(), 'data', 'temp')
                # Writing the snippet fails if the directory is missing.
                os.makedirs(temp_dir, exist_ok=True)
                temp_file_path = join(temp_dir, 'temp.wav')

                try:
                    wavfile.write(temp_file_path, rate=fs,
                                  data=data[start_time_sample:end_time_sample])
                    return extract_features(wavfile=temp_file_path)
                finally:
                    # Clean up even when writing or feature extraction raises; the
                    # file may not exist if the write itself failed.
                    if os.path.exists(temp_file_path):
                        os.remove(temp_file_path)


            def get_textgrid_features(filename,normalize=True):
                """
                Extract one feature vector per ``PointOfReference`` entry of a TextGrid.

                The TextGrid is read from ``data/<filename>`` below the current working
                directory; the audio is read from the file with the same base name and
                a ``.wav`` extension. The tier names and the entries of the
                ``Orthographic``, ``CodeSwitch`` and ``PointOfReference`` tiers are
                printed for inspection.

                :param filename: Name of the TextGrid file inside the ``data`` directory.
                :param normalize: When true, standardize every feature column to zero
                    mean and unit standard deviation across all entries.
                :return: Tuple ``(labels, features)``. ``labels`` is the list of entry
                    labels; ``features`` is a list of per-entry feature arrays, or a
                    single array when ``normalize`` is true.
                :raises KeyError: if one of the three tiers is missing.
                """
                file_path = join(os.getcwd(), 'data', filename)
                tg = tgio.openTextgrid(fnFullPath=file_path)

                # Swap only the extension; str.replace would also touch '.TextGrid'
                # appearing elsewhere in the path and ignore other spellings of it.
                wav_file_path = os.path.splitext(file_path)[0] + '.wav'

                print(tg.tierNameList)

                orthographic_tier = tg.tierDict['Orthographic']
                print(orthographic_tier.entryList)

                code_switch_tier = tg.tierDict['CodeSwitch']
                print(code_switch_tier.entryList)

                por_tier = tg.tierDict['PointOfReference']
                print(por_tier.entryList)

                features = []
                labels = []
                for item in por_tier.entryList:
                    file_features = get_snippet_features(wav_file_path=wav_file_path,
                                                         start_time=item.start,
                                                         end_time=item.end)
                    labels.append(item.label)
                    features.append(file_features)

                # normalization
                if normalize:
                    features = np.asarray(features, dtype=float)
                    # Nothing to normalize when no features were extracted.
                    if features.size:
                        mean = np.mean(features, axis=0)
                        std = np.std(features, axis=0)
                        # A constant column has zero spread; divide by 1 so it becomes
                        # 0 instead of NaN.
                        std = np.where(std == 0, 1.0, std)
                        features = (features - mean) / std

                return labels, features


            def generate_csv(labels,features,output_path):
                """
                Write labels and their feature vectors to a CSV file.

                The file starts with a header row, followed by one row per label that
                holds the label and the first nine values of the feature vector at the
                same position in ``features``. An existing file is overwritten.

                :param labels: Sequence of labels, one per row.
                :param features: Sequence of feature vectors, indexable in parallel
                    with ``labels``; each needs at least nine values.
                :param output_path: Path of the CSV file to write.
                :raises IndexError: if a feature vector is missing or has fewer than
                    nine values.
                """
                header = ['CS_TYPE', 'PITCH_AVG', 'PITCH_DIR', 'PITCH_DYN', 'PITCH_JIT',
                          'INT_AVG', 'INT_DYN', 'INT_JIT', 'SPC_SLO', 'SPC_JIT']
                feature_count = len(header) - 1

                # Mode 'w' already truncates an existing file. newline='' lets the csv
                # module control line endings; without it every row is followed by a
                # blank line on Windows.
                with open(output_path, 'w', newline='') as csvfile:
                    filewriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                                            quoting=csv.QUOTE_MINIMAL)
                    filewriter.writerow(header)
                    for index, label in enumerate(labels):
                        row = features[index]
                        filewriter.writerow([label] + [row[column] for column in range(feature_count)])


            # Extract the un-normalized features of every PointOfReference entry in
            # data/Ian.TextGrid, show them, and store them in data/csv/ian.csv.
            labels, features = get_textgrid_features('Ian.TextGrid', normalize=False)
            print(labels, features)
            generate_csv(labels,
                         features,
                         join(os.getcwd(), 'data', 'csv', 'ian.csv'))
  • 1
    Your code is pretty long, and without more clues (exact error trace, pointers, ..), it will be hard to help you out. What about len() ? – zarak May 02 '19 at 14:36
  • My last error trace is as follows: File "C:/Users/Warren/Desktop/example/praat_utils.py", line 96, in generate_csv(labels=labels,features=features,output_path=join(os.getcwd(),'data','csv','ian.csv')) File "C:/Users/Warren/Desktop/example/praat_utils.py", line 82, in generate_csv features[index][0], IndexError: index 0 is out of bounds for axis 0 with size 0 – Warren Farrugia May 02 '19 at 14:41
  • The error seems to come from the array. However, looking at what is output up to that point, none of the features that I am trying to extract are being returned. – Warren Farrugia May 02 '19 at 14:43

1 Answers1

0

I have a similar problem and I'm just here for an answer.

Pio Sammut
  • 23
  • 1
  • 10