In speech translation, I attempted to map an audio preprocessing function using librosa that was used mfcc and tensorflow.data.Dataset.from_tensor_slices.map, but there was an error. Please assist...
def encode_single_sample(wav_file, label):
###########################################
## Process the Audio
##########################################
# 1. Read wav file
signal, sr = librosa.load(wavs_path + wav_file, res_type='kaiser_fast')
mfccs = librosa.feature.mfcc(y=signal, n_mfcc=13, sr=sr)
delta_mfccs = librosa.feature.delta(mfccs)
delta2_mfccs = librosa.feature.delta(mfccs, order=2)
mfccs_features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs))
# 5. We only need the magnitude, which can be derived by applying tf.abs
spectrogram = tf.abs(mfccs_features)
spectrogram = tf.math.pow(spectrogram, 0.5)
# 6. normalisation
means = tf.math.reduce_mean(spectrogram, 1, keepdims=True)
stddevs = tf.math.reduce_std(spectrogram, 1, keepdims=True)
spectrogram = (spectrogram - means) / (stddevs + 1e-10)
###########################################
## Process the label
##########################################
# 7. Convert label to Lower case
label = tf.strings.lower(label)
# 8. Split the label
label = tf.strings.unicode_split(label, input_encoding="UTF-8")
# 9. Map the characters in label to numbers
label = char_to_num(label)
# 10. Return a dict as our model is expecting two inputs
return spectrogram, label
------------------------
batch_size = 32
#Define the trainig dataset
train_dataset = tf.data.Dataset.from_tensor_slices(
(list(trainSet_DF["file_name"]), list(trainSet_DF["Translation"]))
)
train_dataset = (
train_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
.padded_batch(batch_size)
.prefetch(buffer_size=tf.data.AUTOTUNE)
)
TypeError: in user code:
File "C:\Users\AhmedPro\AppData\Local\Temp\ipykernel_12368\3502750540.py", line 6, in encode_single_sample *
signal, sr = librosa.load(wavs_path + wav_file, res_type='kaiser_fast')
File "C:\Users\AhmedPro\anaconda3\lib\site-packages\librosa\util\decorators.py", line 51, in inner_f *
return f(*args, **kwargs)
File "C:\Users\AhmedPro\anaconda3\lib\site-packages\librosa\core\audio.py", line 164, in load *
y, sr_native = __soundfile_load(path, offset, duration, dtype)
File "C:\Users\AhmedPro\anaconda3\lib\site-packages\librosa\core\audio.py", line 195, in __soundfile_load *
context = sf.SoundFile(path)
File "C:\Users\AhmedPro\anaconda3\lib\site-packages\soundfile.py", line 629, in __init__ **
self._file = self._open(file, mode_int, closefd)
File "C:\Users\AhmedPro\anaconda3\lib\site-packages\soundfile.py", line 1182, in _open
raise TypeError("Invalid file: {0!r}".format(self.name))
TypeError: Invalid file: <tf.Tensor 'add:0' shape=() dtype=string>
please, help Thanks.........