1

In speech translation, I attempted to map an audio preprocessing function using librosa that was used mfcc and tensorflow.data.Dataset.from_tensor_slices.map, but there was an error. Please assist...

def encode_single_sample(wav_file, label):
    ###########################################
    ##  Process the Audio
    ##########################################
    # 1. Read wav file
    signal, sr = librosa.load(wavs_path + wav_file, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=signal, n_mfcc=13, sr=sr)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    mfccs_features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs))
    # 5. We only need the magnitude, which can be derived by applying tf.abs
    spectrogram = tf.abs(mfccs_features)
    spectrogram = tf.math.pow(spectrogram, 0.5)
    # 6. normalisation
    means = tf.math.reduce_mean(spectrogram, 1, keepdims=True)
    stddevs = tf.math.reduce_std(spectrogram, 1, keepdims=True)
    spectrogram = (spectrogram - means) / (stddevs + 1e-10)
    ###########################################
    ##  Process the label
    ##########################################
    # 7. Convert label to Lower case
    label = tf.strings.lower(label)
    # 8. Split the label
    label = tf.strings.unicode_split(label, input_encoding="UTF-8")
    # 9. Map the characters in label to numbers
    label = char_to_num(label)
    # 10. Return a dict as our model is expecting two inputs
    return spectrogram, label
    ------------------------
    batch_size = 32
   #Define the trainig dataset
   train_dataset = tf.data.Dataset.from_tensor_slices(
            (list(trainSet_DF["file_name"]), list(trainSet_DF["Translation"]))
   )
   train_dataset = (
      train_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
      .padded_batch(batch_size)
      .prefetch(buffer_size=tf.data.AUTOTUNE)
    )

TypeError: in user code:

    File "C:\Users\AhmedPro\AppData\Local\Temp\ipykernel_12368\3502750540.py", line 6, in encode_single_sample  *
        signal, sr = librosa.load(wavs_path + wav_file, res_type='kaiser_fast')
    File "C:\Users\AhmedPro\anaconda3\lib\site-packages\librosa\util\decorators.py", line 51, in inner_f  *
        return f(*args, **kwargs)
    File "C:\Users\AhmedPro\anaconda3\lib\site-packages\librosa\core\audio.py", line 164, in load  *
        y, sr_native = __soundfile_load(path, offset, duration, dtype)
    File "C:\Users\AhmedPro\anaconda3\lib\site-packages\librosa\core\audio.py", line 195, in __soundfile_load  *
        context = sf.SoundFile(path)
    File "C:\Users\AhmedPro\anaconda3\lib\site-packages\soundfile.py", line 629, in __init__  **
        self._file = self._open(file, mode_int, closefd)
    File "C:\Users\AhmedPro\anaconda3\lib\site-packages\soundfile.py", line 1182, in _open
        raise TypeError("Invalid file: {0!r}".format(self.name))

    TypeError: Invalid file: <tf.Tensor 'add:0' shape=() dtype=string>

please, help Thanks.........

0 Answers0