I've been trying to learn audio classification using the TensorFlow implementation, but I am running into an error when I test it on another dataset.
Code:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import glob
import os
# Gather every entry two directory levels below the dataset root and shuffle
# the resulting list so the train/val/test slices below are random samples.
filenames = tf.random.shuffle(
    tf.io.gfile.glob(
        "C:/Users/Natha/Downloads/voice_emotions/audio_speech_actors_01-24/*/*"
    )
)
print("Number of files: ", len(filenames))
print("Sample file: ", filenames[0])

# Fixed-size split: first 1200 files for training, next 100 for validation,
# everything after index 1300 for testing.
train_files, val_files, test_files = (
    filenames[:1200],
    filenames[1200:1300],
    filenames[1300:],
)
def decode_audio(audio_binary):
    """Decode the raw bytes of a WAV file into a 1-D waveform tensor.

    Args:
        audio_binary: Scalar string tensor holding the file contents, e.g.
            the result of ``tf.io.read_file``.

    Returns:
        A float tensor of shape ``(samples,)``.
    """
    # Ask the decoder for exactly one channel so the channel axis always has
    # size 1. Without this, any file that is not mono makes the squeeze below
    # raise because the last axis cannot be removed.
    # NOTE(review): decode_wav is documented for 16-bit PCM WAV input; a file
    # in another encoding (or a non-WAV file) will still fail inside the
    # decoder -- confirm the encoding of every file being read.
    audio, _ = tf.audio.decode_wav(audio_binary, desired_channels=1)
    # Drop the size-1 channel axis: (samples, 1) -> (samples,).
    return tf.squeeze(audio, axis=-1)
def get_label(file_path):
    """Return the label encoded in a file path.

    The path is split on "-" and the fourth field counted from the end is
    returned as a scalar string tensor.

    NOTE(review): if the files follow the RAVDESS naming scheme
    (modality-channel-emotion-intensity-statement-repetition-actor.wav), the
    fourth-from-last field would be the intensity and the emotion would be
    the fifth-from-last -- confirm which one is intended as the label.
    """
    return tf.strings.split(file_path, sep="-")[-4]
def get_waveform_and_label(file_path):
    """Load one audio file and pair its waveform with its label.

    Args:
        file_path: Scalar string tensor with the path of the file to read.

    Returns:
        A ``(waveform, label)`` tuple: the decoded audio from
        ``decode_audio`` and the label from ``get_label``.
    """
    waveform = decode_audio(tf.io.read_file(file_path))
    return waveform, get_label(file_path)
# Let tf.data pick the level of parallelism for the map below.
AUTOTUNE = tf.data.AUTOTUNE

# Dataset of training file paths, then a dataset of (waveform, label) pairs
# produced by reading and decoding each path.
files_ds = tf.data.Dataset.from_tensor_slices(train_files)
waveform_ds = files_ds.map(
    map_func=get_waveform_and_label,
    num_parallel_calls=AUTOTUNE,
)
# Plot the first rows * cols waveforms in a grid, one subplot per example,
# each titled with its label.
rows = 3
cols = 3
n = rows * cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 12))

# axes.flat walks the grid row by row, which matches indexing the grid with
# (i // cols, i % cols) for i = 0 .. n - 1.
for ax, (audio, label) in zip(axes.flat, waveform_ds.take(n)):
    ax.plot(audio.numpy())
    ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
    # The label arrives as a bytes scalar; decode it for use as a title.
    ax.set_title(label.numpy().decode('utf-8'))

plt.show()
Error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-108-8a7064627ebf> in <module>
3 n = rows * cols
4 fig, axes = plt.subplots(rows, cols, figsize=(10, 12))
----> 5 for i, (audio, label) in enumerate(waveform_ds.take(n)):
6 r = i // cols
7 c = i % cols
~\anaconda3\lib\site-packages\tensorflow\python\data\ops\iterator_ops.py in __next__(self)
759 def __next__(self):
760 try:
--> 761 return self._next_internal()
762 except errors.OutOfRangeError:
763 raise StopIteration
~\anaconda3\lib\site-packages\tensorflow\python\data\ops\iterator_ops.py in _next_internal(self)
742 # to communicate that there is no more data to iterate over.
743 with context.execution_mode(context.SYNC):
--> 744 ret = gen_dataset_ops.iterator_get_next(
745 self._iterator_resource,
746 output_types=self._flat_output_types,
~\anaconda3\lib\site-packages\tensorflow\python\ops\gen_dataset_ops.py in iterator_get_next(iterator, output_types, output_shapes, name)
2725 return _result
2726 except _core._NotOkStatusException as e:
-> 2727 _ops.raise_from_not_ok_status(e, name)
2728 except _core._FallbackException:
2729 pass
~\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py in raise_from_not_ok_status(e, name)
6939 message = e.message + (" name: " + name if name is not None else "")
6940 # pylint: disable=protected-access
-> 6941 six.raise_from(core._status_to_exception(e.code, message), None)
6942 # pylint: enable=protected-access
6943
~\anaconda3\lib\site-packages\six.py in raise_from(value, from_value)
InvalidArgumentError: Data too short when trying to read string
[[{{node DecodeWav}}]] [Op:IteratorGetNext]
I've made sure that all items in the dataset are the right size, but the error might still be caused by the dataset. I've seen many other people get this error, but none of their solutions helped.
Versions: TensorFlow 2.6.0. Dataset: https://www.kaggle.com/uwrfkaggler/ravdess-emotional-speech-audio