1

I want to cut a .wav file into multiple segments of the same length.

I found this code: https://gist.github.com/kylemcdonald/c8e62ef8cb9515d64df4

But it splits the file into parts based on onset detection with librosa. I assume that the answer to my question is simple, but I would appreciate any help.

This is the code I used with Python 3.7.6 on Ubuntu (in conda):


import matplotlib
import matplotlib.pyplot as plt # For displaying the output
import librosa
import numpy as np # For some mathematical operations
from glob import glob # To grab files
import os



# Directory that receives the exported text files (vectors, words, filenames)
save_dir = './cut_4s'

### Load the audio_file
# './' is the current working directory, which is not necessarily the
# directory this script lives in.
data_dir = './'
# Sort the matches: glob returns files in arbitrary order, and only the first
# one is processed below, so sorting makes the choice reproducible.
audio_files = sorted(glob(os.path.join(data_dir, '*.wav')))
found = len(audio_files)
print("Audiofiles found: " + str(found))
if not audio_files:
    # Stop with a readable message instead of an IndexError on audio_files[0].
    raise SystemExit("No .wav files found in " + data_dir)
input("Press Enter to continue...")

# Only the first file found is loaded and processed.
y, sr = librosa.load(audio_files[0])
length = librosa.get_duration(y=y, sr=sr)  # Duration of the file in seconds
time = np.arange(0, len(y)) / sr  # Time stamp (in seconds) of every sample
print(str(length))

# Plot audio over time
fig, ax = plt.subplots()
ax.plot(time, y)
ax.set(xlabel='Time (s)', ylabel='Sound Amplitude')
plt.show()

# Onset detection: the onset strength envelope is computed from the
# dB-scaled magnitude of the constant-Q transform.
C = np.abs(librosa.cqt(y=y, sr=sr))
o_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
# Alternative: o_env = librosa.onset.onset_strength(y=y, sr=sr, feature=librosa.cqt)
onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)

def prepare(y, sr=22050):
    """Return `y` as a mono, normalized clip that is exactly `sr` samples long.

    Args:
        y: Audio signal as accepted by ``librosa.to_mono``.
        sr: Sampling rate in Hz; also used as the target length in samples,
            so the result covers one second of audio.

    Returns:
        The signal after ``librosa.to_mono``, ``librosa.util.fix_length`` and
        ``librosa.util.normalize`` (default settings) have been applied.
    """
    y = librosa.to_mono(y)
    # Pass `size` by keyword: newer librosa releases no longer accept it
    # positionally, while older releases accept the keyword form as well.
    y = librosa.util.fix_length(y, size=sr)  # 1 second of audio
    y = librosa.util.normalize(y)
    return y

def get_fingerprint(y, sr=22050):
    """Return a flat constant-Q fingerprint of a one-second version of `y`.

    Args:
        y: Audio signal; it is passed through ``prepare`` first, so every
            fingerprint is computed from the same number of samples.
        sr: Sampling rate of `y` in Hz.

    Returns:
        The constant-Q transform (hop length 2048) flattened to 1-D in
        column-major ('F') order.
    """
    y = prepare(y, sr)
    # Pass the signal by keyword: newer librosa releases require `y=`,
    # and older releases accept it too.
    cqt = librosa.cqt(y=y, sr=sr, hop_length=2048)
    return cqt.flatten('F')

def normalize(x):
    """Rescale `x` in place so each column spans the range [0, 1].

    The minimum along axis 0 is subtracted, then the result is divided by its
    maximum along axis 0. Columns whose values are all equal are left at 0
    instead of becoming NaN.

    Args:
        x: Floating-point NumPy array; it is modified in place.

    Returns:
        The same array object `x`, after rescaling.
    """
    x -= x.min(axis=0)
    peak = x.max(axis=0)
    # A constant column has zero range after the shift; dividing by 1 keeps it
    # at 0 instead of producing 0/0 = NaN.
    x /= np.where(peak == 0, 1, peak)
    return x

def basename(file):
    """Return the final component of the path `file` without its extension."""
    stem, _extension = os.path.splitext(os.path.basename(file))
    return stem

vectors = []
words = []
filenames = []

# Segment boundaries in samples: every detected onset plus the end of the
# signal, so the last onset also gets a closing boundary. np.append is used
# because np.concatenate expects a sequence of arrays and would treat len(y)
# as the axis argument.
onset_samples = np.append(librosa.frames_to_samples(onset_frames), len(y))
starts = onset_samples[0:-1]
stops = onset_samples[1:]

samples_folder = os.path.join(data_dir, 'samples')
# exist_ok tolerates an existing folder without hiding other errors
# (e.g. missing permissions), unlike a bare `except: pass`.
os.makedirs(samples_folder, exist_ok=True)
# The text files below are written into save_dir, so it has to exist as well.
os.makedirs(save_dir, exist_ok=True)

for i, (start, stop) in enumerate(zip(starts, stops)):
    audio = y[start:stop]
    filename = os.path.join(samples_folder, str(i) + '.wav')
    # NOTE(review): librosa.output was removed in librosa 0.8; on newer
    # versions this call has to be replaced (e.g. by soundfile.write).
    librosa.output.write_wav(filename, audio, sr)
    vector = get_fingerprint(audio, sr=sr)
    word = basename(filename)
    vectors.append(vector)
    words.append(word)
    filenames.append(filename)
np.savetxt(os.path.join(save_dir, 'vectors'), vectors, fmt='%.5f', delimiter='\t')
np.savetxt(os.path.join(save_dir, 'words'), words, fmt='%s')
np.savetxt(os.path.join(save_dir, 'filenames.txt'), filenames, fmt='%s')

sre
  • 31
  • 5
  • Are you asking for same lengths onsets based on detected onsets or just a simple split? – Lukasz Tracewski Mar 06 '20 at 16:32
  • I don't want any onset detection at all, i just want to cut the .wav into segments with the same length, e.g.: I have a 60sec file and want to cut it into 12 parts of 5sec. – sre Mar 07 '20 at 10:13
  • Once you read the audio in, it's just an array. If e.g. your wave has a sampling frequency of 16000 Hz and lasts 3 s, you will have `3 * 16000 = 48000` numbers in the array (assuming it's mono). Now you can simply split the array. Try that. If things don't work, put your code here instead of what you have copied from Kyle. BTW, you might want to read up on how `numpy` slices arrays. – Lukasz Tracewski Mar 07 '20 at 13:53

0 Answers0