1

I am using Python (pydub) and I am trying to split an audio file at the points where the sound changes, so that each piece contains one uniform sound. I really hate to post the entire code, but I have tried multiple options without any success. With the method below the file does get cut, but each piece either contains only part of a sound or misses it entirely, and the lengths of the pieces vary within the same sound file: you may find the first sound lasting 3 seconds, the second sound lasting 5 seconds, and so on. Kindly help me.

from pydub import AudioSegment
from pydub.silence import split_on_silence
import threading
import time


# NOTE(review): `sr` is not imported anywhere in the visible code, so this line
# raises NameError as written. Presumably `import speech_recognition as sr` is
# intended -- confirm. `r` is not used by any of the visible code.
r = sr.Recognizer()

def worker(audio, silenceLen, silenceThresh, **kwargs):
    """Split ``audio`` on silence with one parameter set and record the result.

    Runs as a thread target. A non-empty split is appended to the module-level
    ``chunks`` list together with the parameters that produced it, and one
    entry is removed from the module-level ``threads`` list when the work is
    finished so the caller can see that a slot is free again.

    Args:
        audio: Segment to split; forwarded to ``split_on_silence``.
        silenceLen: Forwarded as ``min_silence_len``.
        silenceThresh: Forwarded as ``silence_thresh``.
        **kwargs: May contain ``keepSilence``, forwarded as ``keep_silence``.
            When absent, the module-level ``keepSilence`` is used instead.
    """
    # Honour the value the caller actually passed; only fall back to the
    # module-level setting when the keyword was omitted.
    keep_silence = kwargs['keepSilence'] if 'keepSilence' in kwargs else keepSilence

    try:
        chunk = split_on_silence(
            audio,
            min_silence_len=silenceLen,
            silence_thresh=silenceThresh,
            keep_silence=keep_silence,
        )

        # Parameter sets that produced no pieces are not worth recording.
        if chunk:
            chunks.append({'Chunk': chunk, 'SilenceThresh': silenceThresh, 'SilenceLen': silenceLen, 'keepSilence': keep_silence})
    finally:
        # Always release the slot, even if splitting failed; otherwise the
        # caller's "wait until fewer than N threads" loop could block forever.
        threads.pop()



def match_target_amplitude(aChunk, target_dBFS):
    """Return ``aChunk`` with gain applied so that its level becomes ``target_dBFS``.

    The gain is the difference between the requested level and the chunk's
    current ``dBFS`` value; the result is whatever ``aChunk.apply_gain`` returns.
    """
    gain_delta = target_dBFS - aChunk.dBFS
    adjusted = aChunk.apply_gain(gain_delta)
    return adjusted

# Shared state for the parameter search: `threads` holds the worker threads
# that were started, `chunks` holds one result record per parameter set.
threads, chunks = [], []

# Stop searching once this many result records exist (picked for my PC specs).
max_num = 20
# EDIT: Forgot to add this -- value handed to split_on_silence as keep_silence.
keepSilence = 100

# Recording that is going to be split.
audio = AudioSegment.from_wav("Audio/Sample001/001/001000.wav")

# Testing for better parameters: run every (silenceLen, silenceThresh) pair on
# its own worker thread until enough non-empty results have been collected.
# NOTE(review): silenceThresh is handed to split_on_silence as a dBFS level;
# the non-negative part of range(-30, 30) presumably never yields chunks --
# confirm and narrow the range if so.
in_flight = []  # threads started by this loop that may still be running
for silenceLen in range(1, 1000): # Can be changed
    if len(chunks) >= max_num:
        break
    for silenceThresh in range(-30, 30): # Can be changed
        t = threading.Thread(target=worker, kwargs={'audio': audio, 'silenceThresh': silenceThresh, 'silenceLen': silenceLen, 'keepSilence' : keepSilence})
        threads.append(t)
        in_flight.append(t)
        t.start()

        # Allow at most 50 live workers. Liveness is checked on the threads
        # themselves rather than on len(threads), so a worker that dies
        # without updating `threads` cannot stall the search forever.
        while True:
            in_flight = [w for w in in_flight if w.is_alive()]
            if len(in_flight) < 50:
                break
            print('Waiting')
            time.sleep(3)

        if len(chunks) >= max_num:
            break

# Let the remaining workers finish before `chunks` is consumed; otherwise
# results could still be appended while the slices are being saved.
for t in in_flight:
    t.join()

# Saving the slices
# `num` is a running file index shared by ALL parameter sets. It must not be
# reset per set: the file name is derived from it, and restarting at 0 would
# make every set overwrite the files written for the previous one.
num = 0
for data in chunks:
    for chunk in data['Chunk']:
        # Zero-length segment concatenated on both sides of the chunk.
        silence_chunk = AudioSegment.silent(duration=0)
        audio_chunk = silence_chunk + chunk + silence_chunk
        # Bring every slice to the same level (-10.0 dBFS) before writing it.
        normalized_chunk = match_target_amplitude(audio_chunk, -10.0)
        normalized_chunk.export(
            "Audio/Sample001/001/chunk/{}.wav".format(str(num).zfill(3)),
            bitrate = "192k",
            format = "wav"
        )
        num += 1

0 Answers0