I've been working on a streaming audio fingerprinter in Python and have experimented with a few different libraries/services (Gracenote, AcoustID, ACRCloud), but I can't get any of them to work.
My current code uses python-sounddevice to record raw audio from my microphone (or from my sound card, for less noise) in float32 format and fill a ring buffer. Every 5 seconds I read the buffer, convert it to 16-bit PCM, and feed it into the AcoustID fingerprinting function.
My code:
import time as time2

import numpy as np
import sounddevice as sd
RATE = 16000    # capture sample rate, in Hz
duration = 15   # length of the audio window kept in the ring buffer, in seconds
run_time = 60   # total time to keep the input stream open, in seconds


class Buffer(object):
    """Fixed-size ring buffer that always exposes its contents contiguously.

    The backing array holds two identical copies of the ring, one after the
    other.  Because every write is mirrored into both halves, the most recent
    ``size`` samples can always be returned as a single slice, oldest first,
    without any concatenation.
    """

    def __init__(self, size, dtype=np.float32):
        """Create a zero-filled buffer holding ``size`` samples of ``dtype``."""
        self.size = size
        self.buf = np.zeros(self.size * 2, dtype=dtype)
        self.i = 0  # total number of samples written so far

    def extend(self, data):
        """Append ``data`` to the ring, overwriting the oldest samples.

        ``data`` is anything ``np.asarray`` accepts with at most one
        dimension and at most ``size`` elements.

        Raises:
            ValueError: if ``data`` has more than one dimension or holds
                more than ``size`` elements.
        """
        data = np.asarray(data)
        if data.ndim > 1:
            raise ValueError("data must be a flat array")
        flat = data.reshape(-1)  # also turns a 0-d array into one sample
        count = flat.size
        if count > self.size:
            raise ValueError("data cannot be larger than size")

        start = self.i % self.size
        # Samples that fit before the end of the ring; the rest wrap around
        # to index 0.
        head = min(count, self.size - start)
        tail = count - head
        # Write the same samples into both halves so that read() can always
        # take one contiguous slice.
        for base in (0, self.size):
            self.buf[base + start:base + start + head] = flat[:head]
            self.buf[base:base + tail] = flat[head:]
        self.i += count

    def read(self):
        """Return the last ``size`` samples, oldest first.

        The result is a view into the internal array, not a copy; it changes
        on the next call to ``extend``.
        """
        start = self.i % self.size
        return self.buf[start:start + self.size]


# Must come after both RATE and Buffer are defined.
b = Buffer(duration * RATE)
def float_to_16_bit_pcm(raw_floats):
    """Convert float audio samples to little-endian signed 16-bit PCM bytes.

    Samples are expected in the range [-1.0, 1.0]; anything outside that
    range is clipped so it cannot overflow the 16-bit integer range.  Each
    sample is scaled by 32767 and rounded to the nearest integer.

    Args:
        raw_floats: a sequence or NumPy array of float samples.

    Returns:
        A byte string holding two bytes per input sample.
    """
    samples = np.asarray(raw_floats, dtype=np.float64)
    clipped = np.clip(samples, -1.0, 1.0)
    # "<i2" is little-endian int16, the same layout as struct's "<h".
    return np.rint(clipped * 32767).astype("<i2").tobytes()
# State shared with the audio callback.
i = 0                 # number of callback invocations so far
start = time2.time()  # wall-clock reference used to wait for a full buffer


def callback(indata, frames, time, status):  # outdata is 5th - when no inputstream
    """Store each captured block and periodically look the audio up on AcoustID.

    Every block is appended to the ring buffer ``b``.  Once more than
    ``duration`` seconds have passed since ``start``, every 50th call
    fingerprints the buffer contents and prints the lookup response.

    NOTE(review): the lookup is a network request made from inside the audio
    callback; consider handing the PCM data to a worker thread instead --
    confirm against the sounddevice callback guidelines.
    """
    global i
    if status:
        print(status)
    b.extend(indata.squeeze())
    elapsed_time = time2.time() - start
    if elapsed_time > duration and i % 50 == 0:
        aud = b.read()
        pcm16 = float_to_16_bit_pcm(aud)
        # acoustid.fingerprint expects an iterable of PCM byte-string blocks.
        # A bare byte string would be iterated one byte at a time, so wrap
        # the data in a list to feed it as a single block.
        fp = acoustid.fingerprint(RATE, 1, [pcm16])
        response = acoustid.lookup(API_KEY, fp, duration)
        print(response)
    i += 1
# Keep the input stream open for run_time seconds; sd.sleep takes milliseconds.
sleep_ms = int(run_time * 1000)
stream = sd.InputStream(
    callback=callback,
    channels=1,
    dtype=np.float32,
    samplerate=16000,
)
with stream:
    sd.sleep(sleep_ms)
The response comes back with the error: {u'status': u'error', u'error': {u'message': u'invalid fingerprint', u'code': 3}}
I know that the creator of Chromaprint mentioned that, as of Chromaprint 1.4, it is possible (in C++) to feed in a continuous audio stream and get fingerprints from it: https://oxygene.sk/2016/12/chromaprint-1-4-released/
Does anyone have any experience with this or advice to offer?
Thanks