3

I'm trying to play an audio stream with ffmpeg and opensles in android. And the problem seems to be when passing the decoded and resampled frames from ffmpeg to opensles as the sound I can hear sounds robotic and has scratching.

The decoded frames from ffmpeg:

PCM
48000 Hz
S16p

Opensles needs in this case:

PCM
48000 Hz
S16

Opensles setup:

SLDataLocator_AndroidSimpleBufferQueue loc_bufq = {SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE, 255};

SLDataFormat_PCM format_pcm = { SL_DATAFORMAT_PCM, 2 , SL_SAMPLINGRATE_48, SL_PCMSAMPLEFORMAT_FIXED_16, SL_PCMSAMPLEFORMAT_FIXED_16,
                SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT, SL_BYTEORDER_LITTLEENDIAN};

SLDataSource audioSrc = {&loc_bufq, &format_pcm};

This is the pseudocode for resampling and enqueueing to opensles:

#define OPENSLES_BUFLEN 10
#define MAX_AUDIO_FRAME_SIZE 192000

DECLARE_ALIGNED(16,uint8_t,audio_buffer)[MAX_AUDIO_FRAME_SIZE * OPENSLES_BUFLEN];


int decode_audio(AVCodecContext * ctx, SwrContext *swr_context, AVPacket *packet, AVFrame * frame){
    int got_frame_ptr;
    int len = avcodec_decode_audio4(ctx, frame, &got_frame_ptr, packet);

    if(!got_frame_ptr)
        return -ERROR;

    int original_data_size = av_samples_get_buffer_size(NULL, ctx->channels,
        frame->nb_samples, ctx->sample_fmt, 1);

    uint8_t *audio_buf;
    int data_size;

    if (swr_context != NULL) {
        uint8_t *out[] = { audio_buffer };

        int sample_per_buffer_divider = 2* av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);;


        int len2 = swr_convert(swr_context, out,
            sizeof(audio_buffer) / sample_per_buffer_divider,
            frame->extended_data, frame->nb_samples);



        if (len2 < 0) {
            return -ERROR;
        }
        if (len2 == sizeof(audio_buffer) / sample_per_buffer_divider) {
            swr_init(swr_context);
        }
        audio_buf = audio_buffer;
        data_size = len2 * sample_per_buffer_divider;
        } 
     else {
        audio_buf = frame->data[0];
        data_size = original_data_size;
     }

     (*opengSLESData->bqPlayerBufferQueue)->Enqueue(opengSLESData->bqPlayerBufferQueue, audio_buf, data_size)


}

I would appreciate any help, thanks.

  • 1
    Did you get this working, I got the resampling to work but I'm still hearing some popping and cracking noises when using OpenSL to play the samples. – William Seemann Nov 24 '14 at 21:25
  • Did you get this working? I'm still hearing some popping and cracking noises :) @WilliamSeemann – fandyushin Mar 24 '16 at 12:47

1 Answers1

0

example may help

#include "stdafx.h"
#include <iostream>

extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
//#include "swscale.h"
#include "libswresample/swresample.h"
};

FILE           *fin,    *fout;

int ffmpeg_audio_decode( const char * inFile, const char * outFile)
{
// Initialize FFmpeg
av_register_all();

AVFrame* frame = avcodec_alloc_frame();
if (!frame)
{
    std::cout << "Error allocating the frame" << std::endl;
    return 1;
}

// you can change the file name "01 Push Me to the Floor.wav" to whatever the file is you're reading, like "myFile.ogg" or
// "someFile.webm" and this should still work
AVFormatContext* formatContext = NULL;
//if (avformat_open_input(&formatContext, "01 Push Me to the Floor.wav", NULL, NULL) != 0)
if (avformat_open_input(&formatContext, inFile, NULL, NULL) != 0)
{
    av_free(frame);
    std::cout << "Error opening the file" << std::endl;
    return 1;
}

if (avformat_find_stream_info(formatContext, NULL) < 0)
{
    av_free(frame);
    av_close_input_file(formatContext);
    std::cout << "Error finding the stream info" << std::endl;
    return 1;
}

AVStream* audioStream = NULL;
// Find the audio stream (some container files can have multiple streams in them)
for (unsigned int i = 0; i < formatContext->nb_streams; ++i)
{
    if (formatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
    {
        audioStream = formatContext->streams[i];
        break;
    }
}

if (audioStream == NULL)
{
    av_free(frame);
    av_close_input_file(formatContext);
    std::cout << "Could not find any audio stream in the file" << std::endl;
    return 1;
}

AVCodecContext* codecContext = audioStream->codec;

codecContext->codec = avcodec_find_decoder(codecContext->codec_id);
if (codecContext->codec == NULL)
{
    av_free(frame);
    av_close_input_file(formatContext);
    std::cout << "Couldn't find a proper decoder" << std::endl;
    return 1;
}
else if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0)
{
    av_free(frame);
    av_close_input_file(formatContext);
    std::cout << "Couldn't open the context with the decoder" << std::endl;
    return 1;
}

std::cout << "This stream has " << codecContext->channels << " channels and a sample rate of " << codecContext->sample_rate << "Hz" << std::endl;
std::cout << "The data is in the format " << av_get_sample_fmt_name(codecContext->sample_fmt) << std::endl;

//codecContext->sample_fmt = AV_SAMPLE_FMT_S16;

int64_t outChannelLayout = AV_CH_LAYOUT_MONO; //AV_CH_LAYOUT_STEREO;
AVSampleFormat outSampleFormat = AV_SAMPLE_FMT_S16; // Packed audio, non-planar (this is the most common format, and probably what you want; also, WAV needs it)
int outSampleRate = 8000;//44100;
// Note that AVCodecContext::channel_layout may or may not be set by libavcodec. Because of this,
// we won't use it, and will instead try to guess the layout from the number of channels.
SwrContext* swrContext = swr_alloc_set_opts(NULL,
    outChannelLayout,
    outSampleFormat,
    outSampleRate,
    av_get_default_channel_layout(codecContext->channels),
    codecContext->sample_fmt,
    codecContext->sample_rate,
    0,
    NULL);

if (swrContext == NULL)
{
    av_free(frame);
    avcodec_close(codecContext);
    avformat_close_input(&formatContext);
    std::cout << "Couldn't create the SwrContext" << std::endl;
    return 1;
}

if (swr_init(swrContext) != 0)
{
    av_free(frame);
    avcodec_close(codecContext);
    avformat_close_input(&formatContext);
    swr_free(&swrContext);
    std::cout << "Couldn't initialize the SwrContext" << std::endl;
    return 1;
}

fout = fopen(outFile, "wb+");

AVPacket packet;
av_init_packet(&packet);

// Read the packets in a loop
while (av_read_frame(formatContext, &packet) == 0)
{
    if (packet.stream_index == audioStream->index)
    {
        AVPacket decodingPacket = packet;

        while (decodingPacket.size > 0)
        {
            // Try to decode the packet into a frame
            int frameFinished = 0;
            int result = avcodec_decode_audio4(
                codecContext, 
                frame, 
                &frameFinished, 
                &decodingPacket);

            if (result < 0 || frameFinished == 0)
            {
                break;
            }

            unsigned char buffer[100000] = {NULL};
            unsigned char* pointers[SWR_CH_MAX] = {NULL};
            pointers[0] = &buffer[0];

            int numSamplesOut = swr_convert(
                swrContext,
                pointers,
                outSampleRate,
                (const unsigned char**)frame->extended_data,
                frame->nb_samples);


            fwrite(  
                (short *)buffer, 
                sizeof(short), 
                (size_t)numSamplesOut, 
                fout);

            decodingPacket.size -= result;
            decodingPacket.data += result;
        }

    }

    // You *must* call av_free_packet() after each call to av_read_frame() or else you'll leak memory
    av_free_packet(&packet);
}

// Some codecs will cause frames to be buffered up in the decoding process. If the CODEC_CAP_DELAY flag
// is set, there can be buffered up frames that need to be flushed, so we'll do that
if (codecContext->codec->capabilities & CODEC_CAP_DELAY)
{
    av_init_packet(&packet);
    // Decode all the remaining frames in the buffer, until the end is reached
    int frameFinished = 0;
    while (avcodec_decode_audio4(codecContext, frame, &frameFinished, &packet) >= 0 && frameFinished)
    {
    }
}

// Clean up!
av_free(frame);
avcodec_close(codecContext);
av_close_input_file(formatContext);
fclose(fout);
}
olegog
  • 121
  • 1
  • 3