7

I'm working on video encoding that will be used in a Unity plugin. I have made image encoding work, but now I'm at the audio, so I'm trying to write only the audio into an MP4 file with AAC encoding. And I'm stuck: the resulting file does not contain anything. Also, from what I understand, AAC in ffmpeg only supports AV_SAMPLE_FMT_FLTP, which is why I use it. Here's my code:

Setup:

int initialize_encoding_audio(const char *filename)
{
    int ret;
    AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
    AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;

    avcodec_register_all();
    av_register_all();

    aud_codec = avcodec_find_encoder(aud_codec_id);
    avcodec_register(aud_codec);

    if (!aud_codec)
        return COULD_NOT_FIND_AUD_CODEC;

    aud_codec_context = avcodec_alloc_context3(aud_codec);
    if (!aud_codec_context)
        return CONTEXT_CREATION_ERROR;

    aud_codec_context->bit_rate = 192000;
    aud_codec_context->sample_rate = select_sample_rate(aud_codec);
    aud_codec_context->sample_fmt = sample_fmt;
    aud_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
    aud_codec_context->channels = av_get_channel_layout_nb_channels(aud_codec_context->channel_layout);

    aud_codec_context->codec = aud_codec;
    aud_codec_context->codec_id = aud_codec_id;

    ret = avcodec_open2(aud_codec_context, aud_codec, NULL);

    if (ret < 0)
        return COULD_NOT_OPEN_AUD_CODEC;

    outctx = avformat_alloc_context();
    ret = avformat_alloc_output_context2(&outctx, NULL, "mp4", filename);

    outctx->audio_codec = aud_codec;
    outctx->audio_codec_id = aud_codec_id;

    audio_st = avformat_new_stream(outctx, aud_codec);

    audio_st->codecpar->bit_rate = aud_codec_context->bit_rate;
    audio_st->codecpar->sample_rate = aud_codec_context->sample_rate;
    audio_st->codecpar->channels = aud_codec_context->channels;
    audio_st->codecpar->channel_layout = aud_codec_context->channel_layout;
    audio_st->codecpar->codec_id = aud_codec_id;
    audio_st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
    audio_st->codecpar->format = sample_fmt;
    audio_st->codecpar->frame_size = aud_codec_context->frame_size;
    audio_st->codecpar->block_align = aud_codec_context->block_align;
    audio_st->codecpar->initial_padding = aud_codec_context->initial_padding;

    outctx->streams = new AVStream*[1];
    outctx->streams[0] = audio_st;

    av_dump_format(outctx, 0, filename, 1);

    if (!(outctx->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&outctx->pb, filename, AVIO_FLAG_WRITE) < 0)
            return COULD_NOT_OPEN_FILE;
    }

    ret = avformat_write_header(outctx, NULL);

    aud_frame = av_frame_alloc();
    aud_frame->nb_samples = aud_codec_context->frame_size;
    aud_frame->format = aud_codec_context->sample_fmt;
    aud_frame->channel_layout = aud_codec_context->channel_layout;

    int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
        aud_codec_context->sample_fmt, 0);

    av_frame_get_buffer(aud_frame, buffer_size / aud_codec_context->channels);

    if (!aud_frame)
        return COULD_NOT_ALLOCATE_FRAME;

    aud_frame_counter = 0;

    return 0;
}

Encoding:

int encode_audio_samples(uint8_t **aud_samples)
{
    int ret;

    int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
        aud_codec_context->sample_fmt, 0);

    for (size_t i = 0; i < buffer_size / aud_codec_context->channels; i++)
    {
        aud_frame->data[0][i] = aud_samples[0][i];
        aud_frame->data[1][i] = aud_samples[1][i];
    }

    aud_frame->pts = aud_frame_counter++;

    ret = avcodec_send_frame(aud_codec_context, aud_frame);
    if (ret < 0)
        return ERROR_ENCODING_SAMPLES_SEND;

    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    fflush(stdout);

    while (true)
    {
        ret = avcodec_receive_packet(aud_codec_context, &pkt);
        if (!ret)
        {
            av_packet_rescale_ts(&pkt, aud_codec_context->time_base, audio_st->time_base);

            pkt.stream_index = audio_st->index;
            av_write_frame(outctx, &pkt);
            av_packet_unref(&pkt);
        }
        if (ret == AVERROR(EAGAIN))
            break;
        else if (ret < 0)
            return ERROR_ENCODING_SAMPLES_RECEIVE;
        else
            break;
    }

    return 0;
}

Finish encoding:

int finish_audio_encoding()
{
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    fflush(stdout);

    int ret = avcodec_send_frame(aud_codec_context, NULL);
    if (ret < 0)
        return ERROR_ENCODING_FRAME_SEND;

    while (true)
    {
        ret = avcodec_receive_packet(aud_codec_context, &pkt);
        if (!ret)
        {
            if (pkt.pts != AV_NOPTS_VALUE)
                pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
            if (pkt.dts != AV_NOPTS_VALUE)
                pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);

            av_write_frame(outctx, &pkt);
            av_packet_unref(&pkt);
        }
        if (ret == -AVERROR(AVERROR_EOF))
            break;
        else if (ret < 0)
            return ERROR_ENCODING_FRAME_RECEIVE;
    }

    av_write_trailer(outctx);
}

Main:

/*
 * Fill one frame of a two-channel sine signal.
 *
 * left_samples, right_samples: output planes, each with room for frame_size
 *                              samples; both receive the same value.
 * frame_size: number of samples to generate per plane.
 * t:      running phase in radians; advanced by *tincr after every sample.
 * tincr:  phase step per sample; advanced by *tincr2 after every sample.
 * tincr2: change applied to the phase step per sample (0 keeps the pitch
 *         constant).
 */
void get_audio_frame(float_t *left_samples, float_t *right_samples, int frame_size, float* t, float* tincr, float* tincr2)
{
    int n;

    for (n = 0; n < frame_size; n++)
    {
        float value = sin(*t);

        left_samples[n] = value;
        right_samples[n] = value;

        *t += *tincr;
        *tincr += *tincr2;
    }
}

int main()
{
    int frame_rate = 30;  // this should be like 96000 / 1024 or somthing i guess?
    float t, tincr, tincr2;

    initialize_encoding_audio("audio.mp4");

    int sec = 50;

    float_t** aud_samples;
    int src_samples_linesize;
    int src_nb_samples = 1024;
    int src_channels = 2;

    int ret = av_samples_alloc_array_and_samples((uint8_t***)&aud_samples, &src_samples_linesize, src_channels,
        src_nb_samples, AV_SAMPLE_FMT_FLTP, 0);


    t = 0;
    tincr = 0;
    tincr2 = 0;

    for (size_t i = 0; i < frame_rate * sec; i++)
    {
        get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);

        encode_audio_samples((uint8_t **)aud_samples);

    }

    finish_audio_encoding();
    //cleanup();

    return 0;
}

I guess the first thing that I would want to make sure I got right is the synthetic sound generation and how I transfer that to the AVFrame. Are my conversions correct? But feel free to point out anything that might be wrong.

Thanks in advance!

Edit: the whole source: http://pastebin.com/jYtmkhek

Edit2: Added initialization of tincr & tincr2

Ivan Kolesnikov
  • 1,787
  • 1
  • 29
  • 45
Mockarutan
  • 442
  • 4
  • 28
  • 1
    Is there a specific problem? You seem to be asking "is my code correct", and a common response to it is "well, does it work?" – Ronald S. Bultje Nov 22 '16 at 20:48
  • Well, as I said "The resulting file does not contain anything.". So no, there is no sound in the file. – Mockarutan Nov 22 '16 at 21:06
  • @Mockarutan `v = sin(*t)` are you sure this is in the audible range? Because it *doesn't sound* like a yes to me :) Link the complete source file please. – aergistal Nov 24 '16 at 11:01
  • In the docs it says " * - The floating-point formats are based on full volume being in the range [-1.0, 1.0]. Any values outside this range are beyond full volume level." So it should be right, shouldn't it? And this is the full source code. But I can link a Pastebin as well; I'm going to do that as soon as I have it available. – Mockarutan Nov 24 '16 at 12:34
  • Yes, it would be easier to compile/debug. – aergistal Nov 24 '16 at 16:34
  • There: http://pastebin.com/jYtmkhek – Mockarutan Nov 24 '16 at 16:38
  • From your full code, I am not able to see any code for opening the input stream. Could you pls clarify and if possible share the full code? I see only the encoding resulting in an output file audio.mp4. – Vandana Chadha Feb 11 '20 at 16:55
  • The sound wave is generated in code, get_audio_frame(), so no input stream needed. – Mockarutan Feb 28 '20 at 09:27

1 Answer

2

Unless I'm missing something from the pastebin, you forgot to initialize a few variables. You're using garbage to generate your samples.

float t, tincr, tincr2;
[...]
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);

You probably want to start with t=0 and increment by 2 * PI * frequency / sample rate for a sine wave.

Also, avformat_new_stream() creates the stream for you, don't do it with new.

Update:

I removed all the c++ stuff to test this. Here's the code that works: pastebin

And here's the resulting file: audio.mp4

ffmpeg -i audio.mp4 -filter_complex "showwaves=s=640x120:mode=line:colors=white" -frames:v 1 wave.jpg

enter image description here

Diff:

1,6d0
< #include "encoder.h"
< #include <algorithm>
< #include <iterator>
< 
< extern "C"
< {
14a9
> #include <math.h>
40,41c35,36
<   SwsContext *sws_ctx;
<   SwrContext *swr_ctx = NULL;
---
> struct SwsContext *sws_ctx;
> struct SwrContext *swr_ctx = NULL;
76,77c71,72
<       AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
<       AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
---
>   enum AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
>   enum AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
125,126c120,121
<       outctx->streams = new AVStream*[1];
<       outctx->streams[0] = audio_st;
---
>   //outctx->streams = new AVStream*[1];
>   //outctx->streams[0] = audio_st;
182c177
<       while (true)
---
>   while (1)
216c211
<       while (true)
---
>   while (1)
291c286
<       float t, tincr, tincr2;
---
>   float t = 0, tincr = 2 * M_PI * 440.0 / 96000, tincr2 = 0;
317d311
<   }
aergistal
  • 29,947
  • 5
  • 70
  • 92
  • You are correct, missed that. However, the file is still mostly empty. It seems like the sound actually became longer, like 100ms to 500ms. File desc still says 00:00 length. So good catch, but did not solve it. (I've edited the post with the new code) – Mockarutan Nov 25 '16 at 07:20
  • @Mockarutan added the code, diff and sample audio. It works on most players, but the FAAD decoder in VLC doesn't like the bitstream. In any case there is audio in the file now and you should be able to work out the rest of your code issues on your own. – aergistal Nov 25 '16 at 09:51
  • I hear nothing in that audio file and the file description says 00:00 as duration. Do you hear any sound when you play it? – Mockarutan Nov 25 '16 at 22:39
  • Yes, try with `ffplay`. The browser might not support 96KHz. – aergistal Nov 26 '16 at 09:18
  • Added a wave representation of the generated audio stream. – aergistal Nov 26 '16 at 16:43
  • Well, how about that! And here I was going mad about my code, when in reality I had probably been producing some sort of valid sound from the code for some time now. I never knew that VLC (which I thought could play anything) could be picky about sample rates and such. I tried switching to 48 kHz (for some reason I had only tried 44.1 kHz and 96 kHz) and now it even plays in VLC! Thanks! – Mockarutan Nov 27 '16 at 11:48
  • audio.aac created with this code not playing. Passed format_name = NULL in avformat_alloc_output_context2(&outctx, NULL, NULL, "audio.aac"); – Arunraj Shanmugam Jul 06 '17 at 08:45