I'm trying to encode the input images from MacBook Pro's built-in FaceTime HD Camera into an H.264 video stream in real time using the libx264 on Mac OS X 10.9.5.
Below are the steps I took:
- Get 1280x720 32BGRA images from camera at 15fps using AVFoundation API (AVCaptureDevice class, etc.)
- Convert the images into 320x180 YUV420P format using libswscale.
- Encode the images into an H.264 video stream (baseline profile) using libx264.
I apply the above steps each time the image is obtained from the camera, believing that the encoder keeps track of the encoding state and generates a NAL unit when it's available.
As I wanted to get the encoded frames while providing the input images to the encoder, I decided to flush the encoder (calling x264_encoder_delayed_frames()) every 30 frames (2 seconds).
However, when I restart the encoding, the encoder stops after a while (x264_encoder_encode() never returns.) I tried changing the number of frames before flushing, but the situation didn't change.
Below are the related code (I omitted the image capture code because it looks no problem.)
Can you point out anything I might be doing wrong?
x264_t *encoder;
x264_param_t param;
// Will be called only first time.
int initEncoder() {
int ret;
if ((ret = x264_param_default_preset(¶m, "medium", NULL)) < 0) {
return ret;
}
param.i_csp = X264_CSP_I420;
param.i_width = 320;
param.i_height = 180;
param.b_vfr_input = 0;
param.b_repeat_headers = 1;
param.b_annexb = 1;
if ((ret = x264_param_apply_profile(¶m, "baseline")) < 0) {
return ret;
}
encoder = x264_encoder_open(¶m);
if (!encoder) {
return AVERROR_UNKNOWN;
}
return 0;
}
// Will be called from encodeFrame() defined below.
int convertImage(const enum AVPixelFormat srcFmt, const int srcW, const int srcH, const uint8_t *srcData, const enum AVPixelFormat dstFmt, const int dstW, const int dstH, x264_image_t *dstData) {
struct SwsContext *sws_ctx;
int ret;
int src_linesize[4];
uint8_t *src_data[4];
sws_ctx = sws_getContext(srcW, srcH, srcFmt,
dstW, dstH, dstFmt,
SWS_BILINEAR, NULL, NULL, NULL);
if (!sws_ctx) {
return AVERROR_UNKNOWN;
}
if ((ret = av_image_fill_linesizes(src_linesize, srcFmt, srcW)) < 0) {
sws_freeContext(sws_ctx);
return ret;
}
if ((ret = av_image_fill_pointers(src_data, srcFmt, srcH, (uint8_t *) srcData, src_linesize)) < 0) {
sws_freeContext(sws_ctx);
return ret;
}
sws_scale(sws_ctx, (const uint8_t * const*)src_data, src_linesize, 0, srcH, dstData->plane, dstData->i_stride);
sws_freeContext(sws_ctx);
return 0;
}
// Will be called for each frame.
int encodeFrame(const uint8_t *data, const int width, const int height) {
int ret;
x264_picture_t pic;
x264_picture_t pic_out;
x264_nal_t *nal;
int i_nal;
if ((ret = x264_picture_alloc(&pic, param.i_csp, param.i_width, param.i_height)) < 0) {
return ret;
}
if ((ret = convertImage(AV_PIX_FMT_RGB32, width, height, data, AV_PIX_FMT_YUV420P, 320, 180, &pic.img)) < 0) {
x264_picture_clean(&pic);
return ret;
}
if ((ret = x264_encoder_encode(encoder, &nal, &i_nal, &pic, &pic_out)) < 0) {
x264_picture_clean(&pic);
return ret;
}
if(ret) {
for (int i = 0; i < i_nal; i++) {
printNAL(nal + i);
}
}
x264_picture_clean(&pic);
return 0;
}
// Will be called every 30 frames.
int flushEncoder() {
int ret;
x264_nal_t *nal;
int i_nal;
x264_picture_t pic_out;
/* Flush delayed frames */
while (x264_encoder_delayed_frames(encoder)) {
if ((ret = x264_encoder_encode(encoder, &nal, &i_nal, NULL, &pic_out)) < 0) {
return ret;
}
if (ret) {
for (int j = 0; j < i_nal; j++) {
printNAL(nal + j);
}
}
}
}