I am building a C# .NET Core application that gathers user interaction data and stores it alongside a video feed. I intend to utilize timed metadata in an MPEG container to store the interaction data and h264 encoded video data.
According to the official documentation it appears to be possible to encode a metadata stream during a transcode operation with the MediaTranscoder
.
The code I am using has been modified from a screen recorder example. It works by creating a custom MediaEncodingProfile
set up to support metadata and a MediaStreamSource
containing both a VideoStreamDescriptor
and a TimedMetadataStreamDescriptor
before using those to perform the encoding with the MediaTranscoder
.
public class Encoder
{
private MediaEncodingProfile GetEncodingProfile()
{
var profile = new MediaEncodingProfile();
var containerEncoding = new ContainerEncodingProperties
{
Subtype = MediaEncodingSubtypes.Mpeg4
};
var videoEncoding = new VideoEncodingProperties
{
Subtype = MediaEncodingSubtypes.H264,
Width = configuration.Width,
Height = configuration.Height,
Bitrate = configuration.BitsPerSecond,
FrameRate = { Denominator = 1, Numerator = configuration.FramesPerSecond },
PixelAspectRatio = { Denominator = 1, Numerator = 1 }
};
profile.Container = containerEncoding;
profile.Video = videoEncoding;
return profile;
}
private Tuple<VideoStreamDescriptor, TimedMetadataStreamDescriptor> GetStreamDescriptors()
{
var videoEncoding = VideoEncodingProperties.CreateUncompressed(MediaEncodingSubtypes.Bgra8,
configuration.InputWidth, configuration.InputHeight);
var videoStreamDescriptor = new VideoStreamDescriptor(videoEncoding);
var metadataEncoding = new TimedMetadataEncodingProperties
{
Subtype = "{36002D6F-4D0D-4FD7-8538-5680DA4ED58D}"
};
byte[] streamFormatData = GetMetadataStreamFormatData(); // This just sets some arbitrary bytes
metadataEncoding.SetFormatUserData(streamFormatData);
var metadataStreamDescriptor = new TimedMetadataStreamDescriptor(metadataEncoding);
return new Tuple<VideoStreamDescriptor, TimedMetadataStreamDescriptor>(
videoStreamDescriptor, metadataStreamDescriptor);
}
private MediaStreamSource GetMediaStreamSource(IMediaStreamDescriptor videoStreamDescriptor,
IMediaStreamDescriptor metadataStreamDescriptor)
{
var mediaStreamSource = new MediaStreamSource(videoStreamDescriptor, metadataStreamDescriptor)
{
BufferTime = TimeSpan.FromSeconds(0)
};
mediaStreamSource.Starting += OnStart;
mediaStreamSource.SampleRequested += OnSampleRequested;
return mediaStreamSource;
}
private void OnStart(MediaStreamSource sender, MediaStreamSourceStartingEventArgs args)
{
// Intentionally omitted
}
private void OnSampleRequested(MediaStreamSource sender, MediaStreamSourceSampleRequestedEventArgs args)
{
// This only gets called for the video stream, not the metadata stream
}
private MediaTranscoder GetTranscoder()
{
var transcoder = new MediaTranscoder { HardwareAccelerationEnabled = true };
return transcoder;
}
public async Task TranscodeAsync()
{
var transcoder = GetTranscoder();
var (videoStreamDescriptor, metadataStreamDescriptor) = GetStreamDescriptors();
var mediaStreamSource = GetMediaStreamSource(videoStreamDescriptor, metadataStreamDescriptor);
var encodingProfile = GetEncodingProfile();
await using var destinationFile = File.Open(configuration.FilePath, FileMode.Create);
var prepareTranscodeResult = await transcoder.PrepareMediaStreamSourceTranscodeAsync(
mediaStreamSource, destinationFile.AsRandomAccessStream(), encodingProfile);
await prepareTranscodeResult.TranscodeAsync().AsTask();
}
}
The problem I am facing is that the SampleRequested
event does not get raised for the timed metadata stream, only for the video stream. During testing I replaced the timed metadata stream with an audio stream and the SampleRequested
event correctly got raised for both the video stream and the audio stream. I suspect there might be a different way of adding data to a timed metadata stream, perhaps using TimedMetadataTrack
and DataCue
, but my efforts have thus far proven unsuccessful.
What is the correct way of adding timed metadata available only on a per-sample basis during encoding to a stream when using MediaTranscoder
(and potentially MediaStreamSource
)?