Add initial audio buffer support in video player

This commit is contained in:
Wojtek Figat
2024-05-03 14:32:23 +02:00
parent 754ed56119
commit 388a0f4196
3 changed files with 230 additions and 150 deletions

View File

@@ -37,7 +37,7 @@ namespace
bool result = true;
// Find the native format of the stream
HRESULT hr = playerMF.SourceReader->GetNativeMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType);
HRESULT hr = playerMF.SourceReader->GetNativeMediaType(streamIndex, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(GetNativeMediaType, hr);
@@ -81,7 +81,7 @@ namespace
player.Width = videoArea.Area.cx;
player.Height = videoArea.Area.cy;
}
player.AvgBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0);
player.AvgVideoBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0);
uint64_t fpsValue;
hr = mediaType->GetUINT64(MF_MT_FRAME_RATE, &fpsValue);
if (SUCCEEDED(hr))
@@ -132,6 +132,26 @@ namespace
player.AudioInfo.SampleRate = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_SAMPLES_PER_SECOND, 0);
player.AudioInfo.NumChannels = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_NUM_CHANNELS, 0);
player.AudioInfo.BitDepth = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_BITS_PER_SAMPLE, 16);
if (subtype != MFAudioFormat_PCM)
{
// Reconfigure decoder to output audio data in PCM format
IMFMediaType* customType = nullptr;
hr = MFCreateMediaType(&customType);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(MFCreateMediaType, hr);
goto END;
}
customType->SetGUID(MF_MT_MAJOR_TYPE, majorType);
customType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
hr = playerMF.SourceReader->SetCurrentMediaType(streamIndex, nullptr, customType);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(SetCurrentMediaType, hr);
goto END;
}
customType->Release();
}
}
result = false;
@@ -139,6 +159,156 @@ namespace
SAFE_RELEASE(mediaType);
return result;
}
// Pumps the Media Foundation source reader for the given stream (video or audio) until a sample
// covering the current playback time is found, then forwards its payload to the player.
// @param player      Backend player state (frame/audio timing and output sinks).
// @param playerMF    Media Foundation-specific state (source reader, current time).
// @param streamIndex Stream selector passed to ReadSample (MF_SOURCE_READER_FIRST_VIDEO_STREAM or MF_SOURCE_READER_FIRST_AUDIO_STREAM).
// @param dt          Frame delta time, used as a fallback sample duration when none is reported.
// @return True if the sample budget was exhausted without finding a frame for the current time (caller may seek and retry).
bool ReadStream(VideoBackendPlayer& player, VideoPlayerMF& playerMF, DWORD streamIndex, TimeSpan dt)
{
    const bool isVideo = streamIndex == MF_SOURCE_READER_FIRST_VIDEO_STREAM;
    const bool isAudio = streamIndex == MF_SOURCE_READER_FIRST_AUDIO_STREAM;
    const TimeSpan lastFrameTime = isVideo ? player.VideoFrameTime : player.AudioBufferTime;
    const TimeSpan lastFrameDuration = isVideo ? player.VideoFrameDuration : player.AudioBufferDuration;

    // Check if the current frame is valid (eg. when playing 24fps video at 60fps)
    if (lastFrameDuration.Ticks > 0 &&
        Math::IsInRange(playerMF.Time, lastFrameTime, lastFrameTime + lastFrameDuration))
    {
        return false;
    }

    // Read samples until frame is matching the current time (bounded to avoid spinning forever)
    int32 samplesLeft = 500;
    HRESULT hr;
    for (; samplesLeft > 0; samplesLeft--)
    {
        // Read sample (use a separate output variable so the input stream selector is not clobbered across iterations)
        DWORD actualStreamIndex = 0, flags = 0;
        LONGLONG samplePos = 0, sampleDuration = 0;
        IMFSample* sample = nullptr;
        {
            PROFILE_CPU_NAMED("ReadSample");
            hr = playerMF.SourceReader->ReadSample(streamIndex, 0, &actualStreamIndex, &flags, &samplePos, &sample);
            if (FAILED(hr))
            {
                VIDEO_API_MF_ERROR(ReadSample, hr);
                break;
            }
        }
        TimeSpan frameTime((int64)samplePos);
        TimeSpan frameDuration = player.FrameRate > 0 ? TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt;
        if (sample && sample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0)
        {
            // Prefer the duration reported by the sample itself over the nominal frame rate
            frameDuration.Ticks = sampleDuration;
        }
        const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + frameDuration);

        // Process sample
        if (sample && isGoodSample)
        {
            PROFILE_CPU_NAMED("ProcessSample");

            // Lock sample buffer memory (try to use 2D buffer for more direct memory access)
            IMFMediaBuffer* buffer = nullptr;
            IMF2DBuffer* buffer2D = nullptr;
            BYTE* bufferData = nullptr;
            LONG bufferStride = 0;
            if (isVideo && sample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK)
            {
                LONG bufferPitch = 0;
                hr = buffer2D->Lock2D(&bufferData, &bufferPitch);
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(Lock2D, hr);
                    goto PROCESS_SAMPLE_END;
                }
                if (bufferPitch < 0)
                    bufferPitch = -bufferPitch; // Flipped image
                bufferStride = bufferPitch * player.VideoFrameHeight;
            }
            else
            {
                // No 2D access (or audio stream): fall back to a single contiguous buffer
                if (buffer)
                {
                    buffer->Release();
                    buffer = nullptr;
                }
                DWORD bufferLength;
                hr = sample->ConvertToContiguousBuffer(&buffer);
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr);
                    goto PROCESS_SAMPLE_END;
                }
                hr = buffer->GetCurrentLength(&bufferLength);
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(GetCurrentLength, hr);
                    goto PROCESS_SAMPLE_END;
                }
                DWORD bufferMaxLen = 0, bufferCurrentLength = 0;
                hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength);
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(Lock, hr);
                    goto PROCESS_SAMPLE_END;
                }
                bufferStride = bufferCurrentLength;
            }
            {
                Span<byte> bufferSpan(bufferData, bufferStride);
                if (isVideo)
                {
                    // Send pixels to the texture
                    player.UpdateVideoFrame(bufferSpan, frameTime, frameDuration);
                }
                else if (isAudio)
                {
                    // Send PCM data
                    player.UpdateAudioBuffer(bufferSpan, frameTime, frameDuration);
                }
            }

            // Unlock sample buffer memory
            if (buffer2D)
            {
                hr = buffer2D->Unlock2D();
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(Unlock2D, hr);
                }
            }
            else
            {
                hr = buffer->Unlock();
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(Unlock, hr);
                }
            }

        PROCESS_SAMPLE_END:
            // Release COM references (buffer may be null if ConvertToContiguousBuffer failed)
            if (buffer2D)
                buffer2D->Release();
            if (buffer)
                buffer->Release();
        }
        if (sample)
            sample->Release();
        if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
        {
            // Media ended
            break;
        }
        if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
        {
            // Format/metadata might have changed so update the stream
            Configure(player, playerMF, actualStreamIndex);
        }
        // End loop if got good sample or need to seek back
        if (isGoodSample)
            break;
    }
    // True if run out of samples and failed to get frame for the current time
    return samplesLeft == 0;
}
}
bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player)
@@ -278,7 +448,6 @@ void VideoBackendMF::Base_Update()
{
PROFILE_CPU();
// TODO: use async Task Graph to update videos
HRESULT hr;
for (auto* e : Players)
{
auto& player = *e;
@@ -341,140 +510,15 @@ void VideoBackendMF::Base_Update()
// After seeking, the application should call ReadSample and advance to the desired position.
}
// Check if the current frame is valid (eg. when playing 24fps video at 60fps)
if (player.VideoFrameDuration.Ticks > 0 &&
Math::IsInRange(playerMF.Time, player.VideoFrameTime, player.VideoFrameTime + player.VideoFrameDuration))
{
continue;
}
// Read samples until frame is matching the current time
int32 samplesLeft = 500;
for (; samplesLeft > 0; samplesLeft--)
{
// Read sample
DWORD streamIndex = 0, flags = 0;
LONGLONG samplePos = 0, sampleDuration = 0;
IMFSample* videoSample = nullptr;
{
PROFILE_CPU_NAMED("ReadSample");
hr = playerMF.SourceReader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, &streamIndex, &flags, &samplePos, &videoSample);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(ReadSample, hr);
break;
}
}
TimeSpan frameTime((int64)samplePos);
TimeSpan franeDuration = player.FrameRate > 0 ? TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt;
if (videoSample && videoSample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0)
{
franeDuration.Ticks = sampleDuration;
}
//const int32 framesToTime = (playerMF.Time.Ticks - frameTime.Ticks) / franeDuration.Ticks;
const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + franeDuration);
// Process sample
if (videoSample && isGoodSample)
{
PROFILE_CPU_NAMED("ProcessSample");
// Lock sample buffer memory (try to use 2D buffer for more direct memory access)
IMFMediaBuffer* buffer = nullptr;
IMF2DBuffer* buffer2D = nullptr;
BYTE* bufferData = nullptr;
LONG bufferStride = 0;
if (videoSample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK)
{
LONG bufferPitch = 0;
hr = buffer2D->Lock2D(&bufferData, &bufferPitch);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(GetCurrentLength, hr);
goto PROCESS_SAMPLE_END;
}
if (bufferPitch < 0)
bufferPitch = -bufferPitch; // Flipped image
bufferStride = bufferPitch * player.VideoFrameHeight;
}
else
{
if (buffer)
{
buffer->Release();
buffer = nullptr;
}
DWORD bufferLength;
hr = videoSample->ConvertToContiguousBuffer(&buffer);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr);
goto PROCESS_SAMPLE_END;
}
hr = buffer->GetCurrentLength(&bufferLength);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(GetCurrentLength, hr);
goto PROCESS_SAMPLE_END;
}
DWORD bufferMaxLen = 0, bufferCurrentLength = 0;
hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(Lock, hr);
goto PROCESS_SAMPLE_END;
}
bufferStride = bufferCurrentLength;
}
// Send pixels to the texture
player.UpdateVideoFrame(Span<byte>(bufferData, bufferStride), frameTime, franeDuration);
// Unlock sample buffer memory
if (buffer2D)
{
hr = buffer2D->Unlock2D();
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(Unlock2D, hr);
}
}
else
{
hr = buffer->Unlock();
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(Unlock, hr);
}
}
PROCESS_SAMPLE_END:
buffer->Release();
}
if (videoSample)
videoSample->Release();
if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
{
// Media ended
break;
}
if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
{
// Format/metadata might have changed so update the stream
Configure(player, playerMF, streamIndex);
}
// End loop if got good sample or need to seek back
if (isGoodSample)
break;
}
if (samplesLeft == 0 && seeks < 2)
// Update streams
if (ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM, dt))
{
// Failed to pick a valid sample so try again with seeking
playerMF.Seek = 1;
goto SEEK_START;
}
if (player.AudioInfo.BitDepth != 0)
ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM, dt);
}
}

View File

@@ -6,6 +6,7 @@
#include "Engine/Core/Types/TimeSpan.h"
#include "Engine/Core/Types/DataContainer.h"
#include "Engine/Audio/Types.h"
#include "Engine/Audio/Config.h"
#include "Engine/Graphics/PixelFormat.h"
class Video;
@@ -22,19 +23,29 @@ class GPUPipelineState;
/// </summary>
struct VideoBackendPlayer
{
VideoBackend* Backend = nullptr;
GPUTexture* Frame = nullptr;
GPUBuffer* FrameUpload = nullptr;
int32 Width = 0, Height = 0, AvgBitRate = 0, FramesCount = 0;
int32 VideoFrameWidth = 0, VideoFrameHeight = 0;
PixelFormat Format = PixelFormat::Unknown;
float FrameRate = 0.0f;
TimeSpan Duration = TimeSpan(0);
TimeSpan VideoFrameTime = TimeSpan(0), VideoFrameDuration = TimeSpan(0);
AudioDataInfo AudioInfo = {};
VideoBackend* Backend;
GPUTexture* Frame;
GPUBuffer* FrameUpload;
int32 Width, Height, AvgVideoBitRate, FramesCount;
int32 VideoFrameWidth, VideoFrameHeight;
PixelFormat Format;
float FrameRate;
TimeSpan Duration;
TimeSpan VideoFrameTime, VideoFrameDuration;
TimeSpan AudioBufferTime, AudioBufferDuration;
AudioDataInfo AudioInfo;
BytesContainer VideoFrameMemory;
class GPUUploadVideoFrameTask* UploadVideoFrameTask = nullptr;
uintptr BackendState[8] = {};
AUDIO_BUFFER_ID_TYPE AudioBuffer;
AUDIO_SOURCE_ID_TYPE AudioSource;
class GPUUploadVideoFrameTask* UploadVideoFrameTask;
uintptr BackendState[8];
VideoBackendPlayer()
{
Platform::MemoryClear(this, sizeof(VideoBackendPlayer));
}
POD_COPYABLE(VideoBackendPlayer);
template<typename T>
FORCE_INLINE T& GetBackendState()
@@ -51,6 +62,7 @@ struct VideoBackendPlayer
}
void InitVideoFrame();
void UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeSpan duration);
void UpdateVideoFrame(Span<byte> data, TimeSpan time, TimeSpan duration);
void UpdateAudioBuffer(Span<byte> data, TimeSpan time, TimeSpan duration);
void ReleaseResources();
};

View File

@@ -2,13 +2,13 @@
#include "Video.h"
#include "VideoBackend.h"
#include "Engine/Audio/AudioBackend.h"
#include "Engine/Core/Log.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Engine/EngineService.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUBuffer.h"
#include "Engine/Graphics/GPUResource.h"
#include "Engine/Graphics/GPUPipelineState.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Graphics/Async/GPUTask.h"
@@ -186,7 +186,7 @@ void VideoBackendPlayer::InitVideoFrame()
Frame = GPUDevice::Instance->CreateTexture(TEXT("VideoFrame"));
}
void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeSpan duration)
void VideoBackendPlayer::UpdateVideoFrame(Span<byte> data, TimeSpan time, TimeSpan duration)
{
PROFILE_CPU();
VideoFrameTime = time;
@@ -197,9 +197,9 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
// Ensure that sampled frame data matches the target texture size
uint32 rowPitch, slicePitch;
RenderTools::ComputePitch(Format, VideoFrameWidth, VideoFrameHeight, rowPitch, slicePitch);
if (slicePitch != frame.Length())
if (slicePitch != data.Length())
{
LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", frame.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format));
LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", data.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format));
return;
}
@@ -213,7 +213,7 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
return;
}
}
Platform::MemoryCopy(VideoFrameMemory.Get(), frame.Get(), slicePitch);
Platform::MemoryCopy(VideoFrameMemory.Get(), data.Get(), slicePitch);
// Update output frame texture
InitVideoFrame();
@@ -235,8 +235,32 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
}
}
// Submits a decoded PCM chunk to the audio backend buffer associated with this player.
// @param data     Raw PCM bytes for this chunk (layout described by AudioInfo).
// @param time     Presentation time of the chunk start.
// @param duration Playback duration of the chunk.
void VideoBackendPlayer::UpdateAudioBuffer(Span<byte> data, TimeSpan time, TimeSpan duration)
{
    PROFILE_CPU();
    AudioBufferTime = time;
    AudioBufferDuration = duration;
    if (!AudioBackend::Instance)
        return;
    // Reject invalid/not-yet-configured stream info (also prevents division by zero below)
    if (AudioInfo.SampleRate == 0 || AudioInfo.NumChannels == 0 || AudioInfo.BitDepth == 0)
        return;

    // Update audio buffer
    if (!AudioBuffer)
        AudioBuffer = AudioBackend::Buffer::Create();
    AudioDataInfo dataInfo = AudioInfo;
    // Clamp the written sample count to both the incoming data size and the chunk duration
    const uint32 samplesPerSecond = dataInfo.SampleRate * dataInfo.NumChannels;
    const uint32 maxSamplesInData = (uint32)data.Length() * 8 / dataInfo.BitDepth;
    const uint32 maxSamplesInDuration = (uint32)Math::CeilToInt(samplesPerSecond * duration.GetTotalSeconds());
    dataInfo.NumSamples = Math::Min(maxSamplesInData, maxSamplesInDuration);
    AudioBackend::Buffer::Write(AudioBuffer, data.Get(), dataInfo);
}
void VideoBackendPlayer::ReleaseResources()
{
if (AudioBuffer)
AudioBackend::Buffer::Delete(AudioBuffer);
if (UploadVideoFrameTask)
UploadVideoFrameTask->Cancel();
VideoFrameMemory.Release();