From 388a0f4196170e6b6521d884f68263afc07ba3c8 Mon Sep 17 00:00:00 2001
From: Wojtek Figat
Date: Fri, 3 May 2024 14:32:23 +0200
Subject: [PATCH] Add initial audio buffer support in video player

---
 Source/Engine/Video/MF/VideoBackendMF.cpp | 308 ++++++++++++----------
 Source/Engine/Video/Types.h               |  38 ++-
 Source/Engine/Video/Video.cpp             |  34 ++-
 3 files changed, 230 insertions(+), 150 deletions(-)

diff --git a/Source/Engine/Video/MF/VideoBackendMF.cpp b/Source/Engine/Video/MF/VideoBackendMF.cpp
index 8ccefb403..84715db45 100644
--- a/Source/Engine/Video/MF/VideoBackendMF.cpp
+++ b/Source/Engine/Video/MF/VideoBackendMF.cpp
@@ -37,7 +37,7 @@ namespace
         bool result = true;
 
         // Find the native format of the stream
-        HRESULT hr = playerMF.SourceReader->GetNativeMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType);
+        HRESULT hr = playerMF.SourceReader->GetNativeMediaType(streamIndex, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType);
         if (FAILED(hr))
         {
             VIDEO_API_MF_ERROR(GetNativeMediaType, hr);
@@ -81,7 +81,7 @@ namespace
             player.Width = videoArea.Area.cx;
             player.Height = videoArea.Area.cy;
         }
-        player.AvgBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0);
+        player.AvgVideoBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0);
         uint64_t fpsValue;
         hr = mediaType->GetUINT64(MF_MT_FRAME_RATE, &fpsValue);
         if (SUCCEEDED(hr))
@@ -132,6 +132,26 @@ namespace
             player.AudioInfo.SampleRate = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_SAMPLES_PER_SECOND, 0);
             player.AudioInfo.NumChannels = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_NUM_CHANNELS, 0);
             player.AudioInfo.BitDepth = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_BITS_PER_SAMPLE, 16);
+            if (subtype != MFAudioFormat_PCM)
+            {
+                // Reconfigure decoder to output audio data in PCM format
+                IMFMediaType* customType = nullptr;
+                hr = MFCreateMediaType(&customType);
+                if (FAILED(hr))
+                {
+                    VIDEO_API_MF_ERROR(MFCreateMediaType, hr);
+                    goto END;
+                }
+                customType->SetGUID(MF_MT_MAJOR_TYPE, majorType);
+                customType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
+                hr = playerMF.SourceReader->SetCurrentMediaType(streamIndex, nullptr, customType);
+                customType->Release(); // Release in both paths so the media type is not leaked on failure
+                if (FAILED(hr))
+                {
+                    VIDEO_API_MF_ERROR(SetCurrentMediaType, hr);
+                    goto END;
+                }
+            }
         }
 
         result = false;
@@ -139,6 +159,156 @@ namespace
         SAFE_RELEASE(mediaType);
         return result;
     }
+
+    bool ReadStream(VideoBackendPlayer& player, VideoPlayerMF& playerMF, DWORD streamIndex, TimeSpan dt)
+    {
+        const bool isVideo = streamIndex == MF_SOURCE_READER_FIRST_VIDEO_STREAM;
+        const bool isAudio = streamIndex == MF_SOURCE_READER_FIRST_AUDIO_STREAM;
+        const TimeSpan lastFrameTime = isVideo ? player.VideoFrameTime : player.AudioBufferTime;
+        const TimeSpan lastFrameDuration = isVideo ? player.VideoFrameDuration : player.AudioBufferDuration;
+
+        // Check if the current frame is valid (e.g. when playing 24fps video at 60fps)
+        if (lastFrameDuration.Ticks > 0 &&
+            Math::IsInRange(playerMF.Time, lastFrameTime, lastFrameTime + lastFrameDuration))
+        {
+            return false;
+        }
+
+        // Read samples until a frame matches the current time
+        int32 samplesLeft = 500;
+        HRESULT hr;
+        for (; samplesLeft > 0; samplesLeft--)
+        {
+            // Read sample
+            DWORD flags = 0;
+            LONGLONG samplePos = 0, sampleDuration = 0;
+            IMFSample* sample = nullptr;
+            {
+                PROFILE_CPU_NAMED("ReadSample");
+                hr = playerMF.SourceReader->ReadSample(streamIndex, 0, &streamIndex, &flags, &samplePos, &sample);
+                if (FAILED(hr))
+                {
+                    VIDEO_API_MF_ERROR(ReadSample, hr);
+                    break;
+                }
+            }
+            TimeSpan frameTime((int64)samplePos);
+            TimeSpan frameDuration = player.FrameRate > 0 ? TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt;
+            if (sample && sample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0)
+            {
+                frameDuration.Ticks = sampleDuration;
+            }
+            //const int32 framesToTime = (playerMF.Time.Ticks - frameTime.Ticks) / frameDuration.Ticks;
+            const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + frameDuration);
+
+            // Process sample
+            if (sample && isGoodSample)
+            {
+                PROFILE_CPU_NAMED("ProcessSample");
+
+                // Lock sample buffer memory (try to use 2D buffer for more direct memory access)
+                IMFMediaBuffer* buffer = nullptr;
+                IMF2DBuffer* buffer2D = nullptr;
+                BYTE* bufferData = nullptr;
+                LONG bufferStride = 0;
+                if (isVideo && sample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK)
+                {
+                    LONG bufferPitch = 0;
+                    hr = buffer2D->Lock2D(&bufferData, &bufferPitch);
+                    if (FAILED(hr))
+                    {
+                        VIDEO_API_MF_ERROR(Lock2D, hr);
+                        goto PROCESS_SAMPLE_END;
+                    }
+                    if (bufferPitch < 0)
+                        bufferPitch = -bufferPitch; // Flipped image
+                    bufferStride = bufferPitch * player.VideoFrameHeight;
+                }
+                else
+                {
+                    if (buffer)
+                    {
+                        buffer->Release();
+                        buffer = nullptr;
+                    }
+                    DWORD bufferLength;
+                    hr = sample->ConvertToContiguousBuffer(&buffer);
+                    if (FAILED(hr))
+                    {
+                        VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr);
+                        goto PROCESS_SAMPLE_END;
+                    }
+                    hr = buffer->GetCurrentLength(&bufferLength);
+                    if (FAILED(hr))
+                    {
+                        VIDEO_API_MF_ERROR(GetCurrentLength, hr);
+                        goto PROCESS_SAMPLE_END;
+                    }
+                    DWORD bufferMaxLen = 0, bufferCurrentLength = 0;
+                    hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength);
+                    if (FAILED(hr))
+                    {
+                        VIDEO_API_MF_ERROR(Lock, hr);
+                        goto PROCESS_SAMPLE_END;
+                    }
+                    bufferStride = bufferCurrentLength;
+                }
+
+                if (isVideo)
+                {
+                    // Send pixels to the texture
+                    player.UpdateVideoFrame(Span<byte>(bufferData, bufferStride), frameTime, frameDuration);
+                }
+                else if (isAudio)
+                {
+                    // Send PCM data
+                    player.UpdateAudioBuffer(Span<byte>(bufferData, bufferStride), frameTime, frameDuration);
+                }
+
+                // Unlock sample buffer memory
+                if (buffer2D)
+                {
+                    hr = buffer2D->Unlock2D();
+                    if (FAILED(hr))
+                    {
+                        VIDEO_API_MF_ERROR(Unlock2D, hr);
+                    }
+                }
+                else
+                {
+                    hr = buffer->Unlock();
+                    if (FAILED(hr))
+                    {
+                        VIDEO_API_MF_ERROR(Unlock, hr);
+                    }
+                }
+
+            PROCESS_SAMPLE_END:
+                if (buffer)
+                    buffer->Release();
+            }
+            if (sample)
+                sample->Release();
+
+            if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
+            {
+                // Media ended
+                break;
+            }
+            if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
+            {
+                // Format/metadata might have changed so update the stream
+                Configure(player, playerMF, streamIndex);
+            }
+
+            // End loop if we got a good sample or need to seek back
+            if (isGoodSample)
+                break;
+        }
+
+        // True if we ran out of samples and failed to get a frame for the current time
+        return samplesLeft == 0;
+    }
 }
 
 bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player)
@@ -278,7 +448,6 @@ void VideoBackendMF::Base_Update()
 {
     PROFILE_CPU();
     // TODO: use async Task Graph to update videos
-    HRESULT hr;
     for (auto* e : Players)
     {
         auto& player = *e;
@@ -341,140 +510,15 @@ void VideoBackendMF::Base_Update()
             // After seeking, the application should call ReadSample and advance to the desired position.
         }
 
-        // Check if the current frame is valid (eg. when playing 24fps video at 60fps)
-        if (player.VideoFrameDuration.Ticks > 0 &&
-            Math::IsInRange(playerMF.Time, player.VideoFrameTime, player.VideoFrameTime + player.VideoFrameDuration))
-        {
-            continue;
-        }
-
-        // Read samples until frame is matching the current time
-        int32 samplesLeft = 500;
-        for (; samplesLeft > 0; samplesLeft--)
-        {
-            // Read sample
-            DWORD streamIndex = 0, flags = 0;
-            LONGLONG samplePos = 0, sampleDuration = 0;
-            IMFSample* videoSample = nullptr;
-            {
-                PROFILE_CPU_NAMED("ReadSample");
-                hr = playerMF.SourceReader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, &streamIndex, &flags, &samplePos, &videoSample);
-                if (FAILED(hr))
-                {
-                    VIDEO_API_MF_ERROR(ReadSample, hr);
-                    break;
-                }
-            }
-            TimeSpan frameTime((int64)samplePos);
-            TimeSpan franeDuration = player.FrameRate > 0 ? TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt;
-            if (videoSample && videoSample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0)
-            {
-                franeDuration.Ticks = sampleDuration;
-            }
-            //const int32 framesToTime = (playerMF.Time.Ticks - frameTime.Ticks) / franeDuration.Ticks;
-            const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + franeDuration);
-
-            // Process sample
-            if (videoSample && isGoodSample)
-            {
-                PROFILE_CPU_NAMED("ProcessSample");
-
-                // Lock sample buffer memory (try to use 2D buffer for more direct memory access)
-                IMFMediaBuffer* buffer = nullptr;
-                IMF2DBuffer* buffer2D = nullptr;
-                BYTE* bufferData = nullptr;
-                LONG bufferStride = 0;
-                if (videoSample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK)
-                {
-                    LONG bufferPitch = 0;
-                    hr = buffer2D->Lock2D(&bufferData, &bufferPitch);
-                    if (FAILED(hr))
-                    {
-                        VIDEO_API_MF_ERROR(GetCurrentLength, hr);
-                        goto PROCESS_SAMPLE_END;
-                    }
-                    if (bufferPitch < 0)
-                        bufferPitch = -bufferPitch; // Flipped image
-                    bufferStride = bufferPitch * player.VideoFrameHeight;
-                }
-                else
-                {
-                    if (buffer)
-                    {
-                        buffer->Release();
-                        buffer = nullptr;
-                    }
-                    DWORD bufferLength;
-                    hr = videoSample->ConvertToContiguousBuffer(&buffer);
-                    if (FAILED(hr))
-                    {
-                        VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr);
-                        goto PROCESS_SAMPLE_END;
-                    }
-                    hr = buffer->GetCurrentLength(&bufferLength);
-                    if (FAILED(hr))
-                    {
-                        VIDEO_API_MF_ERROR(GetCurrentLength, hr);
-                        goto PROCESS_SAMPLE_END;
-                    }
-                    DWORD bufferMaxLen = 0, bufferCurrentLength = 0;
-                    hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength);
-                    if (FAILED(hr))
-                    {
-                        VIDEO_API_MF_ERROR(Lock, hr);
-                        goto PROCESS_SAMPLE_END;
-                    }
-                    bufferStride = bufferCurrentLength;
-                }
-
-                // Send pixels to the texture
-                player.UpdateVideoFrame(Span<byte>(bufferData, bufferStride), frameTime, franeDuration);
-
-                // Unlock sample buffer memory
-                if (buffer2D)
-                {
-                    hr = buffer2D->Unlock2D();
-                    if (FAILED(hr))
-                    {
-                        VIDEO_API_MF_ERROR(Unlock2D, hr);
-                    }
-                }
-                else
-                {
-                    hr = buffer->Unlock();
-                    if (FAILED(hr))
-                    {
-                        VIDEO_API_MF_ERROR(Unlock, hr);
-                    }
-                }
-
-            PROCESS_SAMPLE_END:
-                buffer->Release();
-            }
-            if (videoSample)
-                videoSample->Release();
-
-            if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
-            {
-                // Media ended
-                break;
-            }
-            if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
-            {
-                // Format/metadata might have changed so update the stream
-                Configure(player, playerMF, streamIndex);
-            }
-
-            // End loop if got good sample or need to seek back
-            if (isGoodSample)
-                break;
-        }
-        if (samplesLeft == 0 && seeks < 2)
+        // Update streams
+        if (ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM, dt) && seeks < 2)
         {
             // Failed to pick a valid sample so try again with seeking
             playerMF.Seek = 1;
             goto SEEK_START;
         }
+        if (player.AudioInfo.BitDepth != 0)
+            ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM, dt);
     }
 }
 
diff --git a/Source/Engine/Video/Types.h b/Source/Engine/Video/Types.h
index 1a1a3fad1..ee03b0238 100644
--- a/Source/Engine/Video/Types.h
+++ b/Source/Engine/Video/Types.h
@@ -6,6 +6,7 @@
 #include "Engine/Core/Types/TimeSpan.h"
 #include "Engine/Core/Types/DataContainer.h"
 #include "Engine/Audio/Types.h"
+#include "Engine/Audio/Config.h"
 #include "Engine/Graphics/PixelFormat.h"
 
 class Video;
@@ -22,19 +23,29 @@ class GPUPipelineState;
 /// </summary>
 struct VideoBackendPlayer
 {
-    VideoBackend* Backend = nullptr;
-    GPUTexture* Frame = nullptr;
-    GPUBuffer* FrameUpload = nullptr;
-    int32 Width = 0, Height = 0, AvgBitRate = 0, FramesCount = 0;
-    int32 VideoFrameWidth = 0, VideoFrameHeight = 0;
-    PixelFormat Format = PixelFormat::Unknown;
-    float FrameRate = 0.0f;
-    TimeSpan Duration = TimeSpan(0);
-    TimeSpan VideoFrameTime = TimeSpan(0), VideoFrameDuration = TimeSpan(0);
-    AudioDataInfo AudioInfo = {};
+    VideoBackend* Backend;
+    GPUTexture* Frame;
+    GPUBuffer* FrameUpload;
+    int32 Width, Height, AvgVideoBitRate, FramesCount;
+    int32 VideoFrameWidth, VideoFrameHeight;
+    PixelFormat Format;
+    float FrameRate;
+    TimeSpan Duration;
+    TimeSpan VideoFrameTime, VideoFrameDuration;
+    TimeSpan AudioBufferTime, AudioBufferDuration;
+    AudioDataInfo AudioInfo;
     BytesContainer VideoFrameMemory;
-    class GPUUploadVideoFrameTask* UploadVideoFrameTask = nullptr;
-    uintptr BackendState[8] = {};
+    AUDIO_BUFFER_ID_TYPE AudioBuffer;
+    AUDIO_SOURCE_ID_TYPE AudioSource;
+    class GPUUploadVideoFrameTask* UploadVideoFrameTask;
+    uintptr BackendState[8];
+
+    VideoBackendPlayer()
+    {
+        Platform::MemoryClear(this, sizeof(VideoBackendPlayer));
+    }
+
+    POD_COPYABLE(VideoBackendPlayer);
 
     template<typename T>
     FORCE_INLINE T& GetBackendState()
@@ -51,6 +62,7 @@ struct VideoBackendPlayer
     }
 
     void InitVideoFrame();
-    void UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeSpan duration);
+    void UpdateVideoFrame(Span<byte> data, TimeSpan time, TimeSpan duration);
+    void UpdateAudioBuffer(Span<byte> data, TimeSpan time, TimeSpan duration);
     void ReleaseResources();
 };
diff --git a/Source/Engine/Video/Video.cpp b/Source/Engine/Video/Video.cpp
index fe6eb40d3..814713c3b 100644
--- a/Source/Engine/Video/Video.cpp
+++ b/Source/Engine/Video/Video.cpp
@@ -2,13 +2,13 @@
 
 #include "Video.h"
 #include "VideoBackend.h"
+#include "Engine/Audio/AudioBackend.h"
 #include "Engine/Core/Log.h"
 #include "Engine/Profiler/ProfilerCPU.h"
 #include "Engine/Engine/EngineService.h"
 #include "Engine/Graphics/GPUDevice.h"
 #include "Engine/Graphics/GPUBuffer.h"
 #include "Engine/Graphics/GPUResource.h"
-#include "Engine/Graphics/GPUPipelineState.h"
 #include "Engine/Graphics/PixelFormatExtensions.h"
 #include "Engine/Graphics/RenderTools.h"
 #include "Engine/Graphics/Async/GPUTask.h"
@@ -186,7 +186,7 @@ void VideoBackendPlayer::InitVideoFrame()
     Frame = GPUDevice::Instance->CreateTexture(TEXT("VideoFrame"));
 }
 
-void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeSpan duration)
+void VideoBackendPlayer::UpdateVideoFrame(Span<byte> data, TimeSpan time, TimeSpan duration)
 {
     PROFILE_CPU();
     VideoFrameTime = time;
@@ -197,9 +197,9 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
     // Ensure that sampled frame data matches the target texture size
     uint32 rowPitch, slicePitch;
     RenderTools::ComputePitch(Format, VideoFrameWidth, VideoFrameHeight, rowPitch, slicePitch);
-    if (slicePitch != frame.Length())
+    if (slicePitch != data.Length())
     {
-        LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", frame.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format));
+        LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", data.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format));
         return;
     }
 
@@ -213,7 +213,7 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
             return;
         }
     }
-    Platform::MemoryCopy(VideoFrameMemory.Get(), frame.Get(), slicePitch);
+    Platform::MemoryCopy(VideoFrameMemory.Get(), data.Get(), slicePitch);
 
     // Update output frame texture
     InitVideoFrame();
@@ -235,8 +235,32 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
     }
 }
 
+void VideoBackendPlayer::UpdateAudioBuffer(Span<byte> data, TimeSpan time, TimeSpan duration)
+{
+    PROFILE_CPU();
+    AudioBufferTime = time;
+    AudioBufferDuration = duration;
+    auto start = time.GetTotalMilliseconds();
+    auto dur = duration.GetTotalMilliseconds();
+    auto end = (time + duration).GetTotalMilliseconds();
+    if (!AudioBackend::Instance)
+        return;
+
+    // Update audio buffer
+    if (!AudioBuffer)
+        AudioBuffer = AudioBackend::Buffer::Create();
+    AudioDataInfo dataInfo = AudioInfo;
+    const uint32 samplesPerSecond = dataInfo.SampleRate * dataInfo.NumChannels;
+    const uint32 maxSamplesInData = (uint32)data.Length() * 8 / dataInfo.BitDepth;
+    const uint32 maxSamplesInDuration = (uint32)Math::CeilToInt(samplesPerSecond * duration.GetTotalSeconds());
+    dataInfo.NumSamples = Math::Min(maxSamplesInData, maxSamplesInDuration);
+    AudioBackend::Buffer::Write(AudioBuffer, data.Get(), dataInfo);
+}
+
 void VideoBackendPlayer::ReleaseResources()
 {
+    if (AudioBuffer)
+        AudioBackend::Buffer::Delete(AudioBuffer);
     if (UploadVideoFrameTask)
         UploadVideoFrameTask->Cancel();
     VideoFrameMemory.Release();
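
Editorial note (not part of the patch): the sample-count clamping that UpdateAudioBuffer performs above can be illustrated in isolation. The sketch below mirrors that arithmetic in plain standard C++; the helper name ClampPcmSampleCount and the example numbers are hypothetical and only restate the patch's logic, where the sample count is taken across all interleaved channels and clamped both to what fits in the decoded byte span and to what the reported duration implies.

// Standalone sketch (assumed helper, not a Flax API) of the NumSamples math in UpdateAudioBuffer.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

static uint32_t ClampPcmSampleCount(uint32_t dataBytes, uint32_t bitDepth,
                                    uint32_t sampleRate, uint32_t numChannels,
                                    double durationSeconds)
{
    // Samples that physically fit in the decoded PCM buffer
    const uint32_t maxSamplesInData = dataBytes * 8 / bitDepth;
    // Samples expected for the reported duration across all channels
    const uint32_t samplesPerSecond = sampleRate * numChannels;
    const uint32_t maxSamplesInDuration = (uint32_t)std::ceil(samplesPerSecond * durationSeconds);
    return std::min(maxSamplesInData, maxSamplesInDuration);
}

int main()
{
    // 0.25 s of 48 kHz stereo 16-bit PCM = 48000 bytes -> 24000 interleaved samples
    std::printf("%u\n", ClampPcmSampleCount(48000, 16, 48000, 2, 0.25)); // prints 24000
    return 0;
}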