Add initial audio buffer support in video player

This commit is contained in:
Wojtek Figat
2024-05-03 14:32:23 +02:00
parent 754ed56119
commit 388a0f4196
3 changed files with 230 additions and 150 deletions

View File

@@ -37,7 +37,7 @@ namespace
bool result = true;
// Find the native format of the stream
HRESULT hr = playerMF.SourceReader->GetNativeMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType);
HRESULT hr = playerMF.SourceReader->GetNativeMediaType(streamIndex, MF_SOURCE_READER_CURRENT_TYPE_INDEX, &nativeType);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(GetNativeMediaType, hr);
@@ -81,7 +81,7 @@ namespace
player.Width = videoArea.Area.cx;
player.Height = videoArea.Area.cy;
}
player.AvgBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0);
player.AvgVideoBitRate = MFGetAttributeUINT32(mediaType, MF_MT_AVG_BITRATE, 0);
uint64_t fpsValue;
hr = mediaType->GetUINT64(MF_MT_FRAME_RATE, &fpsValue);
if (SUCCEEDED(hr))
@@ -132,6 +132,26 @@ namespace
player.AudioInfo.SampleRate = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_SAMPLES_PER_SECOND, 0);
player.AudioInfo.NumChannels = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_NUM_CHANNELS, 0);
player.AudioInfo.BitDepth = MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_BITS_PER_SAMPLE, 16);
if (subtype != MFAudioFormat_PCM)
{
// Reconfigure decoder to output audio data in PCM format
IMFMediaType* customType = nullptr;
hr = MFCreateMediaType(&customType);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(MFCreateMediaType, hr);
goto END;
}
customType->SetGUID(MF_MT_MAJOR_TYPE, majorType);
customType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
hr = playerMF.SourceReader->SetCurrentMediaType(streamIndex, nullptr, customType);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(SetCurrentMediaType, hr);
goto END;
}
customType->Release();
}
}
result = false;
@@ -139,6 +159,156 @@ namespace
SAFE_RELEASE(mediaType);
return result;
}
// Pumps the Media Foundation source reader for the given stream (video or audio) until a sample
// covering the current playback time is found, then forwards its payload to the player.
// @param player      Backend player state (frame/audio timing and output sinks).
// @param playerMF    Media Foundation-specific state (source reader, current time).
// @param streamIndex Stream selector passed to ReadSample (MF_SOURCE_READER_FIRST_VIDEO_STREAM or MF_SOURCE_READER_FIRST_AUDIO_STREAM).
// @param dt          Frame delta time, used as a fallback sample duration when none is reported.
// @return True if the sample budget was exhausted without finding a frame for the current time (caller may seek and retry).
bool ReadStream(VideoBackendPlayer& player, VideoPlayerMF& playerMF, DWORD streamIndex, TimeSpan dt)
{
    const bool isVideo = streamIndex == MF_SOURCE_READER_FIRST_VIDEO_STREAM;
    const bool isAudio = streamIndex == MF_SOURCE_READER_FIRST_AUDIO_STREAM;
    const TimeSpan lastFrameTime = isVideo ? player.VideoFrameTime : player.AudioBufferTime;
    const TimeSpan lastFrameDuration = isVideo ? player.VideoFrameDuration : player.AudioBufferDuration;

    // Check if the current frame is valid (eg. when playing 24fps video at 60fps)
    if (lastFrameDuration.Ticks > 0 &&
        Math::IsInRange(playerMF.Time, lastFrameTime, lastFrameTime + lastFrameDuration))
    {
        return false;
    }

    // Read samples until frame is matching the current time (bounded to avoid spinning forever)
    int32 samplesLeft = 500;
    HRESULT hr;
    for (; samplesLeft > 0; samplesLeft--)
    {
        // Read sample (use a separate output variable so the input stream selector is not clobbered across iterations)
        DWORD actualStreamIndex = 0, flags = 0;
        LONGLONG samplePos = 0, sampleDuration = 0;
        IMFSample* sample = nullptr;
        {
            PROFILE_CPU_NAMED("ReadSample");
            hr = playerMF.SourceReader->ReadSample(streamIndex, 0, &actualStreamIndex, &flags, &samplePos, &sample);
            if (FAILED(hr))
            {
                VIDEO_API_MF_ERROR(ReadSample, hr);
                break;
            }
        }
        TimeSpan frameTime((int64)samplePos);
        TimeSpan frameDuration = player.FrameRate > 0 ? TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt;
        if (sample && sample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0)
        {
            // Prefer the duration reported by the sample itself over the nominal frame rate
            frameDuration.Ticks = sampleDuration;
        }
        const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + frameDuration);

        // Process sample
        if (sample && isGoodSample)
        {
            PROFILE_CPU_NAMED("ProcessSample");

            // Lock sample buffer memory (try to use 2D buffer for more direct memory access)
            IMFMediaBuffer* buffer = nullptr;
            IMF2DBuffer* buffer2D = nullptr;
            BYTE* bufferData = nullptr;
            LONG bufferStride = 0;
            if (isVideo && sample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK)
            {
                LONG bufferPitch = 0;
                hr = buffer2D->Lock2D(&bufferData, &bufferPitch);
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(Lock2D, hr);
                    goto PROCESS_SAMPLE_END;
                }
                if (bufferPitch < 0)
                    bufferPitch = -bufferPitch; // Flipped image
                bufferStride = bufferPitch * player.VideoFrameHeight;
            }
            else
            {
                // No 2D access (or audio stream): fall back to a single contiguous buffer
                if (buffer)
                {
                    buffer->Release();
                    buffer = nullptr;
                }
                DWORD bufferLength;
                hr = sample->ConvertToContiguousBuffer(&buffer);
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr);
                    goto PROCESS_SAMPLE_END;
                }
                hr = buffer->GetCurrentLength(&bufferLength);
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(GetCurrentLength, hr);
                    goto PROCESS_SAMPLE_END;
                }
                DWORD bufferMaxLen = 0, bufferCurrentLength = 0;
                hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength);
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(Lock, hr);
                    goto PROCESS_SAMPLE_END;
                }
                bufferStride = bufferCurrentLength;
            }
            {
                Span<byte> bufferSpan(bufferData, bufferStride);
                if (isVideo)
                {
                    // Send pixels to the texture
                    player.UpdateVideoFrame(bufferSpan, frameTime, frameDuration);
                }
                else if (isAudio)
                {
                    // Send PCM data
                    player.UpdateAudioBuffer(bufferSpan, frameTime, frameDuration);
                }
            }

            // Unlock sample buffer memory
            if (buffer2D)
            {
                hr = buffer2D->Unlock2D();
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(Unlock2D, hr);
                }
            }
            else
            {
                hr = buffer->Unlock();
                if (FAILED(hr))
                {
                    VIDEO_API_MF_ERROR(Unlock, hr);
                }
            }

        PROCESS_SAMPLE_END:
            // Release COM references (buffer may be null if ConvertToContiguousBuffer failed)
            if (buffer2D)
                buffer2D->Release();
            if (buffer)
                buffer->Release();
        }
        if (sample)
            sample->Release();
        if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
        {
            // Media ended
            break;
        }
        if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
        {
            // Format/metadata might have changed so update the stream
            Configure(player, playerMF, actualStreamIndex);
        }
        // End loop if got good sample or need to seek back
        if (isGoodSample)
            break;
    }
    // True if run out of samples and failed to get frame for the current time
    return samplesLeft == 0;
}
}
bool VideoBackendMF::Player_Create(const VideoBackendPlayerInfo& info, VideoBackendPlayer& player)
@@ -278,7 +448,6 @@ void VideoBackendMF::Base_Update()
{
PROFILE_CPU();
// TODO: use async Task Graph to update videos
HRESULT hr;
for (auto* e : Players)
{
auto& player = *e;
@@ -341,140 +510,15 @@ void VideoBackendMF::Base_Update()
// After seeking, the application should call ReadSample and advance to the desired position.
}
// Check if the current frame is valid (eg. when playing 24fps video at 60fps)
if (player.VideoFrameDuration.Ticks > 0 &&
Math::IsInRange(playerMF.Time, player.VideoFrameTime, player.VideoFrameTime + player.VideoFrameDuration))
{
continue;
}
// Read samples until frame is matching the current time
int32 samplesLeft = 500;
for (; samplesLeft > 0; samplesLeft--)
{
// Read sample
DWORD streamIndex = 0, flags = 0;
LONGLONG samplePos = 0, sampleDuration = 0;
IMFSample* videoSample = nullptr;
{
PROFILE_CPU_NAMED("ReadSample");
hr = playerMF.SourceReader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM, 0, &streamIndex, &flags, &samplePos, &videoSample);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(ReadSample, hr);
break;
}
}
TimeSpan frameTime((int64)samplePos);
TimeSpan franeDuration = player.FrameRate > 0 ? TimeSpan::FromSeconds(1.0 / player.FrameRate) : dt;
if (videoSample && videoSample->GetSampleDuration(&sampleDuration) == S_OK && sampleDuration > 0)
{
franeDuration.Ticks = sampleDuration;
}
//const int32 framesToTime = (playerMF.Time.Ticks - frameTime.Ticks) / franeDuration.Ticks;
const bool isGoodSample = Math::IsInRange(playerMF.Time, frameTime, frameTime + franeDuration);
// Process sample
if (videoSample && isGoodSample)
{
PROFILE_CPU_NAMED("ProcessSample");
// Lock sample buffer memory (try to use 2D buffer for more direct memory access)
IMFMediaBuffer* buffer = nullptr;
IMF2DBuffer* buffer2D = nullptr;
BYTE* bufferData = nullptr;
LONG bufferStride = 0;
if (videoSample->GetBufferByIndex(0, &buffer) == S_OK && buffer->QueryInterface(IID_PPV_ARGS(&buffer2D)) == S_OK)
{
LONG bufferPitch = 0;
hr = buffer2D->Lock2D(&bufferData, &bufferPitch);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(GetCurrentLength, hr);
goto PROCESS_SAMPLE_END;
}
if (bufferPitch < 0)
bufferPitch = -bufferPitch; // Flipped image
bufferStride = bufferPitch * player.VideoFrameHeight;
}
else
{
if (buffer)
{
buffer->Release();
buffer = nullptr;
}
DWORD bufferLength;
hr = videoSample->ConvertToContiguousBuffer(&buffer);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(ConvertToContiguousBuffer, hr);
goto PROCESS_SAMPLE_END;
}
hr = buffer->GetCurrentLength(&bufferLength);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(GetCurrentLength, hr);
goto PROCESS_SAMPLE_END;
}
DWORD bufferMaxLen = 0, bufferCurrentLength = 0;
hr = buffer->Lock(&bufferData, &bufferMaxLen, &bufferCurrentLength);
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(Lock, hr);
goto PROCESS_SAMPLE_END;
}
bufferStride = bufferCurrentLength;
}
// Send pixels to the texture
player.UpdateVideoFrame(Span<byte>(bufferData, bufferStride), frameTime, franeDuration);
// Unlock sample buffer memory
if (buffer2D)
{
hr = buffer2D->Unlock2D();
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(Unlock2D, hr);
}
}
else
{
hr = buffer->Unlock();
if (FAILED(hr))
{
VIDEO_API_MF_ERROR(Unlock, hr);
}
}
PROCESS_SAMPLE_END:
buffer->Release();
}
if (videoSample)
videoSample->Release();
if (flags & MF_SOURCE_READERF_ENDOFSTREAM)
{
// Media ended
break;
}
if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED || flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
{
// Format/metadata might have changed so update the stream
Configure(player, playerMF, streamIndex);
}
// End loop if got good sample or need to seek back
if (isGoodSample)
break;
}
if (samplesLeft == 0 && seeks < 2)
// Update streams
if (ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_VIDEO_STREAM, dt))
{
// Failed to pick a valid sample so try again with seeking
playerMF.Seek = 1;
goto SEEK_START;
}
if (player.AudioInfo.BitDepth != 0)
ReadStream(player, playerMF, MF_SOURCE_READER_FIRST_AUDIO_STREAM, dt);
}
}

View File

@@ -6,6 +6,7 @@
#include "Engine/Core/Types/TimeSpan.h"
#include "Engine/Core/Types/DataContainer.h"
#include "Engine/Audio/Types.h"
#include "Engine/Audio/Config.h"
#include "Engine/Graphics/PixelFormat.h"
class Video;
@@ -22,19 +23,29 @@ class GPUPipelineState;
/// </summary>
struct VideoBackendPlayer
{
VideoBackend* Backend = nullptr;
GPUTexture* Frame = nullptr;
GPUBuffer* FrameUpload = nullptr;
int32 Width = 0, Height = 0, AvgBitRate = 0, FramesCount = 0;
int32 VideoFrameWidth = 0, VideoFrameHeight = 0;
PixelFormat Format = PixelFormat::Unknown;
float FrameRate = 0.0f;
TimeSpan Duration = TimeSpan(0);
TimeSpan VideoFrameTime = TimeSpan(0), VideoFrameDuration = TimeSpan(0);
AudioDataInfo AudioInfo = {};
VideoBackend* Backend;
GPUTexture* Frame;
GPUBuffer* FrameUpload;
int32 Width, Height, AvgVideoBitRate, FramesCount;
int32 VideoFrameWidth, VideoFrameHeight;
PixelFormat Format;
float FrameRate;
TimeSpan Duration;
TimeSpan VideoFrameTime, VideoFrameDuration;
TimeSpan AudioBufferTime, AudioBufferDuration;
AudioDataInfo AudioInfo;
BytesContainer VideoFrameMemory;
class GPUUploadVideoFrameTask* UploadVideoFrameTask = nullptr;
uintptr BackendState[8] = {};
AUDIO_BUFFER_ID_TYPE AudioBuffer;
AUDIO_SOURCE_ID_TYPE AudioSource;
class GPUUploadVideoFrameTask* UploadVideoFrameTask;
uintptr BackendState[8];
VideoBackendPlayer()
{
Platform::MemoryClear(this, sizeof(VideoBackendPlayer));
}
POD_COPYABLE(VideoBackendPlayer);
template<typename T>
FORCE_INLINE T& GetBackendState()
@@ -51,6 +62,7 @@ struct VideoBackendPlayer
}
void InitVideoFrame();
void UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeSpan duration);
void UpdateVideoFrame(Span<byte> data, TimeSpan time, TimeSpan duration);
void UpdateAudioBuffer(Span<byte> data, TimeSpan time, TimeSpan duration);
void ReleaseResources();
};

View File

@@ -2,13 +2,13 @@
#include "Video.h"
#include "VideoBackend.h"
#include "Engine/Audio/AudioBackend.h"
#include "Engine/Core/Log.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Engine/EngineService.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUBuffer.h"
#include "Engine/Graphics/GPUResource.h"
#include "Engine/Graphics/GPUPipelineState.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Graphics/Async/GPUTask.h"
@@ -186,7 +186,7 @@ void VideoBackendPlayer::InitVideoFrame()
Frame = GPUDevice::Instance->CreateTexture(TEXT("VideoFrame"));
}
void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeSpan duration)
void VideoBackendPlayer::UpdateVideoFrame(Span<byte> data, TimeSpan time, TimeSpan duration)
{
PROFILE_CPU();
VideoFrameTime = time;
@@ -197,9 +197,9 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
// Ensure that sampled frame data matches the target texture size
uint32 rowPitch, slicePitch;
RenderTools::ComputePitch(Format, VideoFrameWidth, VideoFrameHeight, rowPitch, slicePitch);
if (slicePitch != frame.Length())
if (slicePitch != data.Length())
{
LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", frame.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format));
LOG(Warning, "Incorrect video frame stride {}, doesn't match stride {} of video {}x{} in format {}", data.Length(), slicePitch, Width, Height, ScriptingEnum::ToString(Format));
return;
}
@@ -213,7 +213,7 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
return;
}
}
Platform::MemoryCopy(VideoFrameMemory.Get(), frame.Get(), slicePitch);
Platform::MemoryCopy(VideoFrameMemory.Get(), data.Get(), slicePitch);
// Update output frame texture
InitVideoFrame();
@@ -235,8 +235,32 @@ void VideoBackendPlayer::UpdateVideoFrame(Span<byte> frame, TimeSpan time, TimeS
}
}
// Submits a decoded PCM chunk to the audio backend buffer associated with this player.
// @param data     Raw PCM bytes for this chunk (layout described by AudioInfo).
// @param time     Presentation time of the chunk start.
// @param duration Playback duration of the chunk.
void VideoBackendPlayer::UpdateAudioBuffer(Span<byte> data, TimeSpan time, TimeSpan duration)
{
    PROFILE_CPU();
    AudioBufferTime = time;
    AudioBufferDuration = duration;
    if (!AudioBackend::Instance)
        return;
    // Reject invalid/not-yet-configured stream info (also prevents division by zero below)
    if (AudioInfo.SampleRate == 0 || AudioInfo.NumChannels == 0 || AudioInfo.BitDepth == 0)
        return;

    // Update audio buffer
    if (!AudioBuffer)
        AudioBuffer = AudioBackend::Buffer::Create();
    AudioDataInfo dataInfo = AudioInfo;
    // Clamp the written sample count to both the incoming data size and the chunk duration
    const uint32 samplesPerSecond = dataInfo.SampleRate * dataInfo.NumChannels;
    const uint32 maxSamplesInData = (uint32)data.Length() * 8 / dataInfo.BitDepth;
    const uint32 maxSamplesInDuration = (uint32)Math::CeilToInt(samplesPerSecond * duration.GetTotalSeconds());
    dataInfo.NumSamples = Math::Min(maxSamplesInData, maxSamplesInDuration);
    AudioBackend::Buffer::Write(AudioBuffer, data.Get(), dataInfo);
}
void VideoBackendPlayer::ReleaseResources()
{
if (AudioBuffer)
AudioBackend::Buffer::Delete(AudioBuffer);
if (UploadVideoFrameTask)
UploadVideoFrameTask->Cancel();
VideoFrameMemory.Release();