Merge remote-tracking branch 'origin/1.11' into sdl_platform_1.11

# Conflicts:
#	Source/Engine/Platform/Windows/WindowsPlatform.cpp
#	Source/Tools/Flax.Build/Build/ProjectTarget.cs
#	Source/Tools/Flax.Build/Configuration.cs
2025-09-07 19:31:16 +03:00
470 changed files with 53355 additions and 3350 deletions

View File

@@ -49,10 +49,19 @@ void CmdBufferVulkan::End()
PROFILE_CPU();
ASSERT(IsOutsideRenderPass());
#if GPU_ALLOW_PROFILE_EVENTS && VK_EXT_debug_utils
#if GPU_ALLOW_PROFILE_EVENTS
// End remaining events
while (_eventsBegin--)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
{
#if VK_EXT_debug_utils
if (vkCmdEndDebugUtilsLabelEXT)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#endif
#if GPU_ENABLE_TRACY
tracy::EndVkZoneScope(_tracyZones.Last().Data);
_tracyZones.RemoveLast();
#endif
}
#endif
VALIDATE_VULKAN_RESULT(vkEndCommandBuffer(GetHandle()));
@@ -85,39 +94,43 @@ void CmdBufferVulkan::EndRenderPass()
#if GPU_ALLOW_PROFILE_EVENTS
void CmdBufferVulkan::BeginEvent(const Char* name)
void CmdBufferVulkan::BeginEvent(const Char* name, void* tracyContext)
{
#if VK_EXT_debug_utils
if (!vkCmdBeginDebugUtilsLabelEXT)
return;
_eventsBegin++;
// Convert to ANSI
char buffer[101];
int32 i = 0;
while (i < 100 && name[i])
{
buffer[i] = (char)name[i];
i++;
}
buffer[i] = 0;
char buffer[60];
int32 bufferSize = StringUtils::Copy(buffer, name, sizeof(buffer));
VkDebugUtilsLabelEXT label;
RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT);
label.pLabelName = buffer;
vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label);
#if GPU_ENABLE_TRACY
auto& zone = _tracyZones.AddOne();
tracy::BeginVkZoneScope(zone.Data, tracyContext, GetHandle(), buffer, bufferSize);
#endif
#if VK_EXT_debug_utils
if (vkCmdBeginDebugUtilsLabelEXT)
{
VkDebugUtilsLabelEXT label;
RenderToolsVulkan::ZeroStruct(label, VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT);
label.pLabelName = buffer;
vkCmdBeginDebugUtilsLabelEXT(GetHandle(), &label);
}
#endif
}
void CmdBufferVulkan::EndEvent()
{
#if VK_EXT_debug_utils
if (_eventsBegin == 0 || !vkCmdEndDebugUtilsLabelEXT)
if (_eventsBegin == 0)
return;
_eventsBegin--;
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#if VK_EXT_debug_utils
if (vkCmdEndDebugUtilsLabelEXT)
vkCmdEndDebugUtilsLabelEXT(GetHandle());
#endif
#if GPU_ENABLE_TRACY
tracy::EndVkZoneScope(_tracyZones.Last().Data);
_tracyZones.RemoveLast();
#endif
}
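
The Begin/End pair has to stay balanced, since both the Vulkan debug-label stack and the Tracy zone stack are popped in reverse order. A minimal sketch of a hypothetical RAII wrapper (not part of this commit) that keeps the pairing safe across early returns, assuming the CmdBufferVulkan API above:

struct GpuEventScopeVulkan
{
    CmdBufferVulkan* Cmd;

    GpuEventScopeVulkan(CmdBufferVulkan* cmd, const Char* name, void* tracyContext)
        : Cmd(cmd)
    {
        Cmd->BeginEvent(name, tracyContext); // pushes a debug label and a Tracy GPU zone
    }

    ~GpuEventScopeVulkan()
    {
        Cmd->EndEvent(); // pops both in reverse order, keeping _eventsBegin balanced
    }
};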

View File

@@ -5,6 +5,7 @@
#include "GPUDeviceVulkan.h"
#include "Engine/Core/Types/BaseTypes.h"
#include "Engine/Core/Collections/Array.h"
#include <ThirdParty/tracy/tracy/TracyVulkan.hpp>
#if GRAPHICS_API_VULKAN
@@ -42,6 +43,8 @@ private:
FenceVulkan* _fence;
#if GPU_ALLOW_PROFILE_EVENTS
int32 _eventsBegin = 0;
struct TracyZone { byte Data[TracyVulkanZoneSize]; };
Array<TracyZone, InlinedAllocation<32>> _tracyZones;
#endif
// The latest value when command buffer was submitted.
@@ -129,7 +132,7 @@ public:
}
#if GPU_ALLOW_PROFILE_EVENTS
void BeginEvent(const Char* name);
void BeginEvent(const Char* name, void* tracyContext);
void EndEvent();
#endif

View File

@@ -33,6 +33,10 @@
#define VULKAN_USE_DEBUG_LAYER GPU_ENABLE_DIAGNOSTICS
#define VULKAN_USE_DEBUG_DATA (GPU_ENABLE_DIAGNOSTICS && COMPILE_WITH_DEV_ENV)
#ifndef VULKAN_USE_PIPELINE_CACHE
#define VULKAN_USE_PIPELINE_CACHE 1
#endif
#ifndef VULKAN_USE_VALIDATION_CACHE
#ifdef VK_EXT_validation_cache
#define VULKAN_USE_VALIDATION_CACHE VK_EXT_validation_cache

View File

@@ -292,7 +292,7 @@ DescriptorPoolSetContainerVulkan* DescriptorPoolsManagerVulkan::AcquirePoolSetCo
ScopeLock lock(_locker);
for (auto* poolSet : _poolSets)
{
if (poolSet->Refs == 0 && Engine::FrameCount - poolSet->LastFrameUsed > VULKAN_RESOURCE_DELETE_SAFE_FRAMES_COUNT)
if (poolSet->Refs == 0 && Engine::FrameCount != poolSet->LastFrameUsed)
{
poolSet->LastFrameUsed = Engine::FrameCount;
poolSet->Reset();
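
The reuse condition is now both simpler and more aggressive: a pool set no longer has to sit idle for VULKAN_RESOURCE_DELETE_SAFE_FRAMES_COUNT frames, it only has to be unreferenced and not already recycled during the current frame. Restated as a standalone predicate (for clarity only, not engine code):

bool CanRecycle(const DescriptorPoolSetContainerVulkan* poolSet)
{
    // Unreferenced, and not already handed out this frame
    return poolSet->Refs == 0 && Engine::FrameCount != poolSet->LastFrameUsed;
}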

View File

@@ -65,7 +65,7 @@ public:
uint32 Hash = 0;
uint32 SetLayoutsHash = 0;
uint32 LayoutTypes[VULKAN_DESCRIPTOR_TYPE_END];
uint32 LayoutTypes[VULKAN_DESCRIPTOR_TYPE_END + 1];
Array<SetLayout> SetLayouts;
public:

View File

@@ -19,7 +19,7 @@ void GPUBufferViewVulkan::Init(GPUDeviceVulkan* device, GPUBufferVulkan* owner,
Buffer = buffer;
Size = size;
if ((owner->IsShaderResource() && !(owner->GetDescription().Flags & GPUBufferFlags::Structured)) || (usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) == VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)
if ((EnumHasAnyFlags(owner->GetDescription().Flags, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess) && !(owner->GetDescription().Flags & GPUBufferFlags::Structured)) || (usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) == VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)
{
VkBufferViewCreateInfo viewInfo;
RenderToolsVulkan::ZeroStruct(viewInfo, VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
@@ -103,7 +103,7 @@ bool GPUBufferVulkan::OnInit()
bufferInfo.usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
if (useUAV || EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::Structured))
bufferInfo.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
if (useUAV && useSRV)
if (useUAV)
bufferInfo.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
if (EnumHasAnyFlags(_desc.Flags, GPUBufferFlags::Argument))
bufferInfo.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
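
The reworked view-creation condition is dense; a hypothetical restatement as a named predicate (illustration only, using the same flags as the diff above):

static bool NeedsTypedBufferView(const GPUBufferDescription& desc, VkBufferUsageFlags usage)
{
    // Typed (non-structured) buffers bound as SRV or UAV need a VkBufferView,
    // as does any buffer created with the storage-texel-buffer usage bit
    const bool typedSrvOrUav = EnumHasAnyFlags(desc.Flags, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess) && !(desc.Flags & GPUBufferFlags::Structured);
    const bool storageTexel = (usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) == VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
    return typedSrvOrUav || storageTexel;
}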

View File

@@ -4,6 +4,7 @@
#include "GPUContextVulkan.h"
#include "CmdBufferVulkan.h"
#include "GPUAdapterVulkan.h"
#include "RenderToolsVulkan.h"
#include "Engine/Core/Math/Color.h"
#include "Engine/Core/Math/Rectangle.h"
@@ -15,6 +16,7 @@
#include "Engine/Profiler/RenderStats.h"
#include "GPUShaderProgramVulkan.h"
#include "GPUTextureVulkan.h"
#include "QueueVulkan.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Debug/Exceptions/NotImplementedException.h"
@@ -76,13 +78,14 @@ const Char* ToString(VkImageLayout layout)
void PipelineBarrierVulkan::Execute(const CmdBufferVulkan* cmdBuffer)
{
ASSERT(cmdBuffer->IsOutsideRenderPass());
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), SourceStage, DestStage, 0, 0, nullptr, BufferBarriers.Count(), BufferBarriers.Get(), ImageBarriers.Count(), ImageBarriers.Get());
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), SourceStage, DestStage, 0, MemoryBarriers.Count(), MemoryBarriers.Get(), BufferBarriers.Count(), BufferBarriers.Get(), ImageBarriers.Count(), ImageBarriers.Get());
// Reset
SourceStage = 0;
DestStage = 0;
ImageBarriers.Clear();
BufferBarriers.Clear();
MemoryBarriers.Clear();
#if VK_ENABLE_BARRIERS_DEBUG
ImageBarriersDebug.Clear();
#endif
@@ -107,10 +110,37 @@ GPUContextVulkan::GPUContextVulkan(GPUDeviceVulkan* device, QueueVulkan* queue)
_handlesSizes[(int32)SpirvShaderResourceBindingType::SRV] = GPU_MAX_SR_BINDED;
_handlesSizes[(int32)SpirvShaderResourceBindingType::UAV] = GPU_MAX_UA_BINDED;
#endif
#if GPU_ENABLE_TRACY
#if VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset && !PLATFORM_SWITCH
// Use calibrated timestamps extension
if (vkResetQueryPoolEXT && vkGetCalibratedTimestampsEXT)
{
_tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, vkResetQueryPoolEXT, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT);
}
else
#endif
{
// Use immediate command buffer for timestamps calibration
VkCommandBufferAllocateInfo cmdInfo;
RenderToolsVulkan::ZeroStruct(cmdInfo, VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO);
cmdInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cmdInfo.commandPool = _cmdBufferManager->GetHandle();
cmdInfo.commandBufferCount = 1;
VkCommandBuffer tracyCmdBuffer;
vkAllocateCommandBuffers(_device->Device, &cmdInfo, &tracyCmdBuffer);
_tracyContext = tracy::CreateVkContext(_device->Adapter->Gpu, _device->Device, _queue->GetHandle(), tracyCmdBuffer, vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, vkGetCalibratedTimestampsEXT);
vkQueueWaitIdle(_queue->GetHandle());
vkFreeCommandBuffers(_device->Device, _cmdBufferManager->GetHandle(), 1, &tracyCmdBuffer);
}
#endif
}
GPUContextVulkan::~GPUContextVulkan()
{
#if GPU_ENABLE_TRACY
tracy::DestroyVkContext(_tracyContext);
#endif
for (int32 i = 0; i < _descriptorPools.Count(); i++)
{
_descriptorPools[i].ClearDelete();
@@ -124,12 +154,7 @@ void GPUContextVulkan::AddImageBarrier(VkImage image, VkImageLayout srcLayout, V
#if VK_ENABLE_BARRIERS_BATCHING
// Auto-flush on overflow
if (_barriers.IsFull())
{
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
if (cmdBuffer->IsInsideRenderPass())
EndRenderPass();
_barriers.Execute(cmdBuffer);
}
FlushBarriers();
#endif
// Insert barrier
@@ -161,10 +186,7 @@ void GPUContextVulkan::AddImageBarrier(VkImage image, VkImageLayout srcLayout, V
#if !VK_ENABLE_BARRIERS_BATCHING
// Auto-flush without batching
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
if (cmdBuffer->IsInsideRenderPass())
EndRenderPass();
_barriers.Execute(cmdBuffer);
FlushBarriers();
#endif
}
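
The auto-flush call sites now collapse into FlushBarriers(). Its full body is truncated in this diff, but judging from the inlined sequence it replaces, it presumably amounts to the following sketch (not the engine's verbatim implementation):

void GPUContextVulkan::FlushBarriers()
{
    if (!_barriers.HasBarrier())
        return;
    const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
    if (cmdBuffer->IsInsideRenderPass())
        EndRenderPass(); // vkCmdPipelineBarrier must be recorded outside a render pass
    _barriers.Execute(cmdBuffer);
}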
@@ -286,12 +308,7 @@ void GPUContextVulkan::AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags d
#if VK_ENABLE_BARRIERS_BATCHING
// Auto-flush on overflow
if (_barriers.IsFull())
{
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
if (cmdBuffer->IsInsideRenderPass())
EndRenderPass();
_barriers.Execute(cmdBuffer);
}
FlushBarriers();
#endif
// Insert barrier
@@ -310,13 +327,38 @@ void GPUContextVulkan::AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags d
#if !VK_ENABLE_BARRIERS_BATCHING
// Auto-flush without batching
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
if (cmdBuffer->IsInsideRenderPass())
EndRenderPass();
_barriers.Execute(cmdBuffer);
FlushBarriers();
#endif
}
void GPUContextVulkan::AddMemoryBarrier()
{
#if VK_ENABLE_BARRIERS_BATCHING
// Auto-flush on overflow
if (_barriers.IsFull())
FlushBarriers();
#endif
// Insert barrier
VkMemoryBarrier& memoryBarrier = _barriers.MemoryBarriers.AddOne();
RenderToolsVulkan::ZeroStruct(memoryBarrier, VK_STRUCTURE_TYPE_MEMORY_BARRIER);
memoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
memoryBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
_barriers.SourceStage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
_barriers.DestStage |= VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
#if !VK_ENABLE_BARRIERS_BATCHING
// Auto-flush without batching
FlushBarriers();
#endif
}
void GPUContextVulkan::AddUABarrier()
{
_barriers.SourceStage |= VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
_barriers.DestStage |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}
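
AddUABarrier() only ORs stage masks into the pending batch, so when nothing else is batched the flushed result reproduces exactly the whole-pipeline stall that the removed inline calls used to issue after each dispatch, with the benefit that several of them can now coalesce into a single barrier:

// Flushed form of a lone AddUABarrier() call:
vkCmdPipelineBarrier(cmdBuffer->GetHandle(),
    VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, // wait for all prior GPU work
    VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,    // block all subsequent GPU work
    0, 0, nullptr, 0, nullptr, 0, nullptr);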
void GPUContextVulkan::FlushBarriers()
{
#if VK_ENABLE_BARRIERS_BATCHING
@@ -446,7 +488,7 @@ void GPUContextVulkan::EndRenderPass()
cmdBuffer->EndRenderPass();
_renderPass = nullptr;
// Place a barrier between RenderPasses, so that color / depth outputs can be read in subsequent passes
// Place a barrier between RenderPasses, so that color/depth outputs can be read in subsequent passes
// TODO: remove it in future and use proper barriers without whole pipeline stalls
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
}
@@ -679,15 +721,9 @@ void GPUContextVulkan::OnDrawCall()
// Bind descriptors sets to the graphics pipeline
if (pipelineState->HasDescriptorsPerStageMask)
{
vkCmdBindDescriptorSets(
cmdBuffer->GetHandle(),
VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelineState->GetLayout()->Handle,
0,
pipelineState->DescriptorSetHandles.Count(),
pipelineState->DescriptorSetHandles.Get(),
pipelineState->DynamicOffsets.Count(),
pipelineState->DynamicOffsets.Get());
auto& descriptorSets = pipelineState->DescriptorSetHandles;
auto& dynamicOffsets = pipelineState->DynamicOffsets;
vkCmdBindDescriptorSets(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineState->GetLayout()->Handle, 0, descriptorSets.Count(), descriptorSets.Get(), dynamicOffsets.Count(), dynamicOffsets.Get());
}
_rtDirtyFlag = false;
@@ -710,6 +746,7 @@ void GPUContextVulkan::FrameBegin()
_stencilRef = 0;
_renderPass = nullptr;
_currentState = nullptr;
_currentCompute = nullptr;
_vertexLayout = nullptr;
_rtDepth = nullptr;
Platform::MemoryClear(_rtHandles, sizeof(_rtHandles));
@@ -748,6 +785,11 @@ void GPUContextVulkan::FrameEnd()
// Execute any queued layout transitions that weren't already handled by the render pass
FlushBarriers();
#if GPU_ENABLE_TRACY
if (cmdBuffer)
tracy::CollectVkContext(_tracyContext, cmdBuffer->GetHandle());
#endif
// Base
GPUContext::FrameEnd();
}
@@ -757,7 +799,12 @@ void GPUContextVulkan::FrameEnd()
void GPUContextVulkan::EventBegin(const Char* name)
{
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
cmdBuffer->BeginEvent(name);
#if COMPILE_WITH_PROFILER
void* tracyContext = _tracyContext;
#else
void* tracyContext = nullptr;
#endif
cmdBuffer->BeginEvent(name, tracyContext);
}
void GPUContextVulkan::EventEnd()
@@ -988,9 +1035,7 @@ void GPUContextVulkan::ResetCB()
void GPUContextVulkan::BindCB(int32 slot, GPUConstantBuffer* cb)
{
ASSERT(slot >= 0 && slot < GPU_MAX_CB_BINDED);
const auto cbVulkan = static_cast<GPUConstantBufferVulkan*>(cb);
if (_cbHandles[slot] != cbVulkan)
{
_cbDirtyFlag = true;
@@ -1077,7 +1122,6 @@ void GPUContextVulkan::UpdateCB(GPUConstantBuffer* cb, const void* data)
const uint32 size = cbVulkan->GetSize();
if (size == 0)
return;
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
// Allocate bytes for the buffer
const auto allocation = _device->UniformBufferUploader->Allocate(size, 0, this);
@@ -1114,8 +1158,12 @@ void GPUContextVulkan::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCo
FlushBarriers();
// Bind pipeline
vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineState->GetHandle());
RENDER_STAT_PS_STATE_CHANGE();
if (_currentCompute != shaderVulkan)
{
_currentCompute = shaderVulkan;
vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineState->GetHandle());
RENDER_STAT_PS_STATE_CHANGE();
}
// Bind descriptors sets to the compute pipeline
pipelineState->Bind(cmdBuffer);
@@ -1125,8 +1173,8 @@ void GPUContextVulkan::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCo
RENDER_STAT_DISPATCH_CALL();
// Place a barrier between dispatches, so that UAVs can be read+write in subsequent passes
// TODO: optimize it by moving inputs/outputs into higher-layer so eg. Global SDF can manually optimize it
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
if (_pass == 0)
AddUABarrier();
#if VK_ENABLE_BARRIERS_DEBUG
LOG(Warning, "Dispatch");
@@ -1150,8 +1198,12 @@ void GPUContextVulkan::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* b
FlushBarriers();
// Bind pipeline
vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineState->GetHandle());
RENDER_STAT_PS_STATE_CHANGE();
if (_currentCompute != shaderVulkan)
{
_currentCompute = shaderVulkan;
vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineState->GetHandle());
RENDER_STAT_PS_STATE_CHANGE();
}
// Bind descriptors sets to the compute pipeline
pipelineState->Bind(cmdBuffer);
@@ -1161,8 +1213,8 @@ void GPUContextVulkan::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* b
RENDER_STAT_DISPATCH_CALL();
// Place a barrier between dispatches, so that UAVs can be read+write in subsequent passes
// TODO: optimize it by moving inputs/outputs into higher-layer so eg. Global SDF can manually optimize it
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
if (_pass == 0)
AddUABarrier();
#if VK_ENABLE_BARRIERS_DEBUG
LOG(Warning, "DispatchIndirect");
@@ -1303,6 +1355,7 @@ void GPUContextVulkan::Flush()
// Flush remaining and buffered commands
FlushState();
_currentState = nullptr;
_currentCompute = nullptr;
// Execute commands
_cmdBufferManager->SubmitActiveCmdBuffer();
@@ -1321,38 +1374,32 @@ void GPUContextVulkan::UpdateBuffer(GPUBuffer* buffer, const void* data, uint32
const auto bufferVulkan = static_cast<GPUBufferVulkan*>(buffer);
// Memory transfer barrier
// TODO: batch pipeline barriers
const VkMemoryBarrier barrierBefore = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT };
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &barrierBefore, 0, nullptr, 0, nullptr);
// Transition resource
if (_pass == 0)
AddMemoryBarrier();
AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT);
FlushBarriers();
// Use direct update for small buffers
const uint32 alignedSize = Math::AlignUp<uint32>(size, 4);
if (size <= 16 * 1024 && alignedSize <= buffer->GetSize())
if (size <= 4 * 1024 && alignedSize <= buffer->GetSize())
{
//AddBufferBarrier(bufferVulkan, VK_ACCESS_TRANSFER_WRITE_BIT);
//FlushBarriers();
vkCmdUpdateBuffer(cmdBuffer->GetHandle(), bufferVulkan->GetHandle(), offset, alignedSize, data);
}
else
{
auto staging = _device->StagingManager.AcquireBuffer(size, GPUResourceUsage::StagingUpload);
staging->SetData(data, size);
auto allocation = _device->UploadBuffer.Upload(data, size, 4);
VkBufferCopy region;
region.size = size;
region.srcOffset = 0;
region.srcOffset = allocation.Offset;
region.dstOffset = offset;
vkCmdCopyBuffer(cmdBuffer->GetHandle(), ((GPUBufferVulkan*)staging)->GetHandle(), ((GPUBufferVulkan*)buffer)->GetHandle(), 1, &region);
_device->StagingManager.ReleaseBuffer(cmdBuffer, staging);
vkCmdCopyBuffer(cmdBuffer->GetHandle(), allocation.Buffer, ((GPUBufferVulkan*)buffer)->GetHandle(), 1, &region);
}
// Memory transfer barrier
// TODO: batch pipeline barriers
const VkMemoryBarrier barrierAfter = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT };
vkCmdPipelineBarrier(cmdBuffer->GetHandle(), VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &barrierAfter, 0, nullptr, 0, nullptr);
// Memory transfer barrier to ensure buffer is ready to read (eg. by Draw or Dispatch)
if (_pass == 0)
AddMemoryBarrier();
}
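
The inline path relies on vkCmdUpdateBuffer, which the Vulkan spec limits to dataSize values that are a multiple of 4 and at most 65536 bytes; the 4 KB cutoff stays comfortably under that. The alignment helper is the usual round-up idiom (a sketch of what Math::AlignUp<uint32>(size, 4) computes):

uint32 AlignUp4(uint32 size)
{
    return (size + 3u) & ~3u; // e.g. 13 -> 16, 16 -> 16
}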
void GPUContextVulkan::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, uint32 size, uint32 dstOffset, uint32 srcOffset)
@@ -1377,6 +1424,10 @@ void GPUContextVulkan::CopyBuffer(GPUBuffer* dstBuffer, GPUBuffer* srcBuffer, ui
bufferCopy.dstOffset = dstOffset;
bufferCopy.size = size;
vkCmdCopyBuffer(cmdBuffer->GetHandle(), srcBufferVulkan->GetHandle(), dstBufferVulkan->GetHandle(), 1, &bufferCopy);
// Memory transfer barrier to ensure buffer is ready to read (eg. by Draw or Dispatch)
if (_pass == 0)
AddMemoryBarrier();
}
void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int32 mipIndex, const void* data, uint32 rowPitch, uint32 slicePitch)
@@ -1392,14 +1443,14 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3
AddImageBarrier(textureVulkan, mipIndex, arrayIndex, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
FlushBarriers();
auto buffer = _device->StagingManager.AcquireBuffer(slicePitch, GPUResourceUsage::StagingUpload);
buffer->SetData(data, slicePitch);
auto allocation = _device->UploadBuffer.Upload(data, slicePitch, 512);
// Setup buffer copy region
int32 mipWidth, mipHeight, mipDepth;
texture->GetMipSize(mipIndex, mipWidth, mipHeight, mipDepth);
VkBufferImageCopy bufferCopyRegion;
Platform::MemoryClear(&bufferCopyRegion, sizeof(bufferCopyRegion));
bufferCopyRegion.bufferOffset = allocation.Offset;
bufferCopyRegion.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
bufferCopyRegion.imageSubresource.mipLevel = mipIndex;
bufferCopyRegion.imageSubresource.baseArrayLayer = arrayIndex;
@@ -1409,9 +1460,7 @@ void GPUContextVulkan::UpdateTexture(GPUTexture* texture, int32 arrayIndex, int3
bufferCopyRegion.imageExtent.depth = static_cast<uint32_t>(mipDepth);
// Copy mip level from staging buffer
vkCmdCopyBufferToImage(cmdBuffer->GetHandle(), ((GPUBufferVulkan*)buffer)->GetHandle(), textureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion);
_device->StagingManager.ReleaseBuffer(cmdBuffer, buffer);
vkCmdCopyBufferToImage(cmdBuffer->GetHandle(), allocation.Buffer, textureVulkan->GetHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &bufferCopyRegion);
}
void GPUContextVulkan::CopyTexture(GPUTexture* dstResource, uint32 dstSubresource, uint32 dstX, uint32 dstY, uint32 dstZ, GPUTexture* srcResource, uint32 srcSubresource)
@@ -1786,4 +1835,27 @@ void GPUContextVulkan::CopySubresource(GPUResource* dstResource, uint32 dstSubre
}
}
void GPUContextVulkan::Transition(GPUResource* resource, GPUResourceAccess access)
{
if (auto buffer = dynamic_cast<GPUBufferVulkan*>(resource))
{
AddBufferBarrier(buffer, RenderToolsVulkan::GetAccess(access));
}
else if (auto texture = dynamic_cast<GPUTextureVulkan*>(resource))
{
AddImageBarrier(texture, RenderToolsVulkan::GetImageLayout(access));
}
}
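
A hypothetical call site for the new Transition override (context and myTexture are illustrative names), for a texture last written as a render target that is about to be sampled in a pixel shader:

// Queues an image barrier to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
// (see RenderToolsVulkan::GetImageLayout further down); flushed lazily with the batch
context->Transition(myTexture, GPUResourceAccess::ShaderReadPixel);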
void GPUContextVulkan::MemoryBarrier()
{
AddMemoryBarrier();
}
void GPUContextVulkan::OverlapUA(bool end)
{
if (end)
AddUABarrier();
}
#endif

View File

@@ -16,6 +16,7 @@ class GPUTextureViewVulkan;
class GPUBufferVulkan;
class GPUVertexLayoutVulkan;
class GPUPipelineStateVulkan;
class GPUShaderProgramCSVulkan;
class ComputePipelineStateVulkan;
class GPUConstantBufferVulkan;
class DescriptorPoolVulkan;
@@ -34,7 +35,7 @@ class DescriptorSetLayoutVulkan;
/// <summary>
Size of the pipeline barriers buffer (will be auto-flushed on overflow).
/// </summary>
#define VK_BARRIER_BUFFER_SIZE 16
#define VK_BARRIER_BUFFER_SIZE 64
/// <summary>
/// The Vulkan pipeline resources layout barrier batching structure.
@@ -45,18 +46,19 @@ struct PipelineBarrierVulkan
VkPipelineStageFlags DestStage = 0;
Array<VkImageMemoryBarrier, FixedAllocation<VK_BARRIER_BUFFER_SIZE>> ImageBarriers;
Array<VkBufferMemoryBarrier, FixedAllocation<VK_BARRIER_BUFFER_SIZE>> BufferBarriers;
Array<VkMemoryBarrier, FixedAllocation<4>> MemoryBarriers;
#if VK_ENABLE_BARRIERS_DEBUG
Array<GPUTextureViewVulkan*, FixedAllocation<VK_BARRIER_BUFFER_SIZE>> ImageBarriersDebug;
#endif
FORCE_INLINE bool IsFull() const
{
return ImageBarriers.Count() == VK_BARRIER_BUFFER_SIZE || BufferBarriers.Count() == VK_BARRIER_BUFFER_SIZE;
return ImageBarriers.Count() == VK_BARRIER_BUFFER_SIZE || BufferBarriers.Count() == VK_BARRIER_BUFFER_SIZE || MemoryBarriers.Count() == 4;
}
FORCE_INLINE bool HasBarrier() const
{
return ImageBarriers.Count() + BufferBarriers.Count() != 0;
return ImageBarriers.Count() + BufferBarriers.Count() + MemoryBarriers.Count() != 0 || SourceStage + DestStage != 0;
}
void Execute(const CmdBufferVulkan* cmdBuffer);
@@ -83,6 +85,7 @@ private:
RenderPassVulkan* _renderPass;
GPUPipelineStateVulkan* _currentState;
GPUShaderProgramCSVulkan* _currentCompute;
GPUVertexLayoutVulkan* _vertexLayout;
GPUTextureViewVulkan* _rtDepth;
GPUTextureViewVulkan* _rtHandles[GPU_MAX_RT_BINDED];
@@ -94,6 +97,9 @@ private:
#if ENABLE_ASSERTION
uint32 _handlesSizes[(int32)SpirvShaderResourceBindingType::MAX];
#endif
#if COMPILE_WITH_PROFILER
void* _tracyContext;
#endif
typedef Array<DescriptorPoolVulkan*> DescriptorPoolArray;
Dictionary<uint32, DescriptorPoolArray> _descriptorPools;
@@ -127,6 +133,8 @@ public:
void AddImageBarrier(GPUTextureVulkan* texture, int32 mipSlice, int32 arraySlice, VkImageLayout dstLayout);
void AddImageBarrier(GPUTextureVulkan* texture, VkImageLayout dstLayout);
void AddBufferBarrier(GPUBufferVulkan* buffer, VkAccessFlags dstAccess);
void AddMemoryBarrier();
void AddUABarrier();
void FlushBarriers();
@@ -196,6 +204,9 @@ public:
void CopyCounter(GPUBuffer* dstBuffer, uint32 dstOffset, GPUBuffer* srcBuffer) override;
void CopyResource(GPUResource* dstResource, GPUResource* srcResource) override;
void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override;
void Transition(GPUResource* resource, GPUResourceAccess access) override;
void MemoryBarrier() override;
void OverlapUA(bool end) override;
};
#endif

View File

@@ -44,6 +44,9 @@ static const char* GInstanceExtensions[] =
#endif
#if defined(VK_KHR_display) && 0
VK_KHR_DISPLAY_EXTENSION_NAME,
#endif
#if GPU_ENABLE_TRACY && VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset
VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, // Required by VK_EXT_host_query_reset (unless using Vulkan 1.1 or newer)
#endif
nullptr
};
@@ -62,6 +65,10 @@ static const char* GDeviceExtensions[] =
#endif
#if VK_KHR_sampler_mirror_clamp_to_edge
VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
#endif
#if GPU_ENABLE_TRACY && VK_EXT_calibrated_timestamps && VK_EXT_host_query_reset
VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
#endif
nullptr
};

View File

@@ -34,7 +34,9 @@
#include "Engine/Engine/CommandLine.h"
#include "Engine/Utilities/StringConverter.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Profiler/ProfilerMemory.h"
#include "Engine/Threading/Threading.h"
#include "Engine/Threading/ThreadPoolTask.h"
#include "Engine/Scripting/Enums.h"
#if !USE_EDITOR && (PLATFORM_WINDOWS || PLATFORM_LINUX)
@@ -42,7 +44,7 @@
#endif
GPUDeviceVulkan::OptionalVulkanDeviceExtensions GPUDeviceVulkan::OptionalDeviceExtensions;
VkInstance GPUDeviceVulkan::Instance;
VkInstance GPUDeviceVulkan::Instance = VK_NULL_HANDLE;
Array<const char*> GPUDeviceVulkan::InstanceExtensions;
Array<const char*> GPUDeviceVulkan::InstanceLayers;
@@ -141,6 +143,7 @@ static VKAPI_ATTR VkBool32 VKAPI_PTR DebugUtilsCallback(VkDebugUtilsMessageSever
case 3: // Attachment 2 not written by fragment shader
case 5: // SPIR-V module not valid: MemoryBarrier: Vulkan specification requires Memory Semantics to have one of the following bits set: Acquire, Release, AcquireRelease or SequentiallyConsistent
case -1666394502: // After query pool creation, each query must be reset before it is used. Queries must also be reset between uses.
case 1203141749:
case 602160055: // Attachment 4 not written by fragment shader; undefined values will be written to attachment. TODO: investigate it for PS_GBuffer shader from Deferred material with USE_LIGHTMAP=1
case 7060244: // Image Operand Offset can only be used with OpImage*Gather operations
case -1539028524: // SortedIndices is null so Vulkan backend sets it to default R32_SFLOAT format which is not good for UINT format of the buffer
@@ -229,18 +232,22 @@ static VKAPI_ATTR VkBool32 VKAPI_PTR DebugUtilsCallback(VkDebugUtilsMessageSever
const String message(callbackData->pMessage);
if (callbackData->pMessageIdName)
{
LOG(Info, "[Vulkan] {0} {1}:{2}({3}) {4}", type, severity, callbackData->messageIdNumber, String(callbackData->pMessageIdName), message);
}
else
{
LOG(Info, "[Vulkan] {0} {1}:{2} {3}", type, severity, callbackData->messageIdNumber, message);
}
#if BUILD_DEBUG
#if !BUILD_RELEASE
if (auto* context = (GPUContextVulkan*)GPUDevice::Instance->GetMainContext())
{
if (auto* state = (GPUPipelineStateVulkan*)context->GetState())
{
const StringAnsi vsName = state->DebugDesc.VS ? state->DebugDesc.VS->GetName() : StringAnsi::Empty;
const StringAnsi psName = state->DebugDesc.PS ? state->DebugDesc.PS->GetName() : StringAnsi::Empty;
LOG(Warning, "[Vulkan] Error during rendering with VS={}, PS={}", String(vsName), String(psName));
GPUPipelineState::DebugName name;
state->GetDebugName(name);
LOG(Warning, "[Vulkan] Error during rendering with {}", String(name.Get(), name.Count() - 1));
}
}
#endif
@@ -966,133 +973,6 @@ void HelperResourcesVulkan::Dispose()
}
}
StagingManagerVulkan::StagingManagerVulkan(GPUDeviceVulkan* device)
: _device(device)
{
}
GPUBuffer* StagingManagerVulkan::AcquireBuffer(uint32 size, GPUResourceUsage usage)
{
// Try reuse free buffer
{
ScopeLock lock(_locker);
for (int32 i = 0; i < _freeBuffers.Count(); i++)
{
auto& freeBuffer = _freeBuffers[i];
if (freeBuffer.Buffer->GetSize() == size && freeBuffer.Buffer->GetDescription().Usage == usage)
{
const auto buffer = freeBuffer.Buffer;
_freeBuffers.RemoveAt(i);
return buffer;
}
}
}
// Allocate new buffer
auto buffer = _device->CreateBuffer(TEXT("Pooled Staging"));
if (buffer->Init(GPUBufferDescription::Buffer(size, GPUBufferFlags::None, PixelFormat::Unknown, nullptr, 0, usage)))
{
LOG(Warning, "Failed to create pooled staging buffer.");
return nullptr;
}
// Cache buffer
{
ScopeLock lock(_locker);
_allBuffers.Add(buffer);
#if !BUILD_RELEASE
_allBuffersAllocSize += size;
_allBuffersTotalSize += size;
_allBuffersPeekSize = Math::Max(_allBuffersTotalSize, _allBuffersPeekSize);
#endif
}
return buffer;
}
void StagingManagerVulkan::ReleaseBuffer(CmdBufferVulkan* cmdBuffer, GPUBuffer*& buffer)
{
ScopeLock lock(_locker);
if (cmdBuffer)
{
// Return to pending pool (need to wait until command buffer will be executed and buffer will be reusable)
auto& item = _pendingBuffers.AddOne();
item.Buffer = buffer;
item.CmdBuffer = cmdBuffer;
item.FenceCounter = cmdBuffer->GetFenceSignaledCounter();
}
else
{
// Return to pool
_freeBuffers.Add({ buffer, Engine::FrameCount });
}
// Clear reference
buffer = nullptr;
}
void StagingManagerVulkan::ProcessPendingFree()
{
ScopeLock lock(_locker);
// Find staging buffers that have been processed by the GPU and can be reused
for (int32 i = _pendingBuffers.Count() - 1; i >= 0; i--)
{
auto& e = _pendingBuffers[i];
if (e.FenceCounter < e.CmdBuffer->GetFenceSignaledCounter())
{
// Return to pool
_freeBuffers.Add({ e.Buffer, Engine::FrameCount });
_pendingBuffers.RemoveAt(i);
}
}
// Free staging buffers that have not been used for a few frames
for (int32 i = _freeBuffers.Count() - 1; i >= 0; i--)
{
auto& e = _freeBuffers.Get()[i];
if (e.FrameNumber + VULKAN_RESOURCE_DELETE_SAFE_FRAMES_COUNT < Engine::FrameCount)
{
auto buffer = e.Buffer;
// Remove buffer from lists
_allBuffers.Remove(buffer);
_freeBuffers.RemoveAt(i);
#if !BUILD_RELEASE
// Update stats
_allBuffersFreeSize += buffer->GetSize();
_allBuffersTotalSize -= buffer->GetSize();
#endif
// Release memory
buffer->ReleaseGPU();
Delete(buffer);
}
}
}
void StagingManagerVulkan::Dispose()
{
ScopeLock lock(_locker);
#if BUILD_DEBUG
LOG(Info, "Vulkan staging buffers peek memory usage: {0}, allocs: {1}, frees: {2}", Utilities::BytesToText(_allBuffersPeekSize), Utilities::BytesToText(_allBuffersAllocSize), Utilities::BytesToText(_allBuffersFreeSize));
#endif
// Release buffers and clear memory
for (auto buffer : _allBuffers)
{
buffer->ReleaseGPU();
Delete(buffer);
}
_allBuffers.Resize(0);
_pendingBuffers.Resize(0);
}
GPUDeviceVulkan::GPUDeviceVulkan(ShaderProfile shaderProfile, GPUAdapterVulkan* adapter)
: GPUDevice(RendererType::Vulkan, shaderProfile)
, _renderPasses(512)
@@ -1100,7 +980,7 @@ GPUDeviceVulkan::GPUDeviceVulkan(ShaderProfile shaderProfile, GPUAdapterVulkan*
, _layouts(4096)
, Adapter(adapter)
, DeferredDeletionQueue(this)
, StagingManager(this)
, UploadBuffer(this)
, HelperResources(this)
{
}
@@ -1174,23 +1054,57 @@ GPUDevice* GPUDeviceVulkan::Create()
Array<VkExtensionProperties> properties;
properties.Resize(propertyCount);
vkEnumerateInstanceExtensionProperties(nullptr, &propertyCount, properties.Get());
String missingExtension;
for (const char* extension : InstanceExtensions)
{
    bool found = false;
    for (uint32_t propertyIndex = 0; propertyIndex < propertyCount; propertyIndex++)
    {
        if (!StringUtils::Compare(properties[propertyIndex].extensionName, extension))
        {
            found = true;
            break;
        }
    }
    if (!found)
    {
        if (missingExtension.IsEmpty())
            missingExtension = extension;
        else
            missingExtension += TEXT(", ") + String(extension);
    }
}
LOG(Warning, "Extensions found:");
for (const VkExtensionProperties& property : properties)
    LOG(Warning, " > {}", String(property.extensionName));
auto error = String::Format(TEXT("Vulkan driver doesn't contain specified extensions:\n{0}\nPlease make sure your layers path is set appropriately."), missingExtension);
LOG_STR(Error, error);
Platform::Error(*error);
return nullptr;
}
if (result == VK_ERROR_LAYER_NOT_PRESENT)
{
    // Layers error
    uint32_t propertyCount;
    vkEnumerateInstanceLayerProperties(&propertyCount, nullptr);
    Array<VkLayerProperties> properties;
    properties.Resize(propertyCount);
    vkEnumerateInstanceLayerProperties(&propertyCount, properties.Get());
    String missingLayers;
    for (const char* layer : InstanceLayers)
    {
        bool found = false;
        for (uint32_t propertyIndex = 0; propertyIndex < propertyCount; propertyIndex++)
        {
            if (!StringUtils::Compare(properties[propertyIndex].layerName, layer))
            {
                found = true;
                break;
            }
        }
        if (!found)
        {
            if (missingLayers.IsEmpty())
                missingLayers = layer;
            else
                missingLayers += TEXT(", ") + String(layer);
        }
    }
    LOG(Warning, "Layers found:");
    for (const VkLayerProperties& property : properties)
        LOG(Warning, " > {}", String(property.layerName));
    auto error = String::Format(TEXT("Vulkan driver doesn't contain specified layers:\n{0}\nPlease make sure your layers path is set appropriately."), missingLayers);
    LOG_STR(Error, error);
    Platform::Error(*error);
    return nullptr;
}
@@ -1310,6 +1224,24 @@ GPUDeviceVulkan::~GPUDeviceVulkan()
GPUDeviceVulkan::Dispose();
}
BufferedQueryPoolVulkan* GPUDeviceVulkan::FindAvailableQueryPool(VkQueryType queryType)
{
auto& pools = queryType == VK_QUERY_TYPE_OCCLUSION ? OcclusionQueryPools : TimestampQueryPools;
// Try to use pool with available space inside
for (int32 i = 0; i < pools.Count(); i++)
{
auto pool = pools.Get()[i];
if (pool->HasRoom())
return pool;
}
// Create new pool
const auto pool = New<BufferedQueryPoolVulkan>(this, queryType == VK_QUERY_TYPE_OCCLUSION ? 4096 : 1024, queryType);
pools.Add(pool);
return pool;
}
RenderPassVulkan* GPUDeviceVulkan::GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout)
{
RenderPassVulkan* renderPass;
@@ -1459,54 +1391,87 @@ PixelFormat GPUDeviceVulkan::GetClosestSupportedPixelFormat(PixelFormat format,
return format;
}
void GetPipelineCachePath(String& path)
bool VulkanPlatformBase::LoadCache(const String& folder, const Char* filename, Array<byte>& data)
{
#if USE_EDITOR
path = Globals::ProjectCacheFolder / TEXT("VulkanPipeline.cache");
#else
path = Globals::ProductLocalFolder / TEXT("VulkanPipeline.cache");
#endif
String path = folder / filename;
if (FileSystem::FileExists(path))
{
LOG(Info, "Loading Vulkan cache from file '{}'", path);
return File::ReadAllBytes(path, data);
}
return false;
}
bool GPUDeviceVulkan::SavePipelineCache()
bool VulkanPlatformBase::SaveCache(const String& folder, const Char* filename, const Array<byte>& data)
{
if (PipelineCache == VK_NULL_HANDLE || !vkGetPipelineCacheData)
return false;
// Query data size
size_t dataSize = 0;
VkResult result = vkGetPipelineCacheData(Device, PipelineCache, &dataSize, nullptr);
LOG_VULKAN_RESULT_WITH_RETURN(result);
if (dataSize <= 0)
return false;
// Query data
Array<byte> data;
data.Resize((int32)dataSize);
result = vkGetPipelineCacheData(Device, PipelineCache, &dataSize, data.Get());
LOG_VULKAN_RESULT_WITH_RETURN(result);
// Save data
String path;
GetPipelineCachePath(path);
String path = folder / filename;
LOG(Info, "Saving Vulkan cache to file '{}' ({} kB)", path, data.Count() / 1024);
return File::WriteAllBytes(path, data);
}
#if USE_EDITOR
#define CACHE_FOLDER Globals::ProjectCacheFolder
#else
#define CACHE_FOLDER Globals::ProductLocalFolder
#endif
#if VULKAN_USE_PIPELINE_CACHE
bool SavePipelineCacheAsync()
{
PROFILE_CPU();
((GPUDeviceVulkan*)GPUDevice::Instance)->SavePipelineCache(false, true);
return false;
}
#endif
bool GPUDeviceVulkan::SavePipelineCache(bool async, bool cached)
{
#if VULKAN_USE_PIPELINE_CACHE
if (PipelineCache == VK_NULL_HANDLE || !vkGetPipelineCacheData || PipelineCacheUsage == 0)
return false;
PROFILE_CPU();
PROFILE_MEM(Graphics);
if (!cached)
{
// Query data size
size_t dataSize = 0;
VkResult result = vkGetPipelineCacheData(Device, PipelineCache, &dataSize, nullptr);
LOG_VULKAN_RESULT_WITH_RETURN(result);
if (dataSize <= 0)
return false;
// Query data
PipelineCacheSaveData.Resize((int32)dataSize);
result = vkGetPipelineCacheData(Device, PipelineCache, &dataSize, PipelineCacheSaveData.Get());
LOG_VULKAN_RESULT_WITH_RETURN(result);
}
if (async)
{
// Kick off the async job that will save the cached bytes
Function<bool()> action(SavePipelineCacheAsync);
return Task::StartNew(action) != nullptr;
}
// Reset usage counter
PipelineCacheUsage = 0;
// Save data
return VulkanPlatform::SaveCache(CACHE_FOLDER, TEXT("VulkanPipeline.cache"), PipelineCacheSaveData);
#else
return false;
#endif
}
#if VULKAN_USE_VALIDATION_CACHE
bool GPUDeviceVulkan::SaveValidationCache()
{
if (ValidationCache == VK_NULL_HANDLE || !vkGetValidationCacheDataEXT)
return false;
PROFILE_CPU();
PROFILE_MEM(Graphics);
// Query data size
size_t dataSize = 0;
@@ -1522,9 +1487,7 @@ bool GPUDeviceVulkan::SaveValidationCache()
LOG_VULKAN_RESULT_WITH_RETURN(result);
// Save data
String path;
GetValidationCachePath(path);
return File::WriteAllBytes(path, data);
return VulkanPlatform::SaveCache(CACHE_FOLDER, TEXT("VulkanValidation.cache"), data);
}
#endif
@@ -1936,57 +1899,48 @@ bool GPUDeviceVulkan::Init()
UniformBufferUploader = New<UniformBufferUploaderVulkan>(this);
DescriptorPoolsManager = New<DescriptorPoolsManagerVulkan>(this);
MainContext = New<GPUContextVulkan>(this, GraphicsQueue);
#if VULKAN_USE_PIPELINE_CACHE
if (vkCreatePipelineCache)
{
Array<uint8> data;
String path;
GetPipelineCachePath(path);
if (FileSystem::FileExists(path))
{
LOG(Info, "Trying to load Vulkan pipeline cache file {0}", path);
File::ReadAllBytes(path, data);
}
VulkanPlatform::LoadCache(CACHE_FOLDER, TEXT("VulkanPipeline.cache"), data);
VkPipelineCacheCreateInfo pipelineCacheCreateInfo;
RenderToolsVulkan::ZeroStruct(pipelineCacheCreateInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
pipelineCacheCreateInfo.initialDataSize = data.Count();
pipelineCacheCreateInfo.pInitialData = data.Count() > 0 ? data.Get() : nullptr;
const VkResult result = vkCreatePipelineCache(Device, &pipelineCacheCreateInfo, nullptr, &PipelineCache);
LOG_VULKAN_RESULT(result);
PipelineCacheSaveTime = Platform::GetTimeSeconds();
}
#endif
#if VULKAN_USE_VALIDATION_CACHE
if (OptionalDeviceExtensions.HasEXTValidationCache && vkCreateValidationCacheEXT && vkDestroyValidationCacheEXT)
{
Array<uint8> data;
VulkanPlatform::LoadCache(CACHE_FOLDER, TEXT("VulkanValidation.cache"), data);
if (data.HasItems())
{
    int32* dataPtr = (int32*)data.Get();
    if (*dataPtr > 0)
    {
        const int32 cacheSize = *dataPtr++;
        const int32 cacheVersion = *dataPtr++;
        const int32 cacheVersionExpected = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
        if (cacheVersion == cacheVersionExpected)
        {
            dataPtr += VK_UUID_SIZE / sizeof(int32);
        }
        else
        {
            LOG(Warning, "Bad validation cache file, version: {0}, expected: {1}", cacheVersion, cacheVersionExpected);
            data.Clear();
        }
    }
    else
    {
        LOG(Warning, "Bad validation cache file, header size: {0}", *dataPtr);
        data.Clear();
    }
}
VkValidationCacheCreateInfoEXT validationCreateInfo;
RenderToolsVulkan::ZeroStruct(validationCreateInfo, VK_STRUCTURE_TYPE_VALIDATION_CACHE_CREATE_INFO_EXT);
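
The header check above matches the on-disk layout that VK_EXT_validation_cache defines for version-one caches: two 32-bit fields followed by the cache UUID. A sketch of that header, for orientation only:

struct ValidationCacheHeader
{
    uint32 HeaderSize; // total header length in bytes (8 + VK_UUID_SIZE)
    uint32 HeaderVersion; // version-one caches store 1 here
    byte CacheUUID[VK_UUID_SIZE]; // implementation UUID, skipped by the loader above
};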
@@ -2008,8 +1962,19 @@ void GPUDeviceVulkan::DrawBegin()
// Flush resources
DeferredDeletionQueue.ReleaseResources();
StagingManager.ProcessPendingFree();
DescriptorPoolsManager->GC();
UploadBuffer.BeginGeneration(Engine::FrameCount);
#if VULKAN_USE_PIPELINE_CACHE
// Serialize pipeline cache periodically for fewer PSO hitches on the next app run
const double time = Platform::GetTimeSeconds();
const double saveTimeFrequency = Engine::FrameCount < 60 * Math::Clamp(Engine::GetFramesPerSecond(), 30, 60) ? 10 : 180; // More frequent saves during the first 1min of gameplay
if (Engine::HasFocus && time - PipelineCacheSaveTime >= saveTimeFrequency)
{
SavePipelineCache(true);
PipelineCacheSaveTime = time;
}
#endif
}
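
Worked through at a steady 60 FPS (so Math::Clamp(Engine::GetFramesPerSecond(), 30, 60) == 60), the cadence above comes out as:

// Engine::FrameCount < 60 * 60 == 3600 (~the first minute): save every 10 s
// Engine::FrameCount >= 3600: save every 180 s
// In both cases the save only fires while the app has focus (Engine::HasFocus)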
void GPUDeviceVulkan::Dispose()
@@ -2034,8 +1999,9 @@ void GPUDeviceVulkan::Dispose()
_renderPasses.ClearDelete();
_layouts.ClearDelete();
HelperResources.Dispose();
StagingManager.Dispose();
UploadBuffer.Dispose();
TimestampQueryPools.ClearDelete();
OcclusionQueryPools.ClearDelete();
SAFE_DELETE_GPU_RESOURCE(UniformBufferUploader);
Delete(DescriptorPoolsManager);
SAFE_DELETE(MainContext);
@@ -2049,6 +2015,7 @@ void GPUDeviceVulkan::Dispose()
DeferredDeletionQueue.ReleaseResources(true);
vmaDestroyAllocator(Allocator);
Allocator = VK_NULL_HANDLE;
#if VULKAN_USE_PIPELINE_CACHE
if (PipelineCache != VK_NULL_HANDLE)
{
if (SavePipelineCache())
@@ -2056,6 +2023,7 @@ void GPUDeviceVulkan::Dispose()
vkDestroyPipelineCache(Device, PipelineCache, nullptr);
PipelineCache = VK_NULL_HANDLE;
}
#endif
#if VULKAN_USE_VALIDATION_CACHE
if (ValidationCache != VK_NULL_HANDLE)
{
@@ -2089,22 +2057,26 @@ void GPUDeviceVulkan::WaitForGPU()
if (Device != VK_NULL_HANDLE)
{
PROFILE_CPU();
ZoneColor(TracyWaitZoneColor);
VALIDATE_VULKAN_RESULT(vkDeviceWaitIdle(Device));
}
}
GPUTexture* GPUDeviceVulkan::CreateTexture(const StringView& name)
{
PROFILE_MEM(GraphicsTextures);
return New<GPUTextureVulkan>(this, name);
}
GPUShader* GPUDeviceVulkan::CreateShader(const StringView& name)
{
PROFILE_MEM(GraphicsShaders);
return New<GPUShaderVulkan>(this, name);
}
GPUPipelineState* GPUDeviceVulkan::CreatePipelineState()
{
PROFILE_MEM(GraphicsCommands);
return New<GPUPipelineStateVulkan>(this);
}
@@ -2115,6 +2087,7 @@ GPUTimerQuery* GPUDeviceVulkan::CreateTimerQuery()
GPUBuffer* GPUDeviceVulkan::CreateBuffer(const StringView& name)
{
PROFILE_MEM(GraphicsBuffers);
return New<GPUBufferVulkan>(this, name);
}
@@ -2135,6 +2108,7 @@ GPUSwapChain* GPUDeviceVulkan::CreateSwapChain(Window* window)
GPUConstantBuffer* GPUDeviceVulkan::CreateConstantBuffer(uint32 size, const StringView& name)
{
PROFILE_MEM(GraphicsShaders);
return New<GPUConstantBufferVulkan>(this, size);
}

View File

@@ -7,6 +7,7 @@
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUResource.h"
#include "DescriptorSetVulkan.h"
#include "UploadBufferVulkan.h"
#include "IncludeVulkanHeaders.h"
#include "Config.h"
@@ -326,45 +327,6 @@ public:
void Dispose();
};
/// <summary>
/// Vulkan staging buffers manager.
/// </summary>
class StagingManagerVulkan
{
private:
struct PendingEntry
{
GPUBuffer* Buffer;
CmdBufferVulkan* CmdBuffer;
uint64 FenceCounter;
};
struct FreeEntry
{
GPUBuffer* Buffer;
uint64 FrameNumber;
};
GPUDeviceVulkan* _device;
CriticalSection _locker;
Array<GPUBuffer*> _allBuffers;
Array<FreeEntry> _freeBuffers;
Array<PendingEntry> _pendingBuffers;
#if !BUILD_RELEASE
uint64 _allBuffersTotalSize = 0;
uint64 _allBuffersPeekSize = 0;
uint64 _allBuffersAllocSize = 0;
uint64 _allBuffersFreeSize = 0;
#endif
public:
StagingManagerVulkan(GPUDeviceVulkan* device);
GPUBuffer* AcquireBuffer(uint32 size, GPUResourceUsage usage);
void ReleaseBuffer(CmdBufferVulkan* cmdBuffer, GPUBuffer*& buffer);
void ProcessPendingFree();
void Dispose();
};
/// <summary>
/// Implementation of Graphics Device for Vulkan backend.
/// </summary>
@@ -464,9 +426,9 @@ public:
DeferredDeletionQueueVulkan DeferredDeletionQueue;
/// <summary>
/// The staging buffers manager.
/// Data uploading utility via pages.
/// </summary>
StagingManagerVulkan StagingManager;
UploadBufferVulkan UploadBuffer;
/// <summary>
/// The helper device resources manager.
@@ -502,6 +464,11 @@ public:
/// The pipeline cache.
/// </summary>
VkPipelineCache PipelineCache = VK_NULL_HANDLE;
#if VULKAN_USE_PIPELINE_CACHE
uint32 PipelineCacheUsage = 0;
double PipelineCacheSaveTime = 0.0f;
Array<byte> PipelineCacheSaveData;
#endif
#if VULKAN_USE_VALIDATION_CACHE
/// <summary>
@@ -531,37 +498,13 @@ public:
VkPhysicalDeviceFeatures PhysicalDeviceFeatures;
Array<BufferedQueryPoolVulkan*> TimestampQueryPools;
Array<BufferedQueryPoolVulkan*> OcclusionQueryPools;
#if VULKAN_RESET_QUERY_POOLS
Array<QueryPoolVulkan*> QueriesToReset;
#endif
inline BufferedQueryPoolVulkan* FindAvailableQueryPool(Array<BufferedQueryPoolVulkan*>& pools, VkQueryType queryType)
{
// Try to use pool with available space inside
for (int32 i = 0; i < pools.Count(); i++)
{
auto pool = pools.Get()[i];
if (pool->HasRoom())
return pool;
}
// Create new pool
enum
{
NUM_OCCLUSION_QUERIES_PER_POOL = 4096,
NUM_TIMESTAMP_QUERIES_PER_POOL = 1024,
};
const auto pool = New<BufferedQueryPoolVulkan>(this, queryType == VK_QUERY_TYPE_OCCLUSION ? NUM_OCCLUSION_QUERIES_PER_POOL : NUM_TIMESTAMP_QUERIES_PER_POOL, queryType);
pools.Add(pool);
return pool;
}
inline BufferedQueryPoolVulkan* FindAvailableTimestampQueryPool()
{
return FindAvailableQueryPool(TimestampQueryPools, VK_QUERY_TYPE_TIMESTAMP);
}
BufferedQueryPoolVulkan* FindAvailableQueryPool(VkQueryType queryType);
RenderPassVulkan* GetOrCreateRenderPass(RenderTargetLayoutVulkan& layout);
FramebufferVulkan* GetOrCreateFramebuffer(FramebufferVulkan::Key& key, VkExtent2D& extent, uint32 layers);
PipelineLayoutVulkan* GetOrCreateLayout(DescriptorSetLayoutInfoVulkan& key);
@@ -586,7 +529,9 @@ public:
/// <summary>
/// Saves the pipeline cache.
/// </summary>
bool SavePipelineCache();
/// <param name="async">Enables async writing to file to reduce stuttering of main thread.</param>
/// <param name="cached">Uses cached results from the last call to vkGetPipelineCacheData, used to just save cached data when running in async.</param>
bool SavePipelineCache(bool async = false, bool cached = false);
#if VK_EXT_validation_cache
/// <summary>

View File

@@ -90,6 +90,8 @@ ComputePipelineStateVulkan* GPUShaderProgramCSVulkan::GetOrCreateState()
{
if (_pipelineState)
return _pipelineState;
PROFILE_CPU();
ZoneText(*_name, _name.Length());
// Create pipeline layout
DescriptorSetLayoutInfoVulkan descriptorSetLayoutInfo;
@@ -110,7 +112,8 @@ ComputePipelineStateVulkan* GPUShaderProgramCSVulkan::GetOrCreateState()
// Create pipeline object
VkPipeline pipeline;
const VkResult result = vkCreateComputePipelines(_device->Device, _device->PipelineCache, 1, &desc, nullptr, &pipeline);
VkResult result = vkCreateComputePipelines(_device->Device, _device->PipelineCache, 1, &desc, nullptr, &pipeline);
_device->PipelineCacheUsage++;
LOG_VULKAN_RESULT(result);
if (result != VK_SUCCESS)
return nullptr;
@@ -220,7 +223,12 @@ VkPipeline GPUPipelineStateVulkan::GetState(RenderPassVulkan* renderPass, GPUVer
#endif
return pipeline;
}
PROFILE_CPU_NAMED("Create Pipeline");
PROFILE_CPU();
#if !BUILD_RELEASE
DebugName name;
GetDebugName(name);
ZoneText(name.Get(), name.Count() - 1);
#endif
// Bind vertex input
VkPipelineVertexInputStateCreateInfo vertexInputCreateInfo;
@@ -306,14 +314,13 @@ VkPipeline GPUPipelineStateVulkan::GetState(RenderPassVulkan* renderPass, GPUVer
auto depthWrite = _descDepthStencil.depthWriteEnable;
_descDepthStencil.depthWriteEnable &= renderPass->CanDepthWrite ? 1 : 0;
const VkResult result = vkCreateGraphicsPipelines(_device->Device, _device->PipelineCache, 1, &_desc, nullptr, &pipeline);
_device->PipelineCacheUsage++;
_descDepthStencil.depthWriteEnable = depthWrite;
LOG_VULKAN_RESULT(result);
if (result != VK_SUCCESS)
{
#if BUILD_DEBUG
const StringAnsi vsName = DebugDesc.VS ? DebugDesc.VS->GetName() : StringAnsi::Empty;
const StringAnsi psName = DebugDesc.PS ? DebugDesc.PS->GetName() : StringAnsi::Empty;
LOG(Error, "vkCreateGraphicsPipelines failed for VS={0}, PS={1}", String(vsName), String(psName));
#if !BUILD_RELEASE
LOG(Error, "vkCreateGraphicsPipelines failed for {}", String(name.Get(), name.Count() - 1));
#endif
return VK_NULL_HANDLE;
}
@@ -350,7 +357,8 @@ bool GPUPipelineStateVulkan::IsValid() const
bool GPUPipelineStateVulkan::Init(const Description& desc)
{
ASSERT(!IsValid());
if (IsValid())
OnReleaseGPU();
// Reset description
RenderToolsVulkan::ZeroStruct(_desc, VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

View File

@@ -12,6 +12,7 @@
#include "Engine/Core/Types/DataContainer.h"
#include "Engine/Serialization/MemoryReadStream.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Profiler/ProfilerMemory.h"
#if PLATFORM_DESKTOP
#define VULKAN_UNIFORM_RING_BUFFER_SIZE (24 * 1024 * 1024)
@@ -41,6 +42,7 @@ UniformBufferUploaderVulkan::UniformBufferUploaderVulkan(GPUDeviceVulkan* device
VkResult result = vmaCreateBuffer(_device->Allocator, &bufferInfo, &allocInfo, &_buffer, &_allocation, nullptr);
LOG_VULKAN_RESULT(result);
_memoryUsage = bufferInfo.size;
PROFILE_MEM_INC(GraphicsCommands, _memoryUsage);
// Map buffer
result = vmaMapMemory(_device->Allocator, _allocation, (void**)&_mapped);
@@ -87,6 +89,7 @@ void UniformBufferUploaderVulkan::OnReleaseGPU()
{
if (_allocation != VK_NULL_HANDLE)
{
PROFILE_MEM_DEC(GraphicsCommands, _memoryUsage);
if (_mapped)
{
vmaUnmapMemory(_device->Allocator, _allocation);

View File

@@ -12,6 +12,7 @@
#include "Engine/Graphics/GPULimits.h"
#include "Engine/Scripting/Enums.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Profiler/ProfilerMemory.h"
void BackBufferVulkan::Setup(GPUSwapChainVulkan* window, VkImage backbuffer, PixelFormat format, VkExtent3D extent)
{
@@ -61,6 +62,7 @@ void GPUSwapChainVulkan::OnReleaseGPU()
ReleaseBackBuffer();
// Release data
PROFILE_MEM_DEC(Graphics, _memoryUsage);
_currentImageIndex = -1;
_semaphoreIndex = 0;
_acquiredImageIndex = -1;
@@ -76,6 +78,7 @@ void GPUSwapChainVulkan::OnReleaseGPU()
_surface = VK_NULL_HANDLE;
}
_width = _height = 0;
_memoryUsage = 0;
}
bool GPUSwapChainVulkan::IsFullscreen()
@@ -423,6 +426,7 @@ bool GPUSwapChainVulkan::CreateSwapChain(int32 width, int32 height)
// Estimate memory usage
_memoryUsage = 1024 + RenderTools::CalculateTextureMemoryUsage(_format, _width, _height, 1) * _backBuffers.Count();
PROFILE_MEM_INC(Graphics, _memoryUsage);
return false;
}
@@ -431,6 +435,7 @@ GPUSwapChainVulkan::Status GPUSwapChainVulkan::Present(QueueVulkan* presentQueue
{
if (_currentImageIndex == -1)
return Status::Ok;
PROFILE_CPU_NAMED("vkQueuePresentKHR");
VkPresentInfoKHR presentInfo;
RenderToolsVulkan::ZeroStruct(presentInfo, VK_STRUCTURE_TYPE_PRESENT_INFO_KHR);
@@ -513,7 +518,7 @@ int32 GPUSwapChainVulkan::TryPresent(Function<int32(GPUSwapChainVulkan*, void*)>
int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore)
{
PROFILE_CPU();
PROFILE_CPU_NAMED("vkAcquireNextImageKHR");
ASSERT(_swapChain && _backBuffers.HasItems());
uint32 imageIndex = _currentImageIndex;
@@ -521,13 +526,7 @@ int32 GPUSwapChainVulkan::AcquireNextImage(SemaphoreVulkan*& outSemaphore)
_semaphoreIndex = (_semaphoreIndex + 1) % _backBuffers.Count();
const auto semaphore = _backBuffers[_semaphoreIndex].ImageAcquiredSemaphore;
const VkResult result = vkAcquireNextImageKHR(
_device->Device,
_swapChain,
UINT64_MAX,
semaphore->GetHandle(),
VK_NULL_HANDLE,
&imageIndex);
const VkResult result = vkAcquireNextImageKHR(_device->Device, _swapChain, UINT64_MAX, semaphore->GetHandle(), VK_NULL_HANDLE, &imageIndex);
if (result == VK_ERROR_OUT_OF_DATE_KHR)
{
_semaphoreIndex = prevSemaphoreIndex;
@@ -560,6 +559,7 @@ void GPUSwapChainVulkan::Present(bool vsync)
if (_acquiredImageIndex == -1)
return;
PROFILE_CPU();
ZoneColor(TracyWaitZoneColor);
// Ensure that backbuffer has been acquired before presenting it to the window
const auto backBuffer = (GPUTextureViewVulkan*)GetBackBufferView();

View File

@@ -62,8 +62,8 @@ public:
void DescriptorAsImage(GPUContextVulkan* context, VkImageView& imageView, VkImageLayout& layout) override;
void DescriptorAsStorageImage(GPUContextVulkan* context, VkImageView& imageView, VkImageLayout& layout) override;
#if !BUILD_RELEASE
bool HasSRV() const override { return ((GPUTexture*)_parent)->IsShaderResource(); }
bool HasUAV() const override { return ((GPUTexture*)_parent)->IsUnorderedAccess(); }
bool HasSRV() const override { return !_parent || ((GPUTexture*)_parent)->IsShaderResource(); }
bool HasUAV() const override { return !_parent || ((GPUTexture*)_parent)->IsUnorderedAccess(); }
#endif
};

View File

@@ -58,7 +58,7 @@ bool GPUTimerQueryVulkan::GetResult(Query& query)
void GPUTimerQueryVulkan::WriteTimestamp(CmdBufferVulkan* cmdBuffer, Query& query, VkPipelineStageFlagBits stage) const
{
auto pool = _device->FindAvailableTimestampQueryPool();
auto pool = _device->FindAvailableQueryPool(VK_QUERY_TYPE_TIMESTAMP);
uint32 index;
if (pool->AcquireQuery(cmdBuffer, index))
{

View File

@@ -6,6 +6,7 @@
#include "GPUDeviceVulkan.h"
#include "CmdBufferVulkan.h"
#include "RenderToolsVulkan.h"
#include "Engine/Profiler/ProfilerCPU.h"
QueueVulkan::QueueVulkan(GPUDeviceVulkan* device, uint32 familyIndex)
: _queue(VK_NULL_HANDLE)
@@ -20,6 +21,7 @@ QueueVulkan::QueueVulkan(GPUDeviceVulkan* device, uint32 familyIndex)
void QueueVulkan::Submit(CmdBufferVulkan* cmdBuffer, uint32 signalSemaphoresCount, const VkSemaphore* signalSemaphores)
{
PROFILE_CPU_NAMED("vkQueueSubmit");
ASSERT(cmdBuffer->HasEnded());
auto fence = cmdBuffer->GetFence();
ASSERT(!fence->IsSignaled);

View File

@@ -5,6 +5,7 @@
#include "RenderToolsVulkan.h"
#include "Engine/Core/Types/StringBuilder.h"
#include "Engine/Core/Log.h"
#include "Engine/Graphics/GPUResourceAccess.h"
// @formatter:off
@@ -248,10 +249,88 @@ void RenderToolsVulkan::LogVkResult(VkResult result, const char* file, uint32 li
errorType = FatalErrorType::GPUHang;
else if (result == VK_ERROR_DEVICE_LOST || result == VK_ERROR_SURFACE_LOST_KHR || result == VK_ERROR_MEMORY_MAP_FAILED)
errorType = FatalErrorType::GPUCrash;
else if (fatal)
errorType = FatalErrorType::Unknown;
if (errorType != FatalErrorType::None)
Platform::Fatal(msg, nullptr, errorType);
#if LOG_ENABLE
else
Log::Logger::Write(fatal ? LogType::Fatal : LogType::Error, msg);
Log::Logger::Write(LogType::Error, msg);
#endif
}
VkAccessFlags RenderToolsVulkan::GetAccess(GPUResourceAccess access)
{
switch (access)
{
case GPUResourceAccess::None:
return VK_ACCESS_NONE;
case GPUResourceAccess::CopyRead:
return VK_ACCESS_TRANSFER_READ_BIT;
case GPUResourceAccess::CopyWrite:
return VK_ACCESS_TRANSFER_WRITE_BIT;
case GPUResourceAccess::CpuRead:
return VK_ACCESS_HOST_READ_BIT;
case GPUResourceAccess::CpuWrite:
return VK_ACCESS_HOST_WRITE_BIT;
case GPUResourceAccess::DepthRead:
return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
case GPUResourceAccess::DepthWrite:
return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
case GPUResourceAccess::DepthBuffer:
return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
case GPUResourceAccess::RenderTarget:
return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
case GPUResourceAccess::UnorderedAccess:
return VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
case GPUResourceAccess::IndirectArgs:
return VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
case GPUResourceAccess::ShaderReadCompute:
case GPUResourceAccess::ShaderReadPixel:
case GPUResourceAccess::ShaderReadNonPixel:
case GPUResourceAccess::ShaderReadGraphics:
return VK_ACCESS_SHADER_READ_BIT;
#if !BUILD_RELEASE
default:
LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access);
#endif
}
return VK_ACCESS_NONE;
}
VkImageLayout RenderToolsVulkan::GetImageLayout(GPUResourceAccess access)
{
switch (access)
{
case GPUResourceAccess::None:
return VK_IMAGE_LAYOUT_UNDEFINED;
case GPUResourceAccess::CopyRead:
return VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
case GPUResourceAccess::CopyWrite:
return VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
case GPUResourceAccess::CpuRead:
case GPUResourceAccess::CpuWrite:
return VK_IMAGE_LAYOUT_GENERAL;
case GPUResourceAccess::DepthRead:
return VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL;
case GPUResourceAccess::DepthWrite:
case GPUResourceAccess::DepthBuffer:
return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
case GPUResourceAccess::RenderTarget:
return VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
case GPUResourceAccess::UnorderedAccess:
case GPUResourceAccess::ShaderReadCompute:
case GPUResourceAccess::ShaderReadPixel:
case GPUResourceAccess::ShaderReadNonPixel:
case GPUResourceAccess::ShaderReadGraphics:
return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
#if !BUILD_RELEASE
default:
LOG(Error, "Unsupported GPU Resource Access: {}", (uint32)access);
#endif
}
return VK_IMAGE_LAYOUT_UNDEFINED;
}
bool RenderToolsVulkan::HasExtension(const Array<const char*>& extensions, const char* name)

View File

@@ -20,6 +20,8 @@
#define VK_SET_DEBUG_NAME(device, handle, type, name)
#endif
enum class GPUResourceAccess;
/// <summary>
/// Set of utilities for rendering on Vulkan platform.
/// </summary>
@@ -40,6 +42,9 @@ public:
static String GetVkErrorString(VkResult result);
static void LogVkResult(VkResult result, const char* file = nullptr, uint32 line = 0, bool fatal = false);
static VkAccessFlags GetAccess(GPUResourceAccess access);
static VkImageLayout GetImageLayout(GPUResourceAccess access);
static inline VkPipelineStageFlags GetBufferBarrierFlags(VkAccessFlags accessFlags)
{
VkPipelineStageFlags stageFlags = (VkPipelineStageFlags)0;
@@ -67,6 +72,9 @@ public:
case VK_ACCESS_SHADER_WRITE_BIT:
stageFlags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
case VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT:
stageFlags = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
stageFlags = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
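The switch maps each access mask onto the pipeline stages that must participate in synchronization; a minimal buffer-barrier sketch using it (call site and `buffer`/`cmdBuffer` names hypothetical, not part of this commit):
// Hypothetical usage: make a transfer write visible to shader reads.
VkBufferMemoryBarrier barrier;
RenderToolsVulkan::ZeroStruct(barrier, VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER);
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = buffer;
barrier.offset = 0;
barrier.size = VK_WHOLE_SIZE;
vkCmdPipelineBarrier(cmdBuffer, RenderToolsVulkan::GetBufferBarrierFlags(barrier.srcAccessMask), RenderToolsVulkan::GetBufferBarrierFlags(barrier.dstAccessMask), 0, 0, nullptr, 1, &barrier, 0, nullptr);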

View File

@@ -0,0 +1,195 @@
// Copyright (c) Wojciech Figat. All rights reserved.
#if GRAPHICS_API_VULKAN
#include "UploadBufferVulkan.h"
#include "GPUDeviceVulkan.h"
#include "RenderToolsVulkan.h"
#include "Engine/Graphics/GPUResource.h"
#include "Engine/Profiler/ProfilerMemory.h"
/// <summary>
/// Single page for the upload buffer
/// </summary>
class UploadBufferPageVulkan : public GPUResourceBase<GPUDeviceVulkan, GPUResource>, public ResourceOwnerVulkan
{
public:
UploadBufferPageVulkan(GPUDeviceVulkan* device, uint64 size);
public:
/// <summary>
/// Last generation that used this page
/// </summary>
uint64 LastGen;
/// <summary>
/// Page size in bytes
/// </summary>
uint64 Size;
/// <summary>
/// CPU memory address of the page
/// </summary>
void* Mapped;
/// <summary>
/// Buffer that stores the page data
/// </summary>
VkBuffer Buffer;
/// <summary>
/// Buffer memory allocation
/// </summary>
VmaAllocation Allocation;
public:
// [GPUResourceVulkan]
GPUResourceType GetResourceType() const final override
{
return GPUResourceType::Buffer;
}
// [ResourceOwnerVulkan]
GPUResource* AsGPUResource() const override
{
return (GPUResource*)this;
}
protected:
// [GPUResourceVulkan]
void OnReleaseGPU() final override;
};
UploadBufferVulkan::UploadBufferVulkan(GPUDeviceVulkan* device)
: _device(device)
, _currentPage(nullptr)
, _currentOffset(0)
, _currentGeneration(0)
{
}
UploadBufferVulkan::Allocation UploadBufferVulkan::Allocate(uint64 size, uint64 align)
{
const uint64 alignmentMask = align - 1;
ASSERT_LOW_LAYER((alignmentMask & align) == 0);
const uint64 pageSize = Math::Max<uint64>(size, VULKAN_DEFAULT_UPLOAD_PAGE_SIZE);
const uint64 alignedSize = Math::AlignUpWithMask(size, alignmentMask);
// Align the allocation
_currentOffset = Math::AlignUpWithMask(_currentOffset, alignmentMask);
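// Worked example (hypothetical values): with align = 256 the mask is 0xFF, so
// an offset of 300 rounds up to (300 + 255) & ~255 = 512, and a 20-byte request
// yields alignedSize = (20 + 255) & ~255 = 256 for the page-space check below.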
// Check if there is enough space for that chunk of the data in the current page
if (_currentPage && _currentOffset + alignedSize > _currentPage->Size)
_currentPage = nullptr;
// Check if need to get new page
if (_currentPage == nullptr)
{
// Try reusing existing page
for (int32 i = 0; i < _freePages.Count(); i++)
{
UploadBufferPageVulkan* page = _freePages.Get()[i];
if (page->Size == pageSize)
{
_freePages.RemoveAt(i);
_currentPage = page;
break;
}
}
if (_currentPage == nullptr)
_currentPage = New<UploadBufferPageVulkan>(_device, pageSize);
_usedPages.Add(_currentPage);
ASSERT_LOW_LAYER(_currentPage->Buffer);
_currentOffset = 0;
}
// Mark page as used in this generation
_currentPage->LastGen = _currentGeneration;
// Create allocation result
const Allocation result{ (byte*)_currentPage->Mapped + _currentOffset, _currentOffset, size, _currentPage->Buffer, _currentGeneration };
// Move within a page
_currentOffset += size;
return result;
}
UploadBufferVulkan::Allocation UploadBufferVulkan::Upload(const void* data, uint64 size, uint64 align)
{
auto allocation = Allocate(size, align);
Platform::MemoryCopy(allocation.Mapped, data, size);
return allocation;
}
void UploadBufferVulkan::BeginGeneration(uint64 generation)
{
// Restore ready pages to be reused
for (int32 i = 0; i < _usedPages.Count(); i++)
{
auto page = _usedPages[i];
if (page->LastGen + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT < generation)
{
_usedPages.RemoveAt(i);
i--;
_freePages.Add(page);
}
}
// Remove old pages
for (int32 i = _freePages.Count() - 1; i >= 0; i--)
{
auto page = _freePages[i];
if (page->LastGen + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT + VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT < generation)
{
_freePages.RemoveAt(i);
page->ReleaseGPU();
Delete(page);
}
}
// Set new generation
_currentGeneration = generation;
}
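// Intended cadence (sketch; the per-frame call site is outside this diff): call
// once per frame with a monotonic counter, e.g. BeginGeneration(Engine::FrameCount),
// so used pages idle for VULKAN_UPLOAD_PAGE_GEN_TIMEOUT generations return to the
// free list and free pages idle for another VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT
// generations are destroyed.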
void UploadBufferVulkan::Dispose()
{
_freePages.Add(_usedPages);
for (auto page : _freePages)
{
page->ReleaseGPU();
Delete(page);
}
}
UploadBufferPageVulkan::UploadBufferPageVulkan(GPUDeviceVulkan* device, uint64 size)
: GPUResourceBase(device, TEXT("Upload Buffer Page"))
, LastGen(0)
, Size(size)
{
VkBufferCreateInfo bufferInfo;
RenderToolsVulkan::ZeroStruct(bufferInfo, VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
bufferInfo.size = size;
bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo;
vmaCreateBuffer(_device->Allocator, &bufferInfo, &allocCreateInfo, &Buffer, &Allocation, &allocInfo);
Mapped = allocInfo.pMappedData;
ASSERT_LOW_LAYER(Mapped);
_memoryUsage = size;
PROFILE_MEM_INC(GraphicsCommands, _memoryUsage);
}
void UploadBufferPageVulkan::OnReleaseGPU()
{
PROFILE_MEM_DEC(GraphicsCommands, _memoryUsage);
vmaDestroyBuffer(_device->Allocator, Buffer, Allocation);
Buffer = VK_NULL_HANDLE;
Allocation = VK_NULL_HANDLE;
Mapped = nullptr;
}
#endif
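With the pages in place, an upload reduces to a pointer bump plus one copy command; a minimal sketch of a call site built on this allocator (the destination buffer and all names are hypothetical, not part of this commit):
// Hypothetical usage: stream vertex data into a GPU-only buffer.
const UploadBufferVulkan::Allocation allocation = uploadBuffer->Upload(vertices, verticesSize, 4);
VkBufferCopy region;
region.srcOffset = allocation.Offset;
region.dstOffset = 0;
region.size = verticesSize;
vkCmdCopyBuffer(cmdBuffer, allocation.Buffer, dstBuffer, 1, &region);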

View File

@@ -0,0 +1,79 @@
// Copyright (c) Wojciech Figat. All rights reserved.
#pragma once
#include "Engine/Graphics/GPUDevice.h"
#include "ResourceOwnerVulkan.h"
#if GRAPHICS_API_VULKAN
class GPUDeviceVulkan;
class UploadBufferPageVulkan;
// Upload buffer page size
#define VULKAN_DEFAULT_UPLOAD_PAGE_SIZE (4 * 1024 * 1024) // 4 MB
// Generations after which a used upload page is recycled back to the free list
#define VULKAN_UPLOAD_PAGE_GEN_TIMEOUT 3
// Free pages that stay unused for this many additional generations are released
#define VULKAN_UPLOAD_PAGE_NOT_USED_FRAME_TIMEOUT 60
/// <summary>
/// Utility for uploading data to GPU buffers
/// </summary>
class UploadBufferVulkan
{
public:
/// <summary>
/// Upload buffer allocation
/// </summary>
struct Allocation
{
/// <summary>
/// CPU memory address of the allocation start.
/// </summary>
void* Mapped;
/// <summary>
/// Allocation offset in bytes (from the start of the heap buffer).
/// </summary>
uint64 Offset;
/// <summary>
/// Allocation size in bytes
/// </summary>
uint64 Size;
/// <summary>
/// Vulkan buffer of the upload page that owns this allocation
/// </summary>
VkBuffer Buffer;
/// <summary>
/// Generation number of this allocation (an allocation generally becomes invalid after one or two generations)
/// </summary>
uint64 Generation;
};
private:
GPUDeviceVulkan* _device;
UploadBufferPageVulkan* _currentPage;
uint64 _currentOffset;
uint64 _currentGeneration;
Array<UploadBufferPageVulkan*, InlinedAllocation<64>> _freePages;
Array<UploadBufferPageVulkan*, InlinedAllocation<64>> _usedPages;
public:
UploadBufferVulkan(GPUDeviceVulkan* device);
public:
Allocation Allocate(uint64 size, uint64 align);
Allocation Upload(const void* data, uint64 size, uint64 align);
public:
void BeginGeneration(uint64 generation);
void Dispose();
};
#endif
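Because recycling is driven purely by generation counters, a caller that caches an Allocation across frames can detect staleness with one comparison; a sketch of such a guard (hypothetical, not part of this commit):
// Hypothetical staleness check for a cached allocation.
if (currentGeneration > cached.Generation + VULKAN_UPLOAD_PAGE_GEN_TIMEOUT)
cached = uploadBuffer->Allocate(size, align); // the old page may have been recycled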

View File

@@ -49,6 +49,9 @@ public:
featuresToEnable.sparseResidency8Samples = VK_FALSE;
featuresToEnable.sparseResidencyAliased = VK_FALSE;
}
static bool LoadCache(const String& folder, const Char* filename, Array<byte>& data);
static bool SaveCache(const String& folder, const Char* filename, const Array<byte>& data);
};
#endif
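LoadCache/SaveCache pair naturally with VkPipelineCache serialization; a sketch of how a pipeline cache could be round-tripped through them (the class name, folder, and file name are assumptions based on this header, not part of this diff):
// Hypothetical usage on device init:
Array<byte> data;
VkPipelineCacheCreateInfo cacheInfo;
RenderToolsVulkan::ZeroStruct(cacheInfo, VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
if (VulkanPlatformBase::LoadCache(cacheFolder, TEXT("PipelineCache.bin"), data))
{
cacheInfo.initialDataSize = (size_t)data.Count();
cacheInfo.pInitialData = data.Get();
}
VALIDATE_VULKAN_RESULT(vkCreatePipelineCache(device, &cacheInfo, nullptr, &pipelineCache));
// ...and on shutdown:
size_t size = 0;
vkGetPipelineCacheData(device, pipelineCache, &size, nullptr);
data.Resize((int32)size);
vkGetPipelineCacheData(device, pipelineCache, &size, data.Get());
VulkanPlatformBase::SaveCache(cacheFolder, TEXT("PipelineCache.bin"), data);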