From b191d3918ed8f494ada126a18ab5b39a5334ac63 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 2 Mar 2026 20:36:33 +0100 Subject: [PATCH] Fix rendering various visuals on WebGPU --- Flax.flaxproj | 2 +- Source/Engine/Graphics/GPUDevice.cpp | 2 - .../Graphics/Materials/MaterialShader.h | 2 +- Source/Engine/Graphics/RenderBuffers.cpp | 17 +- .../WebGPU/GPUContextWebGPU.cpp | 179 ++++++++++++------ .../GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp | 20 +- .../WebGPU/GPUPipelineStateWebGPU.cpp | 158 +++++++++++++--- .../WebGPU/GPUPipelineStateWebGPU.h | 2 +- .../WebGPU/GPUSamplerWebGPU.cpp | 8 +- .../WebGPU/GPUSwapChainWebGPU.cpp | 2 +- .../WebGPU/GPUTextureWebGPU.cpp | 78 +++++--- .../GraphicsDevice/WebGPU/GPUTextureWebGPU.h | 28 ++- .../WebGPU/GPUVertexLayoutWebGPU.h | 4 - .../GPU/ParticleEmitterGraph.GPU.Textures.cpp | 5 - .../Engine/Renderer/AmbientOcclusionPass.cpp | 10 +- Source/Engine/Renderer/RenderList.cpp | 4 + Source/Engine/Renderer/ShadowsPass.cpp | 5 + Source/Engine/Renderer/Utils/MultiScaler.cpp | 15 +- .../MaterialGenerator.Textures.cpp | 10 +- Source/Shaders/Common.hlsl | 14 +- Source/Shaders/DebugDraw.shader | 2 +- Source/Shaders/Fog.shader | 2 +- Source/Shaders/GBuffer.hlsl | 2 +- Source/Shaders/GUI.shader | 4 +- Source/Shaders/Gather.hlsl | 17 ++ Source/Shaders/Lights.shader | 2 +- Source/Shaders/MotionBlur.shader | 8 +- Source/Shaders/MultiScaler.shader | 22 +-- Source/Shaders/Quad.shader | 2 +- Source/Shaders/Reflections.shader | 2 +- Source/Shaders/SSAO.shader | 16 +- Source/Shaders/SSR.hlsl | 6 +- Source/Shaders/Shadows.shader | 6 +- Source/Shaders/ShadowsSampling.hlsl | 14 +- Source/Shaders/Sky.shader | 4 +- Source/Shaders/TAA.shader | 4 +- 36 files changed, 458 insertions(+), 220 deletions(-) diff --git a/Flax.flaxproj b/Flax.flaxproj index 3ee8f392f..b457d7e75 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -4,7 +4,7 @@ "Major": 1, "Minor": 12, "Revision": 0, - "Build": 6905 + "Build": 6906 }, "Company": "Flax", "Copyright": "Copyright (c) 
2012-2026 Wojciech Figat. All rights reserved.", diff --git a/Source/Engine/Graphics/GPUDevice.cpp b/Source/Engine/Graphics/GPUDevice.cpp index da0e2f07c..911f2d834 100644 --- a/Source/Engine/Graphics/GPUDevice.cpp +++ b/Source/Engine/Graphics/GPUDevice.cpp @@ -391,8 +391,6 @@ bool GPUDevice::Init() _res->TasksManager.SetExecutor(CreateTasksExecutor()); LOG(Info, "Total graphics memory: {0}", Utilities::BytesToText(TotalGraphicsMemory)); - if (!Limits.HasCompute) - LOG(Warning, "Compute Shaders are not supported"); for (const auto& videoOutput : VideoOutputs) LOG(Info, "Video output '{0}' {1}x{2} {3} Hz", videoOutput.Name, videoOutput.Width, videoOutput.Height, videoOutput.RefreshRate); Engine::RequestingExit.Bind(this); diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index 2aadf7b62..849f272f2 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,7 +10,7 @@ /// /// Current materials shader version. 
/// -#define MATERIAL_GRAPH_VERSION 182 +#define MATERIAL_GRAPH_VERSION 183 class Material; class GPUShader; diff --git a/Source/Engine/Graphics/RenderBuffers.cpp b/Source/Engine/Graphics/RenderBuffers.cpp index 65fc01ea2..296c17c70 100644 --- a/Source/Engine/Graphics/RenderBuffers.cpp +++ b/Source/Engine/Graphics/RenderBuffers.cpp @@ -87,12 +87,10 @@ GPUTexture* RenderBuffers::RequestHalfResDepth(GPUContext* context) if (!MultiScaler::Instance()->IsReady()) return DepthBuffer; - const int32 halfDepthWidth = RenderTools::GetResolution(_width, ResolutionMode::Half); - const int32 halfDepthHeight = RenderTools::GetResolution(_height, ResolutionMode::Half); - const PixelFormat halfDepthFormat = GPU_DEPTH_BUFFER_PIXEL_FORMAT; - auto tempDesc = GPUTextureDescription::New2D(halfDepthWidth, halfDepthHeight, halfDepthFormat); - if (EnumHasAnyFlags(DepthBuffer->Flags(), GPUTextureFlags::ReadOnlyDepthView)) - tempDesc.Flags = GPUTextureFlags::ShaderResource | GPUTextureFlags::DepthStencil | GPUTextureFlags::ReadOnlyDepthView; + auto format = GPU_DEPTH_BUFFER_PIXEL_FORMAT; + auto width = RenderTools::GetResolution(_width, ResolutionMode::Half); + auto height = RenderTools::GetResolution(_height, ResolutionMode::Half); + auto tempDesc = GPUTextureDescription::New2D(width, height, format, DepthBuffer->Flags()); LastFrameHalfResDepth = currentFrame; if (HalfResDepth == nullptr) @@ -101,7 +99,7 @@ GPUTexture* RenderBuffers::RequestHalfResDepth(GPUContext* context) HalfResDepth = RenderTargetPool::Get(tempDesc); RENDER_TARGET_POOL_SET_NAME(HalfResDepth, "HalfResDepth"); } - else if (HalfResDepth->Width() != halfDepthWidth || HalfResDepth->Height() != halfDepthHeight || HalfResDepth->Format() != halfDepthFormat) + else if (HalfResDepth->Width() != width || HalfResDepth->Height() != height || HalfResDepth->Format() != format) { // Wrong size buffer RenderTargetPool::Release(HalfResDepth); @@ -110,7 +108,7 @@ GPUTexture* RenderBuffers::RequestHalfResDepth(GPUContext* context) } // 
Generate depth - MultiScaler::Instance()->DownscaleDepth(context, halfDepthWidth, halfDepthHeight, DepthBuffer, HalfResDepth->View()); + MultiScaler::Instance()->DownscaleDepth(context, width, height, DepthBuffer, HalfResDepth->View()); return HalfResDepth; } @@ -126,8 +124,7 @@ GPUTexture* RenderBuffers::RequestHiZ(GPUContext* context, bool fullRes, int32 m LastFrameHiZ = currentFrame; // Allocate or resize buffer (with full mip-chain) - // TODO: migrate to inverse depth and try using r16 again as default (should have no artifacts anymore) - auto format = PLATFORM_WEB || PLATFORM_ANDROID || PLATFORM_IOS || PLATFORM_SWITCH ? PixelFormat::R16_UInt : PixelFormat::R32_Float; + auto format = PixelFormat::R32_Float; auto width = fullRes ? _width : Math::Max(_width >> 1, 1); auto height = fullRes ? _height : Math::Max(_height >> 1, 1); auto desc = GPUTextureDescription::New2D(width, height, mipLevels, format, GPUTextureFlags::ShaderResource); diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp index 51ef32e94..c25bc8431 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUContextWebGPU.cpp @@ -60,6 +60,8 @@ GPUContextWebGPU::GPUContextWebGPU(GPUDeviceWebGPU* device) GPUContextWebGPU::~GPUContextWebGPU() { + if (Encoder) + Flush(); CHECK(Encoder == nullptr); } @@ -120,6 +122,10 @@ void GPUContextWebGPU::FrameEnd() void GPUContextWebGPU::EventBegin(const Char* name) { + // Cannot insert commands in encoder during render pass + if (_renderPass) + EndRenderPass(); + StringAsANSI<> nameAnsi(name); wgpuCommandEncoderPushDebugGroup(Encoder, { nameAnsi.Get(), (size_t)nameAnsi.Length() }); } @@ -252,17 +258,19 @@ void GPUContextWebGPU::SetStencilRef(uint32 value) void GPUContextWebGPU::ResetSR() { + _bindGroupDirty = true; Platform::MemoryClear(_shaderResources, sizeof(_shaderResources)); } void GPUContextWebGPU::ResetUA() { + _bindGroupDirty 
= true; Platform::MemoryClear(_storageResources, sizeof(_storageResources)); } void GPUContextWebGPU::ResetCB() { - _bindGroupDirty = false; + _bindGroupDirty = true; Platform::MemoryClear(_constantBuffers, sizeof(_constantBuffers)); } @@ -425,14 +433,14 @@ void GPUContextWebGPU::EndQuery(uint64 queryID) void GPUContextWebGPU::SetViewport(const Viewport& viewport) { _viewport = viewport; - if (_renderPass) + if (_renderPass && !_renderPassDirty) wgpuRenderPassEncoderSetViewport(_renderPass, viewport.X, viewport.Y, viewport.Width, viewport.Height, viewport.MinDepth, viewport.MaxDepth); } void GPUContextWebGPU::SetScissor(const Rectangle& scissorRect) { _scissorRect = scissorRect; - if (_renderPass) + if (_renderPass && !_renderPassDirty) wgpuRenderPassEncoderSetScissorRect(_renderPass, (uint32_t)scissorRect.GetX(), (uint32_t)scissorRect.GetY(), (uint32_t)scissorRect.GetWidth(), (uint32_t)scissorRect.GetHeight()); } @@ -490,6 +498,7 @@ void GPUContextWebGPU::Flush() WGPUCommandBufferDescriptor commandBufferDesc = WGPU_COMMAND_BUFFER_DESCRIPTOR_INIT; WGPUCommandBuffer commandBuffer = wgpuCommandEncoderFinish(Encoder, &commandBufferDesc); wgpuCommandEncoderRelease(Encoder); + Encoder = nullptr; if (commandBuffer) { wgpuQueueSubmit(_device->Queue, 1, &commandBuffer); @@ -567,8 +576,7 @@ void GPUContextWebGPU::CopyTexture(GPUTexture* dstResource, uint32 dstSubresourc ASSERT(dstResource && srcResource); auto srcTextureWebGPU = (GPUTextureWebGPU*)srcResource; auto dstTextureWebGPU = (GPUTextureWebGPU*)dstResource; - ASSERT_LOW_LAYER(dstTextureWebGPU->Texture && wgpuTextureGetUsage(dstTextureWebGPU->Texture) & WGPUTextureUsage_CopyDst); - ASSERT_LOW_LAYER(srcTextureWebGPU->Texture && wgpuTextureGetUsage(srcTextureWebGPU->Texture) & WGPUTextureUsage_CopySrc); + ASSERT_LOW_LAYER(dstTextureWebGPU->Texture && srcTextureWebGPU->Texture); const int32 srcMipIndex = srcSubresource % srcTextureWebGPU->MipLevels(); const int32 dstMipIndex = dstSubresource % 
srcTextureWebGPU->MipLevels(); @@ -578,18 +586,45 @@ void GPUContextWebGPU::CopyTexture(GPUTexture* dstResource, uint32 dstSubresourc int32 srcMipWidth, srcMipHeight, srcMipDepth; srcTextureWebGPU->GetMipSize(srcMipIndex, srcMipWidth, srcMipHeight, srcMipDepth); - WGPUTexelCopyTextureInfo srcInfo = WGPU_TEXEL_COPY_TEXTURE_INFO_INIT; - srcInfo.texture = srcTextureWebGPU->Texture; - srcInfo.mipLevel = srcMipIndex; - srcInfo.origin.z = srcArrayIndex; - srcInfo.aspect = WGPUTextureAspect_All; - WGPUTexelCopyTextureInfo dstInfo = WGPU_TEXEL_COPY_TEXTURE_INFO_INIT; - dstInfo.texture = dstTextureWebGPU->Texture; - dstInfo.mipLevel = dstMipIndex; - dstInfo.origin = { dstX, dstY, dstZ + dstArrayIndex }; - dstInfo.aspect = WGPUTextureAspect_All; - WGPUExtent3D copySize = { (uint32_t)srcMipWidth, (uint32_t)srcMipHeight, (uint32_t)srcMipDepth }; - wgpuCommandEncoderCopyTextureToTexture(Encoder, &srcInfo, &dstInfo, ©Size); + if (dstTextureWebGPU->Usage & WGPUTextureUsage_CopyDst && srcTextureWebGPU->Usage & WGPUTextureUsage_CopySrc) + { + // Direct copy + WGPUTexelCopyTextureInfo srcInfo = WGPU_TEXEL_COPY_TEXTURE_INFO_INIT; + srcInfo.texture = srcTextureWebGPU->Texture; + srcInfo.mipLevel = srcMipIndex; + srcInfo.origin.z = srcArrayIndex; + srcInfo.aspect = WGPUTextureAspect_All; + WGPUTexelCopyTextureInfo dstInfo = WGPU_TEXEL_COPY_TEXTURE_INFO_INIT; + dstInfo.texture = dstTextureWebGPU->Texture; + dstInfo.mipLevel = dstMipIndex; + dstInfo.origin = { dstX, dstY, dstZ + dstArrayIndex }; + dstInfo.aspect = WGPUTextureAspect_All; + WGPUExtent3D copySize = { (uint32_t)srcMipWidth, (uint32_t)srcMipHeight, (uint32_t)srcMipDepth }; + wgpuCommandEncoderCopyTextureToTexture(Encoder, &srcInfo, &dstInfo, ©Size); + } + else if (dstTextureWebGPU->Usage & WGPUTextureUsage_RenderAttachment && srcTextureWebGPU->Usage & WGPUTextureUsage_TextureBinding) + { + // Copy via drawing + ResetRenderTarget(); + SetViewportAndScissors(srcMipWidth, srcMipHeight); + SetState(_device->GetCopyLinearPS()); + 
if (srcSubresource == 0 && dstSubresource == 0) + { + SetRenderTarget(dstTextureWebGPU->View(0)); + BindSR(0, srcTextureWebGPU->View(0)); + } + else + { + ASSERT(dstTextureWebGPU->HasPerMipViews() && srcResource->HasPerMipViews()); + SetRenderTarget(dstTextureWebGPU->View(dstArrayIndex, dstMipIndex)); + BindSR(0, srcTextureWebGPU->View(srcArrayIndex, srcMipIndex)); + } + DrawFullscreenTriangle(); + } + else + { + LOG(Fatal, "Cannot copy texture {} to {}", srcTextureWebGPU->GetDescription().ToString(), dstTextureWebGPU->GetDescription().ToString()); + } } void GPUContextWebGPU::ResetCounter(GPUBuffer* buffer) @@ -615,9 +650,15 @@ void GPUContextWebGPU::CopyResource(GPUResource* dstResource, GPUResource* srcRe { // Texture -> Texture ASSERT(srcTexture->MipLevels() == dstTexture->MipLevels()); - ASSERT(srcTexture->ArraySize() == 1); // TODO: implement copying texture arrays - for (int32 mipLevel = 0; mipLevel < srcTexture->MipLevels(); mipLevel++) - CopyTexture(dstTexture, mipLevel, 0, 0, 0, srcTexture, mipLevel); + ASSERT(srcTexture->ArraySize() == dstTexture->ArraySize()); + for (int32 arraySlice = 0; arraySlice < srcTexture->ArraySize(); arraySlice++) + { + for (int32 mipLevel = 0; mipLevel < srcTexture->MipLevels(); mipLevel++) + { + uint32 subresource = arraySlice * srcTexture->MipLevels() + mipLevel; + CopyTexture(dstTexture, subresource, 0, 0, 0, srcTexture, subresource); + } + } } else if (srcTexture) { @@ -751,7 +792,7 @@ void GPUContextWebGPU::OnDrawCall() } // Check if need to start a new render pass - if (_renderPassDirty) + if (_renderPassDirty || !_renderPass) { FlushRenderPass(); } @@ -815,13 +856,14 @@ void GPUContextWebGPU::FlushRenderPass() // Start a new render pass WGPURenderPassColorAttachment colorAttachments[GPU_MAX_RT_BINDED]; - WGPURenderPassDepthStencilAttachment depthStencilAttachment = WGPU_RENDER_PASS_DEPTH_STENCIL_ATTACHMENT_INIT; + WGPURenderPassDepthStencilAttachment depthStencilAttachment; WGPURenderPassDescriptor renderPassDesc = 
WGPU_RENDER_PASS_DESCRIPTOR_INIT; renderPassDesc.colorAttachmentCount = _renderTargetCount; renderPassDesc.colorAttachments = colorAttachments; PendingClear clear; _pipelineKey.MultiSampleCount = 1; _pipelineKey.RenderTargetCount = _renderTargetCount; + GPUTextureViewSizeWebGPU attachmentSize; for (int32 i = 0; i < renderPassDesc.colorAttachmentCount; i++) { auto& colorAttachment = colorAttachments[i]; @@ -838,43 +880,54 @@ void GPUContextWebGPU::FlushRenderPass() } _pipelineKey.MultiSampleCount = (int32)renderTarget->GetMSAA(); _pipelineKey.RenderTargetFormats[i] = renderTarget->Format; + attachmentSize.Set(renderTarget->RenderSize); } if (_depthStencil) { + auto renderTarget = _depthStencil; renderPassDesc.depthStencilAttachment = &depthStencilAttachment; - depthStencilAttachment.view = _depthStencil->ViewRender; - depthStencilAttachment.depthLoadOp = WGPULoadOp_Load; - depthStencilAttachment.depthStoreOp = _depthStencil->ReadOnly ? WGPUStoreOp_Discard : WGPUStoreOp_Store; - depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly; - if (_depthStencil->HasStencil) + depthStencilAttachment = WGPU_RENDER_PASS_DEPTH_STENCIL_ATTACHMENT_INIT; + depthStencilAttachment.view = renderTarget->ViewRender; + depthStencilAttachment.depthLoadOp = renderTarget->ReadOnly ? WGPULoadOp_Undefined : WGPULoadOp_Load; + depthStencilAttachment.depthStoreOp = renderTarget->ReadOnly ? WGPUStoreOp_Undefined : WGPUStoreOp_Store; + depthStencilAttachment.depthReadOnly = renderTarget->ReadOnly; + if (renderTarget->HasStencil) { - depthStencilAttachment.stencilLoadOp = WGPULoadOp_Load; - depthStencilAttachment.stencilStoreOp = _depthStencil->ReadOnly ? WGPUStoreOp_Discard : WGPUStoreOp_Store; - depthStencilAttachment.depthReadOnly = _depthStencil->ReadOnly; + depthStencilAttachment.stencilLoadOp = renderTarget->ReadOnly ? WGPULoadOp_Undefined : WGPULoadOp_Load; + depthStencilAttachment.stencilStoreOp = renderTarget->ReadOnly ? 
WGPUStoreOp_Undefined : WGPUStoreOp_Store; + depthStencilAttachment.depthReadOnly = renderTarget->ReadOnly; + depthStencilAttachment.stencilReadOnly = renderTarget->ReadOnly; } else { depthStencilAttachment.stencilClearValue = 0; - depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear; - depthStencilAttachment.stencilStoreOp = WGPUStoreOp_Discard; + depthStencilAttachment.stencilLoadOp = WGPULoadOp_Undefined; + depthStencilAttachment.stencilStoreOp = WGPUStoreOp_Undefined; depthStencilAttachment.stencilReadOnly = true; } - if (FindClear(_depthStencil, clear)) + if (!renderTarget->ReadOnly && FindClear(renderTarget, clear)) { depthStencilAttachment.depthLoadOp = WGPULoadOp_Clear; depthStencilAttachment.depthClearValue = clear.Depth; - if (_depthStencil->HasStencil) + if (renderTarget->HasStencil) { depthStencilAttachment.stencilLoadOp = WGPULoadOp_Clear; depthStencilAttachment.stencilClearValue = clear.Stencil; } } - _pipelineKey.DepthStencilFormat = _depthStencil->Format; + else + { + depthStencilAttachment.depthClearValue = 0.0f; + depthStencilAttachment.stencilClearValue = 0; + } + _pipelineKey.DepthStencilFormat = renderTarget->Format; + attachmentSize.Set(renderTarget->RenderSize); } else { _pipelineKey.DepthStencilFormat = WGPUTextureFormat_Undefined; } + ASSERT(attachmentSize.Packed != 0); _renderPass = wgpuCommandEncoderBeginRenderPass(Encoder, &renderPassDesc); ASSERT(_renderPass); @@ -885,11 +938,11 @@ void GPUContextWebGPU::FlushRenderPass() if (_stencilRef != 0) wgpuRenderPassEncoderSetStencilReference(_renderPass, _stencilRef); auto scissorRect = _scissorRect; - // TODO: skip calling this if scissorRect is default (0, 0, attachment width, attachment height) - wgpuRenderPassEncoderSetScissorRect(_renderPass, (uint32_t)scissorRect.GetX(), (uint32_t)scissorRect.GetY(), (uint32_t)scissorRect.GetWidth(), (uint32_t)scissorRect.GetHeight()); + if (scissorRect != Rectangle(0, 0, attachmentSize.Width, attachmentSize.Height)) + 
wgpuRenderPassEncoderSetScissorRect(_renderPass, (uint32_t)scissorRect.GetX(), (uint32_t)scissorRect.GetY(), (uint32_t)scissorRect.GetWidth(), (uint32_t)scissorRect.GetHeight()); auto viewport = _viewport; - // TODO: skip calling this if viewport is default (0, 0, attachment width, attachment height, 0, 1) - wgpuRenderPassEncoderSetViewport(_renderPass, viewport.X, viewport.Y, viewport.Width, viewport.Height, viewport.MinDepth, viewport.MaxDepth); + if (viewport != Viewport(Float2(attachmentSize.Width, attachmentSize.Height))) + wgpuRenderPassEncoderSetViewport(_renderPass, viewport.X, viewport.Y, viewport.Width, viewport.Height, viewport.MinDepth, viewport.MaxDepth); // Auto-dirty pipeline when new render pass starts if (_pipelineState) @@ -937,6 +990,7 @@ void GPUContextWebGPU::FlushBindGroup() break; } case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { ASSERT_LOW_LAYER(descriptor.BindingType == SpirvShaderResourceBindingType::SRV); auto view = _shaderResources[descriptor.Slot]; @@ -952,7 +1006,19 @@ void GPUContextWebGPU::FlushBindGroup() LOG(Error, "Missing resource {} at slot {} of binding space {}", (int32)descriptor.ResourceType, descriptor.Slot, (int32)descriptor.BindingType); CRASH; } - view = defaultTexture->View(0); + switch (descriptor.ResourceType) + { + case SpirvShaderResourceType::Texture3D: + view = defaultTexture->ViewVolume(); + break; + case SpirvShaderResourceType::Texture1DArray: + case SpirvShaderResourceType::Texture2DArray: + view = defaultTexture->ViewArray(); + break; + default: + view = defaultTexture->View(0); + break; + } ptr = (GPUResourceViewPtrWebGPU*)view->GetNativePtr(); entry.textureView = ptr->TextureView->View; } @@ -976,6 +1042,7 @@ void GPUContextWebGPU::FlushBindGroup() } break; } + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot]; @@ -983,20 +1050,10 @@ void 
GPUContextWebGPU::FlushBindGroup() { entry.buffer = uniform->Allocation.Buffer; entry.size = uniform->AllocationSize; - _dynamicOffsets.Add(uniform->Allocation.Offset); - } - else - CRASH; // TODO: add dummy buffer as fallback - break; - } - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - { - GPUConstantBufferWebGPU* uniform = _constantBuffers[descriptor.Slot]; - if (uniform && uniform->Allocation.Buffer) - { - entry.buffer = uniform->Allocation.Buffer; - entry.offset = uniform->Allocation.Offset; - entry.size = uniform->AllocationSize; + if (descriptor.DescriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) + entry.offset = uniform->Allocation.Offset; + else + _dynamicOffsets.Add(uniform->Allocation.Offset); } else CRASH; // TODO: add dummy buffer as fallback @@ -1015,9 +1072,21 @@ void GPUContextWebGPU::FlushBindGroup() // Create a bind group bindGroupDesc.entryCount = _bindGroupEntries.Count(); bindGroupDesc.entries = entriesPtr; +#if BUILD_DEBUG + for (int32 i = 0; i < bindGroupDesc.entryCount; i++) + { + auto& e = bindGroupDesc.entries[i]; + if ((e.buffer != nullptr) + (e.sampler != nullptr) + (e.textureView != nullptr) != 1) + { + LOG(Error, "Invalid binding in group {} at index {} ({})", groupIndex, i, _pipelineState->GetName()); + LOG(Error, " > sampler: {}", (uint32)e.sampler); + LOG(Error, " > textureView: {}", (uint32)e.textureView); + LOG(Error, " > buffer: {}", (uint32)e.buffer); + } + } +#endif WGPUBindGroup bindGroup = wgpuDeviceCreateBindGroup(_device->Device, &bindGroupDesc); _unusedBindGroups.Add(bindGroup); - // TODO: cache and release them // Bind group wgpuRenderPassEncoderSetBindGroup(_renderPass, groupIndex, bindGroup, _dynamicOffsets.Count(), _dynamicOffsets.Get()); diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp index 3a3fceddc..adf413de1 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUDeviceWebGPU.cpp @@ 
-28,22 +28,6 @@ GPUVertexLayoutWebGPU::GPUVertexLayoutWebGPU(GPUDeviceWebGPU* device, const Elem : GPUResourceBase(device, StringView::Empty) { SetElements(elements, explicitOffsets); - Layout = WGPU_VERTEX_BUFFER_LAYOUT_INIT; - Layout.stepMode = WGPUVertexStepMode_Vertex; - Layout.arrayStride = GetStride(); - Layout.attributeCount = elements.Count(); - Layout.attributes = Attributes; - const VertexElement* srcElements = GetElements().Get(); - for (int32 i = 0; i < elements.Count(); i++) - { - const VertexElement& src = srcElements[i]; - WGPUVertexAttribute& dst = Attributes[i]; - dst.nextInChain = nullptr; - dst.format = RenderToolsWebGPU::ToVertexFormat(src.Format); - dst.offset = src.Offset; - if (src.PerInstance) - Layout.stepMode = WGPUVertexStepMode_Instance; - } } GPUDataUploaderWebGPU::Allocation GPUDataUploaderWebGPU::Allocate(uint32 size, WGPUBufferUsage usage, uint32 alignment) @@ -183,6 +167,7 @@ bool GPUDeviceWebGPU::Init() if (wgpuAdapterGetLimits(Adapter->Adapter, &limits) == WGPUStatus_Success) { MinUniformBufferOffsetAlignment = limits.minUniformBufferOffsetAlignment; + Limits.HasDrawIndirect = true; Limits.HasDepthAsSRV = true; Limits.HasReadOnlyDepth = true; Limits.HasDepthClip = features.Contains(WGPUFeatureName_DepthClipControl); @@ -431,6 +416,9 @@ bool GPUDeviceWebGPU::Init() { LOG(Info, "WebGPU: {}", WEBGPU_TO_STR(message)); } + static int32 LogSpamLeft = 20; + if (LogSpamLeft-- < 0) + CRASH; // Too many errors #endif }; diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp index 369c98c20..7c791dd0e 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.cpp @@ -11,6 +11,8 @@ #include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Graphics/PixelFormatExtensions.h" +#define WEBGPU_LOG_PSO 0 + WGPUCompareFunction ToCompareFunction(ComparisonFunc value) { switch (value) 
@@ -158,10 +160,9 @@ void GPUPipelineStateWebGPU::OnReleaseGPU() uint32 GetHash(const GPUPipelineStateWebGPU::Key& key) { - static_assert(sizeof(GPUPipelineStateWebGPU::Key) == sizeof(uint64) * 3, "Invalid PSO key size."); + static_assert(sizeof(GPUPipelineStateWebGPU::Key) == sizeof(uint64) * 2, "Invalid PSO key size."); uint32 hash = GetHash(key.Packed[0]); CombineHash(hash, GetHash(key.Packed[1])); - CombineHash(hash, GetHash(key.Packed[2])); return hash; } @@ -175,6 +176,9 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key, GPUResour #if GPU_ENABLE_RESOURCE_NAMING ZoneText(_debugName.Get(), _debugName.Count() - 1); #endif +#if WEBGPU_LOG_PSO + LOG(Info, "[WebGPU] GetPipeline: '{}'", String(_debugName.Get(), _debugName.Count() - 1)); +#endif // Lazy-init layout (cannot do it during Init as texture samplers that access eg. depth need to explicitly use UnfilterableFloat) if (!PipelineDesc.layout) @@ -182,10 +186,14 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key, GPUResour // Build final pipeline description _depthStencilDesc.format = (WGPUTextureFormat)key.DepthStencilFormat; + PipelineDesc.depthStencil = key.DepthStencilFormat ? 
&_depthStencilDesc : nullptr; // Unbind depth stencil state when no debug buffer is bound PipelineDesc.multisample.count = key.MultiSampleCount; - _fragmentDesc.targetCount = key.RenderTargetCount; - for (int32 i = 0; i < _fragmentDesc.targetCount; i++) - _colorTargets[i].format = (WGPUTextureFormat)key.RenderTargetFormats[i]; + if (PS) + { + _fragmentDesc.targetCount = key.RenderTargetCount; + for (int32 i = 0; i < _fragmentDesc.targetCount; i++) + _colorTargets[i].format = (WGPUTextureFormat)key.RenderTargetFormats[i]; + } WGPUVertexBufferLayout buffers[GPU_MAX_VB_BINDED]; if (key.VertexLayout) { @@ -253,6 +261,10 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key, GPUResour void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]) { +#if WEBGPU_LOG_PSO + // Debug log for PSOs with specific name + const bool log = true;// StringAnsiView(_debugName.Get(), _debugName.Count() - 1).Contains("PS_HalfDepth"); +#endif // Count the biggest bind group entries (for all shaders) to allocate reused memory int32 maxEntriesCount = 0; @@ -276,6 +288,11 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX int32 entriesCount = descriptors->DescriptorTypesCount; Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount); auto visibility = groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment; +#if WEBGPU_LOG_PSO + if (log) + LOG(Info, " > group {} - {}", groupIndex, groupIndex == 0 ? 
TEXT("Vertex") : TEXT("Fragment")); + const Char* samplerType = TEXT("?"); +#endif for (int32 index = 0; index < entriesCount; index++) { auto& descriptor = descriptors->DescriptorTypes[index]; @@ -287,16 +304,87 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX { case VK_DESCRIPTOR_TYPE_SAMPLER: entry.sampler.type = WGPUSamplerBindingType_Undefined; + if (descriptor.Slot == 4 || descriptor.Slot == 5) // Hack for ShadowSampler and ShadowSamplerLinear (this could get binded samplers table just like for shaderResources) + entry.sampler.type = WGPUSamplerBindingType_Comparison; +#if WEBGPU_LOG_PSO + switch (entry.sampler.type) + { + case WGPUSamplerBindingType_BindingNotUsed: + samplerType = TEXT("BindingNotUsed"); + break; + case WGPUSamplerBindingType_Undefined: + samplerType = TEXT("Undefined"); + break; + case WGPUSamplerBindingType_Filtering: + samplerType = TEXT("Filtering"); + break; + case WGPUSamplerBindingType_NonFiltering: + samplerType = TEXT("NonFiltering"); + break; + case WGPUSamplerBindingType_Comparison: + samplerType = TEXT("Comparison"); + break; + } + if (log) + LOG(Info, " > [{}] sampler ({})", entry.binding, samplerType); +#endif break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: entry.texture.sampleType = WGPUTextureSampleType_Undefined; if (shaderResources[descriptor.Slot]) { - // Hack to use the sample type directly from the view which allows to fix incorrect Depth Buffer reading that allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_LOAD) + // Hack to use the sample type directly from the view which allows to fix incorrect Depth Buffer reading that allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_DEPTH) auto ptr = (GPUResourceViewPtrWebGPU*)shaderResources[descriptor.Slot]->GetNativePtr(); if (ptr && ptr->TextureView) entry.texture.sampleType = ptr->TextureView->SampleType; } +#if WEBGPU_LOG_PSO + if (log) + { + switch (entry.texture.sampleType) + { + case 
WGPUTextureSampleType_BindingNotUsed: + samplerType = TEXT("BindingNotUsed"); + break; + case WGPUTextureSampleType_Undefined: + samplerType = TEXT("Undefined"); + break; + case WGPUTextureSampleType_Float: + samplerType = TEXT("Float"); + break; + case WGPUTextureSampleType_UnfilterableFloat: + samplerType = TEXT("UnfilterableFloat"); + break; + case WGPUTextureSampleType_Depth: + samplerType = TEXT("Depth"); + break; + case WGPUTextureSampleType_Sint: + samplerType = TEXT("Sint"); + break; + case WGPUTextureSampleType_Uint: + samplerType = TEXT("Uint"); + break; + } + switch (descriptor.ResourceType) + { + case SpirvShaderResourceType::Texture1D: + LOG(Info, " > [{}] texture 1D ({})", entry.binding, samplerType); + break; + case SpirvShaderResourceType::Texture2D: + LOG(Info, " > [{}] texture 2D ({})", entry.binding, samplerType); + break; + case SpirvShaderResourceType::Texture3D: + LOG(Info, " > [{}] texture 3D ({})", entry.binding, samplerType); + break; + case SpirvShaderResourceType::TextureCube: + LOG(Info, " > [{}] texture Cube ({})", entry.binding, samplerType); + break; + case SpirvShaderResourceType::Texture2DArray: + LOG(Info, " > [{}] texture 2D array ({})", entry.binding, samplerType); + break; + } + } +#endif switch (descriptor.ResourceType) { case SpirvShaderResourceType::Texture1D: @@ -326,11 +414,19 @@ void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage; else entry.buffer.type = WGPUBufferBindingType_Storage; +#if WEBGPU_LOG_PSO + if (log) + LOG(Info, " > [{}] storage buffer (read-only = {}, dynamic = {})", entry.binding, entry.buffer.type == WGPUBufferBindingType_ReadOnlyStorage, entry.buffer.hasDynamicOffset); +#endif break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: entry.buffer.hasDynamicOffset = true; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: entry.buffer.type = WGPUBufferBindingType_Uniform; +#if WEBGPU_LOG_PSO + if (log) + LOG(Info, " > [{}] uniform 
buffer (dynamic = {})", entry.binding, entry.buffer.hasDynamicOffset); +#endif break; default: #if GPU_ENABLE_DIAGNOSTICS @@ -414,33 +510,33 @@ bool GPUPipelineStateWebGPU::Init(const Description& desc) } } PipelineDesc.multisample.alphaToCoverageEnabled = desc.BlendMode.AlphaToCoverageEnable; - PipelineDesc.fragment = &_fragmentDesc; - _fragmentDesc = WGPU_FRAGMENT_STATE_INIT; - _fragmentDesc.targets = _colorTargets; - Platform::MemoryClear(&_colorTargets, sizeof(_colorTargets)); - if (desc.BlendMode.BlendEnable) - { - _blendState = WGPU_BLEND_STATE_INIT; - _blendState.color = ToBlendComponent(desc.BlendMode.BlendOp, desc.BlendMode.SrcBlend, desc.BlendMode.DestBlend); - _blendState.alpha = ToBlendComponent(desc.BlendMode.BlendOpAlpha, desc.BlendMode.SrcBlendAlpha, desc.BlendMode.DestBlendAlpha); - for (auto& e : _colorTargets) - e.blend = &_blendState; - } - WGPUColorWriteMask writeMask = WGPUColorWriteMask_All; - if (desc.BlendMode.RenderTargetWriteMask != BlendingMode::ColorWrite::All) - { - writeMask = 0; - if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Red)) - writeMask |= WGPUColorWriteMask_Red; - if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Green)) - writeMask |= WGPUColorWriteMask_Green; - if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Blue)) - writeMask |= WGPUColorWriteMask_Blue; - if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Alpha)) - writeMask |= WGPUColorWriteMask_Alpha; - } if (desc.PS) { + PipelineDesc.fragment = &_fragmentDesc; + _fragmentDesc = WGPU_FRAGMENT_STATE_INIT; + _fragmentDesc.targets = _colorTargets; + Platform::MemoryClear(&_colorTargets, sizeof(_colorTargets)); + if (desc.BlendMode.BlendEnable) + { + _blendState = WGPU_BLEND_STATE_INIT; + _blendState.color = ToBlendComponent(desc.BlendMode.BlendOp, desc.BlendMode.SrcBlend, desc.BlendMode.DestBlend); + _blendState.alpha = 
ToBlendComponent(desc.BlendMode.BlendOpAlpha, desc.BlendMode.SrcBlendAlpha, desc.BlendMode.DestBlendAlpha); + for (auto& e : _colorTargets) + e.blend = &_blendState; + } + WGPUColorWriteMask writeMask = WGPUColorWriteMask_All; + if (desc.BlendMode.RenderTargetWriteMask != BlendingMode::ColorWrite::All) + { + writeMask = 0; + if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Red)) + writeMask |= WGPUColorWriteMask_Red; + if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Green)) + writeMask |= WGPUColorWriteMask_Green; + if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Blue)) + writeMask |= WGPUColorWriteMask_Blue; + if (EnumHasAllFlags(desc.BlendMode.RenderTargetWriteMask, BlendingMode::ColorWrite::Alpha)) + writeMask |= WGPUColorWriteMask_Alpha; + } uint16 outputsCount = desc.PS->GetBindings().OutputsCount; for (uint16 rtIndex = 0; rtIndex < outputsCount; rtIndex++) _colorTargets[rtIndex].writeMask = writeMask; diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h index ad77afa43..261015797 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUPipelineStateWebGPU.h @@ -28,7 +28,7 @@ public: uint8 RenderTargetFormats[GPU_MAX_RT_BINDED]; class GPUVertexLayoutWebGPU* VertexLayout; }; - uint64 Packed[3]; + uint64 Packed[2]; }; FORCE_INLINE bool operator==(const Key& other) const diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUSamplerWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUSamplerWebGPU.cpp index 218c35a72..6e4bb7851 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUSamplerWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUSamplerWebGPU.cpp @@ -41,19 +41,19 @@ bool GPUSamplerWebGPU::OnInit() switch (_desc.Filter) { case GPUSamplerFilter::Point: - samplerDesc.magFilter = samplerDesc.magFilter = 
WGPUFilterMode_Nearest; + samplerDesc.magFilter = samplerDesc.minFilter = WGPUFilterMode_Nearest; samplerDesc.mipmapFilter = WGPUMipmapFilterMode_Nearest; break; case GPUSamplerFilter::Bilinear: - samplerDesc.magFilter = samplerDesc.magFilter = WGPUFilterMode_Linear; + samplerDesc.magFilter = samplerDesc.minFilter = WGPUFilterMode_Linear; samplerDesc.mipmapFilter = WGPUMipmapFilterMode_Nearest; break; case GPUSamplerFilter::Trilinear: - samplerDesc.magFilter = samplerDesc.magFilter = WGPUFilterMode_Linear; + samplerDesc.magFilter = samplerDesc.minFilter = WGPUFilterMode_Linear; samplerDesc.mipmapFilter = WGPUMipmapFilterMode_Linear; break; case GPUSamplerFilter::Anisotropic: - samplerDesc.magFilter = samplerDesc.magFilter = WGPUFilterMode_Linear; + samplerDesc.magFilter = samplerDesc.minFilter = WGPUFilterMode_Linear; samplerDesc.mipmapFilter = WGPUMipmapFilterMode_Linear; break; } diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUSwapChainWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUSwapChainWebGPU.cpp index bc2aefbdb..2414f3906 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUSwapChainWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUSwapChainWebGPU.cpp @@ -70,7 +70,7 @@ GPUTextureView* GPUSwapChainWebGPU::GetBackBufferView() viewDesc.arrayLayerCount = 1; viewDesc.aspect = WGPUTextureAspect_All; viewDesc.usage = wgpuTextureGetUsage(surfaceTexture.texture); - _surfaceView.Create(surfaceTexture.texture, &viewDesc); + _surfaceView.Create(surfaceTexture.texture, viewDesc); } return &_surfaceView; } diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.cpp b/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.cpp index 0351a37fd..6b1dfe626 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.cpp +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.cpp @@ -32,12 +32,38 @@ WGPUTextureFormat DropStencil(WGPUTextureFormat format) } } -void GPUTextureViewWebGPU::Create(WGPUTexture texture, WGPUTextureViewDescriptor const* 
desc) +void SetWebGPUTextureViewSampler(GPUTextureView* view, uint32 samplerType) +{ + ((GPUTextureViewWebGPU*)view)->SampleType = (WGPUTextureSampleType)samplerType; +} + +void GPUTextureViewWebGPU::Create(WGPUTexture texture, const WGPUTextureViewDescriptor& desc) { if (View) wgpuTextureViewRelease(View); Texture = texture; - View = wgpuTextureCreateView(texture, desc); + + auto viewDesc = desc; + auto renderDesc = desc; + auto separateViews = false; + + // Render views cannot have more than 1 mip levels count + if (desc.usage & WGPUTextureUsage_RenderAttachment && renderDesc.mipLevelCount > 1) + { + renderDesc.mipLevelCount = 1; + separateViews = true; + } + + // Depth-stencil textures expose depth-only when binding to shaders (unless via custom _handleStencil view) so make separate ViewRender for rendering with all components + if (desc.aspect == WGPUTextureAspect_All && ::HasStencil(desc.format)) + { + viewDesc.aspect = WGPUTextureAspect_DepthOnly; + viewDesc.format = DropStencil(viewDesc.format); + separateViews = true; + } + + // Create views + View = wgpuTextureCreateView(texture, &viewDesc); if (!View) { #if GPU_ENABLE_RESOURCE_NAMING @@ -46,18 +72,13 @@ void GPUTextureViewWebGPU::Create(WGPUTexture texture, WGPUTextureViewDescriptor LOG(Error, "Failed to create a view for texture"); #endif } - ViewRender = View; + if (separateViews) + ViewRender = wgpuTextureCreateView(texture, &renderDesc); + else + ViewRender = View; - // Depth-stencil textures expose depth-only when binding to shaders (unless via custom _handleStencil view) so make separate ViewRender for rendering with all components - if (desc && desc->aspect == WGPUTextureAspect_All && ::HasStencil(desc->format)) - { - auto depthOnlyDesc = *desc; - depthOnlyDesc.aspect = WGPUTextureAspect_DepthOnly; - depthOnlyDesc.format = DropStencil(depthOnlyDesc.format); - View = wgpuTextureCreateView(texture, &depthOnlyDesc); - } - - Format = desc ? 
desc->format : wgpuTextureGetFormat(texture); + // Cache metadata + Format = desc.format; switch (Format) { case WGPUTextureFormat_Depth16Unorm: @@ -76,6 +97,8 @@ void GPUTextureViewWebGPU::Create(WGPUTexture texture, WGPUTextureViewDescriptor SampleType = WGPUTextureSampleType_Undefined; break; } + RenderSize.Width = Math::Max(wgpuTextureGetWidth(Texture) >> renderDesc.baseMipLevel, 1); + RenderSize.Height = Math::Max(wgpuTextureGetHeight(Texture) >> renderDesc.baseMipLevel, 1); } void GPUTextureViewWebGPU::Release() @@ -113,21 +136,22 @@ bool GPUTextureWebGPU::OnInit() textureDesc.usage |= WGPUTextureUsage_RenderAttachment; textureDesc.size.width = _desc.Width; textureDesc.size.height = _desc.Height; - textureDesc.size.depthOrArrayLayers = _desc.Depth; switch (_desc.Dimensions) { case TextureDimensions::Texture: _viewDimension = IsArray() ? WGPUTextureViewDimension_2DArray : WGPUTextureViewDimension_2D; textureDesc.dimension = WGPUTextureDimension_2D; + textureDesc.size.depthOrArrayLayers = _desc.ArraySize; break; case TextureDimensions::VolumeTexture: _viewDimension = WGPUTextureViewDimension_3D; textureDesc.dimension = WGPUTextureDimension_3D; + textureDesc.size.depthOrArrayLayers = _desc.Depth; break; case TextureDimensions::CubeTexture: _viewDimension = _desc.ArraySize > 6 ? 
WGPUTextureViewDimension_CubeArray : WGPUTextureViewDimension_Cube; textureDesc.dimension = WGPUTextureDimension_2D; - textureDesc.size.depthOrArrayLayers *= 6; // Each cubemap uses 6 array slices + textureDesc.size.depthOrArrayLayers = _desc.ArraySize; break; } textureDesc.format = RenderToolsWebGPU::ToTextureFormat(Format()); @@ -136,7 +160,7 @@ bool GPUTextureWebGPU::OnInit() textureDesc.viewFormats = &textureDesc.format; textureDesc.viewFormatCount = 1; _format = textureDesc.format; - _usage = textureDesc.usage; + Usage = textureDesc.usage; Texture = wgpuDeviceCreateTexture(_device->Device, &textureDesc); if (!Texture) return true; @@ -179,7 +203,7 @@ void GPUTextureWebGPU::OnResidentMipsChanged() // Update the view to handle base mip level as highest resident mip WGPUTextureViewDescriptor viewDesc = WGPU_TEXTURE_VIEW_DESCRIPTOR_INIT; viewDesc.format = _format; - viewDesc.usage = _usage; + viewDesc.usage = Usage; viewDesc.dimension = _viewDimension; viewDesc.baseMipLevel = MipLevels() - ResidentMipLevels(); viewDesc.mipLevelCount = ResidentMipLevels(); @@ -188,7 +212,7 @@ void GPUTextureWebGPU::OnResidentMipsChanged() GPUTextureViewWebGPU& view = IsVolume() ? 
_handleVolume : _handlesPerSlice[0]; if (view.GetParent() == nullptr) view.Init(this, _desc.Format, _desc.MultiSampleLevel); - view.Create(Texture, &viewDesc); + view.Create(Texture, viewDesc); } void GPUTextureWebGPU::OnReleaseGPU() @@ -220,7 +244,7 @@ void GPUTextureWebGPU::InitHandles() viewDesc.label = { _name.Get(), (size_t)_name.Length() }; #endif viewDesc.format = _format; - viewDesc.usage = _usage; + viewDesc.usage = Usage; viewDesc.dimension = _viewDimension; viewDesc.mipLevelCount = MipLevels(); viewDesc.arrayLayerCount = ArraySize(); @@ -235,7 +259,7 @@ void GPUTextureWebGPU::InitHandles() { auto& view = _handleVolume; view.Init(this, format, msaa); - view.Create(Texture, &viewDesc); + view.Create(Texture, viewDesc); } // Init per slice views @@ -249,7 +273,7 @@ void GPUTextureWebGPU::InitHandles() //viewDesc.arrayLayerCount = 1; auto& view = _handlesPerSlice[sliceIndex]; view.Init(this, format, msaa); - view.Create(Texture, &viewDesc); + view.Create(Texture, viewDesc); view.DepthSlice = sliceIndex; } } @@ -263,7 +287,7 @@ void GPUTextureWebGPU::InitHandles() { auto& view = _handleArray; view.Init(this, format, msaa); - view.Create(Texture, &viewDesc); + view.Create(Texture, viewDesc); } // Create per array slice handles @@ -275,7 +299,7 @@ void GPUTextureWebGPU::InitHandles() viewDesc.arrayLayerCount = 1; auto& view = _handlesPerSlice[arrayIndex]; view.Init(this, format, msaa); - view.Create(Texture, &viewDesc); + view.Create(Texture, viewDesc); } viewDesc.baseArrayLayer = 0; viewDesc.arrayLayerCount = MipLevels(); @@ -287,7 +311,7 @@ void GPUTextureWebGPU::InitHandles() _handlesPerSlice.Resize(1, false); auto& view = _handlesPerSlice[0]; view.Init(this, format, msaa); - view.Create(Texture, &viewDesc); + view.Create(Texture, viewDesc); } // Init per mip map handles @@ -308,7 +332,7 @@ void GPUTextureWebGPU::InitHandles() auto& view = slice[mipIndex]; viewDesc.baseMipLevel = mipIndex; view.Init(this, format, msaa); - view.Create(Texture, &viewDesc); + 
view.Create(Texture, viewDesc); } } viewDesc.dimension = _viewDimension; @@ -319,7 +343,7 @@ void GPUTextureWebGPU::InitHandles() { auto& view = _handleReadOnlyDepth; view.Init(this, format, msaa); - view.Create(Texture, &viewDesc); + view.Create(Texture, viewDesc); view.ReadOnly = true; } @@ -339,7 +363,7 @@ void GPUTextureWebGPU::InitHandles() viewDesc.aspect = WGPUTextureAspect_StencilOnly; viewDesc.format = WGPUTextureFormat_Stencil8; _handleStencil.Init(this, stencilFormat, msaa); - _handleStencil.Create(Texture, &viewDesc); + _handleStencil.Create(Texture, viewDesc); } } diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.h index 0ab91f3e1..2f9a3b329 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUTextureWebGPU.h @@ -9,6 +9,28 @@ #if GRAPHICS_API_WEBGPU +struct GPUTextureViewSizeWebGPU +{ + union + { + struct + { + uint16 Width, Height; + }; + uint32 Packed = 0; + }; + + FORCE_INLINE void Set(GPUTextureViewSizeWebGPU other) + { + if (Packed == 0) + Packed = other.Packed; + else + { + ASSERT(Packed == other.Packed); + } + } +}; + /// /// The texture view for Web GPU backend. 
/// @@ -36,13 +58,14 @@ public: bool HasStencil = false; bool ReadOnly = false; uint32 DepthSlice = WGPU_DEPTH_SLICE_UNDEFINED; + GPUTextureViewSizeWebGPU RenderSize; WGPUTextureFormat Format = WGPUTextureFormat_Undefined; WGPUTextureSampleType SampleType = WGPUTextureSampleType_Undefined; GPUResourceViewPtrWebGPU Ptr; public: using GPUTextureView::Init; - void Create(WGPUTexture texture, WGPUTextureViewDescriptor const* desc = nullptr); + void Create(WGPUTexture texture, const WGPUTextureViewDescriptor& desc); void Release(); public: @@ -70,7 +93,6 @@ private: #endif WGPUTextureFormat _format = WGPUTextureFormat_Undefined; WGPUTextureViewDimension _viewDimension = WGPUTextureViewDimension_Undefined; - WGPUTextureUsage _usage = 0; public: GPUTextureWebGPU(GPUDeviceWebGPU* device, const StringView& name) @@ -81,6 +103,8 @@ public: public: // Handle to the WebGPU texture object. WGPUTexture Texture = nullptr; + // Usage flags of the created texture. + WGPUTextureUsage Usage = 0; public: // [GPUTexture] diff --git a/Source/Engine/GraphicsDevice/WebGPU/GPUVertexLayoutWebGPU.h b/Source/Engine/GraphicsDevice/WebGPU/GPUVertexLayoutWebGPU.h index a2ca54b2a..44844a4a2 100644 --- a/Source/Engine/GraphicsDevice/WebGPU/GPUVertexLayoutWebGPU.h +++ b/Source/Engine/GraphicsDevice/WebGPU/GPUVertexLayoutWebGPU.h @@ -14,10 +14,6 @@ class GPUVertexLayoutWebGPU : public GPUResourceBaseGetParent(); - const bool isCubemap = texture.Type == MaterialParameterType::CubeTexture; - const bool isVolume = texture.Type == MaterialParameterType::GPUTextureVolume; - const bool isArray = texture.Type == MaterialParameterType::GPUTextureArray; // Check if has variable assigned and it's a valid type if (texture.Type != MaterialParameterType::Texture diff --git a/Source/Engine/Renderer/AmbientOcclusionPass.cpp b/Source/Engine/Renderer/AmbientOcclusionPass.cpp index a325f5f96..437b3077d 100644 --- a/Source/Engine/Renderer/AmbientOcclusionPass.cpp +++ b/Source/Engine/Renderer/AmbientOcclusionPass.cpp 
@@ -121,6 +121,7 @@ bool AmbientOcclusionPass::setupResources() // Create pipeline states auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::Red; if (!_psPrepareDepths->IsValid()) { psDesc.PS = shader->GetPS("PS_PrepareDepths"); @@ -144,6 +145,7 @@ bool AmbientOcclusionPass::setupResources() return true; } } + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RG; for (int32 i = 0; i < ARRAY_COUNT(_psGenerate); i++) { if (!_psGenerate[i]->IsValid()) @@ -174,7 +176,7 @@ bool AmbientOcclusionPass::setupResources() return true; } psDesc.BlendMode = BlendingMode::Multiply; - psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::Alpha; + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::Alpha; // Write only into AO channel in GBuffer if (_depthBounds) { psDesc.DepthEnable = psDesc.DepthBoundsEnable = true; @@ -278,7 +280,7 @@ void AmbientOcclusionPass::Render(RenderContext& renderContext) GPUTextureView* depthBufferApply = _depthBounds ? 
renderContext.Buffers->DepthBuffer->ViewReadOnlyDepth() : nullptr; // Request temporary buffers - GPUTexture* m_halfDepths[4]; + GPUTexture* m_halfDepths[4] = {}; GPUTexture* m_pingPongHalfResultA; GPUTexture* m_pingPongHalfResultB; GPUTexture* m_finalResults; @@ -286,6 +288,8 @@ void AmbientOcclusionPass::Render(RenderContext& renderContext) GPUTextureDescription tempDesc; for (int i = 0; i < 4; i++) { + if (settings.SkipHalfPixels && (i == 1 || i == 2)) + continue; #if SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET < 99 tempDesc = GPUTextureDescription::New2D(m_halfSizeX, m_halfSizeY, 0, SSAO_DEPTH_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::RenderTarget | GPUTextureFlags::PerMipViews); #else @@ -407,7 +411,7 @@ void AmbientOcclusionPass::Render(RenderContext& renderContext) } // Only do mipmaps for higher quality levels (not beneficial on quality level 1, and detrimental on quality level 0) - if (settings.QualityLevel > 1 && SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET < 99) + if (settings.QualityLevel > 1 && SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET < 99 && !settings.SkipHalfPixels) { for (int i = 1; i < SSAO_DEPTH_MIP_LEVELS; i++) { diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 1b3cb89eb..087398e49 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -395,6 +395,7 @@ void RenderList::RunPostFxPass(GPUContext* context, RenderContext& renderContext auto material = Settings.PostFxMaterials.Materials[i].Get(); if (material && material->IsReady() && material->IsPostFx() && material->GetInfo().PostFxLocation == locationA) { + context->ResetSR(); ASSERT(needTempTarget); context->SetRenderTarget(*output); bindParams.Input = *input; @@ -431,6 +432,7 @@ void RenderList::RunPostFxPass(GPUContext* context, RenderContext& renderContext if (needTempTarget) RenderTargetPool::Release(output); + context->ResetSR(); } void RenderList::RunMaterialPostFxPass(GPUContext* context, 
RenderContext& renderContext, MaterialPostFxLocation location, GPUTexture*& input, GPUTexture*& output) @@ -441,6 +443,7 @@ void RenderList::RunMaterialPostFxPass(GPUContext* context, RenderContext& rende auto material = Settings.PostFxMaterials.Materials[i].Get(); if (material && material->IsReady() && material->IsPostFx() && material->GetInfo().PostFxLocation == location) { + context->ResetSR(); context->SetRenderTarget(*output); bindParams.Input = *input; material->Bind(bindParams); @@ -459,6 +462,7 @@ void RenderList::RunCustomPostFxPass(GPUContext* context, RenderContext& renderC { if (fx->Location == location) { + context->ResetSR(); if (fx->UseSingleTarget || output == nullptr) { fx->Render(context, renderContext, input, nullptr); diff --git a/Source/Engine/Renderer/ShadowsPass.cpp b/Source/Engine/Renderer/ShadowsPass.cpp index 91f2d3832..825e65661 100644 --- a/Source/Engine/Renderer/ShadowsPass.cpp +++ b/Source/Engine/Renderer/ShadowsPass.cpp @@ -1178,6 +1178,11 @@ void ShadowsPass::SetupShadows(RenderContext& renderContext, RenderContextBatch& LOG(Fatal, "Failed to setup shadow map of size {0}x{1} and format {2}", desc.Width, desc.Height, ScriptingEnum::ToString(desc.Format)); return; } +#if PLATFORM_WEB + // Hack to fix WebGPU limitation that requires to specify different sampler type manually to sample depth texture + void SetWebGPUTextureViewSampler(GPUTextureView * view, uint32 samplerType); + SetWebGPUTextureViewSampler(shadows.ShadowMapAtlas->View(), 0x00000004); // WGPUTextureSampleType_Depth +#endif shadows.ClearShadowMapAtlas = true; shadows.Resolution = atlasResolution; shadows.ViewOrigin = renderContext.View.Origin; diff --git a/Source/Engine/Renderer/Utils/MultiScaler.cpp b/Source/Engine/Renderer/Utils/MultiScaler.cpp index c35aac08e..acfe9b001 100644 --- a/Source/Engine/Renderer/Utils/MultiScaler.cpp +++ b/Source/Engine/Renderer/Utils/MultiScaler.cpp @@ -68,13 +68,14 @@ bool MultiScaler::setupResources() } if (!_psHalfDepth.IsValid()) { + 
psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::Red; psDesc.PS = shader->GetPS("PS_HalfDepth", 0); if (_psHalfDepth[0]->Init(psDesc)) return true; psDesc.PS = shader->GetPS("PS_HalfDepth", 2); - psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::Red; if (_psHalfDepth[2]->Init(psDesc)) return true; + psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::None; psDesc.PS = shader->GetPS("PS_HalfDepth", 1); psDesc.DepthWriteEnable = true; psDesc.DepthEnable = true; @@ -214,9 +215,13 @@ void MultiScaler::Filter(FilterMode mode, GPUContext* context, int32 width, int3 void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstHeight, GPUTexture* src, GPUTextureView* dst) { PROFILE_GPU_CPU("Downscale Depth"); + bool outputDepth = ((GPUTexture*)dst->GetParent())->IsDepthStencil(); if (checkIfSkipPass()) { - context->ClearDepth(dst); + if (outputDepth) + context->ClearDepth(dst); + else + context->Clear(dst, Color::Transparent); return; } @@ -224,14 +229,16 @@ void MultiScaler::DownscaleDepth(GPUContext* context, int32 dstWidth, int32 dstH Data data; data.TexelSize.X = 1.0f / (float)src->Width(); data.TexelSize.Y = 1.0f / (float)src->Height(); - bool outputDepth = ((GPUTexture*)dst->GetParent())->IsDepthStencil(); auto cb = _shader->GetShader()->GetCB(0); context->UpdateCB(cb, &data); context->BindCB(0, cb); // Draw context->SetViewportAndScissors((float)dstWidth, (float)dstHeight); - context->SetRenderTarget(dst, (GPUTextureView*)nullptr); + if (outputDepth) + context->SetRenderTarget(dst, (GPUTextureView*)nullptr); + else + context->SetRenderTarget(dst); context->BindSR(0, src); context->SetState(_psHalfDepth[outputDepth ? 
1 : 0]); context->DrawFullscreenTriangle(); diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp index 13b835185..09ad93cec 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp @@ -27,8 +27,6 @@ namespace MaterialValue* MaterialGenerator::sampleTextureRaw(Node* caller, Value& value, Box* box, SerializedMaterialParam* texture) { ASSERT(texture && box); - - // Cache data const auto parent = box->GetParent>(); const bool isCubemap = texture->Type == MaterialParameterType::CubeTexture; const bool isArray = texture->Type == MaterialParameterType::GPUTextureArray; @@ -101,7 +99,13 @@ MaterialValue* MaterialGenerator::sampleTextureRaw(Node* caller, Value& value, B const Char* sampler = TEXT("SamplerLinearWrap"); // Sample texture - if (isNormalMap) + if (texture->AsInteger == (int32)MaterialSceneTextures::SceneDepth) + { + // Sample depth buffer + String sampledValue = String::Format(TEXT("SAMPLE_RT_DEPTH({0}, {1})"), texture->ShaderName, uv); + valueBox->Cache = writeLocal(VariantType::Float, sampledValue, parent); + } + else if (isNormalMap) { const Char* format = canUseSample ? 
TEXT("{0}.Sample({1}, {2}).xyz") : TEXT("{0}.SampleLevel({1}, {2}, {3}).xyz"); diff --git a/Source/Shaders/Common.hlsl b/Source/Shaders/Common.hlsl index 698320429..eaf4793e5 100644 --- a/Source/Shaders/Common.hlsl +++ b/Source/Shaders/Common.hlsl @@ -74,12 +74,20 @@ #else #define CAN_USE_TESSELLATION 0 #endif + #if defined(WGSL) -// Wrap not supported read-only Buffer binded as shader resource into StructuredBuffer to be used as storage on WebGPU +// Alias read-only Buffer binded as shader resource into StructuredBuffer to be used as storage on WebGPU (not supported) #define CAN_USE_TYPED_BUFFER_LOADS 0 #define Buffer StructuredBuffer + +// Hack matrix multiplication order for WebGPU (row-major vs column-major bug?) +#define PROJECT_POINT(p, m) mul(m, p) + +// Stencil8 is in Red channel on WebGPU +#define STENCIL_BUFFER_SWIZZLE .r #else #define CAN_USE_TYPED_BUFFER_LOADS 1 +#define PROJECT_POINT(p, m) mul(p, m) #endif // Compiler attributes @@ -144,9 +152,9 @@ float4 LoadTextureWGSL(Texture2D tex, float2 uv) tex.GetDimensions(size.x, size.y); return tex.Load(uint3(size * uv, 0)); } -#define SAMPLE_RT_LOAD(rt, texCoord) LoadTextureWGSL(rt, texCoord) +#define SAMPLE_RT_DEPTH(rt, texCoord) LoadTextureWGSL(rt, texCoord).r #else -#define SAMPLE_RT_LOAD(rt, texCoord) SAMPLE_RT(rt, texCoord) +#define SAMPLE_RT_DEPTH(rt, texCoord) SAMPLE_RT(rt, texCoord).r #endif #define HDR_CLAMP_MAX 65472.0 #define PI 3.1415926535897932 diff --git a/Source/Shaders/DebugDraw.shader b/Source/Shaders/DebugDraw.shader index 127196318..4f1aa9ba5 100644 --- a/Source/Shaders/DebugDraw.shader +++ b/Source/Shaders/DebugDraw.shader @@ -21,7 +21,7 @@ META_VS(true, FEATURE_LEVEL_ES2) VS2PS VS(float3 Position : POSITION, float4 Color : COLOR) { VS2PS output; - output.Position = mul(float4(Position, 1), ViewProjection); + output.Position = PROJECT_POINT(float4(Position, 1), ViewProjection); output.Position.z += ClipPosZBias; output.Color = Color; return output; diff --git a/Source/Shaders/Fog.shader 
b/Source/Shaders/Fog.shader index bc4f8253f..ce5a95a00 100644 --- a/Source/Shaders/Fog.shader +++ b/Source/Shaders/Fog.shader @@ -34,7 +34,7 @@ META_PERMUTATION_1(VOLUMETRIC_FOG=1) float4 PS_Fog(Quad_VS2PS input) : SV_Target0 { // Get world space position at given pixel coordinate - float rawDepth = SAMPLE_RT_LOAD(Depth, input.TexCoord).r; + float rawDepth = SAMPLE_RT_DEPTH(Depth, input.TexCoord); GBufferData gBufferData = GetGBufferData(); float3 viewPos = GetViewPos(gBufferData, input.TexCoord, rawDepth); float3 worldPos = mul(float4(viewPos, 1), gBufferData.InvViewMatrix).xyz; diff --git a/Source/Shaders/GBuffer.hlsl b/Source/Shaders/GBuffer.hlsl index 41cdaca02..c63a1bb05 100644 --- a/Source/Shaders/GBuffer.hlsl +++ b/Source/Shaders/GBuffer.hlsl @@ -56,7 +56,7 @@ float3 GetWorldPos(GBufferData gBuffer, float2 uv, float deviceDepth) // Sample raw device depth buffer float SampleZ(float2 uv) { - return SAMPLE_RT_LOAD(Depth, uv).r; + return SAMPLE_RT_DEPTH(Depth, uv); } // Sample linear depth diff --git a/Source/Shaders/GUI.shader b/Source/Shaders/GUI.shader index 5d02cd60e..16f961839 100644 --- a/Source/Shaders/GUI.shader +++ b/Source/Shaders/GUI.shader @@ -31,7 +31,7 @@ VS2PS VS(Render2DVertex input) if ((int)input.CustomDataAndClipOrigin.y & RENDER2D_FEATURE_VERTEX_SNAPPING) input.Position = (float2)(int2)input.Position; - output.Position = mul(float4(input.Position, 0, 1), ViewProjection); + output.Position = PROJECT_POINT(float4(input.Position, 0, 1), ViewProjection); output.Color = input.Color; output.TexCoord = input.TexCoord; output.ClipOriginAndPos = float4(input.CustomDataAndClipOrigin.zw, input.Position); @@ -116,7 +116,7 @@ float4 PS_Downscale(Quad_VS2PS input) : SV_Target0 { float2 boundsPos = input.TexCoord * Bounds.zw + Bounds.xy; - float4 clipPos = mul(float4(boundsPos, 0, 1), ViewProjection); + float4 clipPos = PROJECT_POINT(float4(boundsPos, 0, 1), ViewProjection); clipPos.xy /= clipPos.w; float2 uvPos = ClipToUv(clipPos.xy); diff --git 
a/Source/Shaders/Gather.hlsl b/Source/Shaders/Gather.hlsl index f96d3a817..8b2d26d11 100644 --- a/Source/Shaders/Gather.hlsl +++ b/Source/Shaders/Gather.hlsl @@ -96,4 +96,21 @@ float4 TextureGatherBlue(Texture2D tex, SamplerState sam, float2 uv) #endif } +float4 TextureGatherDepth(Texture2D tex, float2 uv) +{ +#if defined(WGSL) + // WebGPU doesn't allow to sample depth texture with regular sampler, need to use Load instead of Sample and get texture size for UV to pixel coordinate conversion + uint2 size; + tex.GetDimensions(size.x, size.y); + uint2 coord = (uint2)((float2)size * uv - 0.5f); + float x = tex.Load(uint3(coord + uint2(0, 1), 0)).x; + float y = tex.Load(uint3(coord + uint2(1, 1), 0)).x; + float z = tex.Load(uint3(coord + uint2(1, 0), 0)).x; + float w = tex.Load(uint3(coord + uint2(0, 0), 0)).x; + return float4(x, y, z, w); +#else + return TextureGatherRed(tex, SamplerPointClamp, uv); +#endif +} + #endif diff --git a/Source/Shaders/Lights.shader b/Source/Shaders/Lights.shader index 3a17f490c..0b4b77bea 100644 --- a/Source/Shaders/Lights.shader +++ b/Source/Shaders/Lights.shader @@ -34,7 +34,7 @@ META_VS_IN_ELEMENT(POSITION, 0, R32G32B32_FLOAT, 0, 0, PER_VERTEX, 0, true) Model_VS2PS VS_Model(ModelInput_PosOnly input) { Model_VS2PS output; - output.Position = mul(float4(input.Position.xyz, 1), WVP); + output.Position = PROJECT_POINT(float4(input.Position.xyz, 1), WVP); output.ScreenPos = output.Position; return output; } diff --git a/Source/Shaders/MotionBlur.shader b/Source/Shaders/MotionBlur.shader index c89daf40f..1b33210d0 100644 --- a/Source/Shaders/MotionBlur.shader +++ b/Source/Shaders/MotionBlur.shader @@ -37,7 +37,7 @@ META_PS(true, FEATURE_LEVEL_ES2) float4 PS_CameraMotionVectors(Quad_VS2PS input) : SV_Target { // Get the pixel world space position - float deviceDepth = SAMPLE_RT(Input0, input.TexCoord).r; + float deviceDepth = SAMPLE_RT_DEPTH(Input0, input.TexCoord); GBufferData gBufferData = GetGBufferData(); float4 worldPos = 
float4(GetWorldPos(gBufferData, input.TexCoord, deviceDepth), 1); @@ -204,7 +204,7 @@ float4 PS_MotionBlur(Quad_VS2PS input) : SV_Target // Sample pixel depth GBufferData gBufferData = GetGBufferData(); - float pixelDepth = LinearizeZ(gBufferData, SAMPLE_RT_LOAD(Input3, input.TexCoord).x); + float pixelDepth = LinearizeZ(gBufferData, SAMPLE_RT_DEPTH(Input3, input.TexCoord)); // Calculate noise to make it look better with less samples per pixel float noise = FullscreenGradientNoise(input.TexCoord); @@ -229,12 +229,12 @@ float4 PS_MotionBlur(Quad_VS2PS input) : SV_Target float weight1 = 1; float weight2 = 1; #else - float depth1 = LinearizeZ(gBufferData, SAMPLE_RT_LOAD(Input3, sampleUV1).x); + float depth1 = LinearizeZ(gBufferData, SAMPLE_RT_DEPTH(Input3, sampleUV1)); float2 velocity1 = Input1.SampleLevel(SamplerPointClamp, sampleUV1, 0).xy; velocity1 = ClampVelocity(velocity1); float velocityLength1 = length(velocity1); - float depth2 = LinearizeZ(gBufferData, SAMPLE_RT_LOAD(Input3, sampleUV2).x); + float depth2 = LinearizeZ(gBufferData, SAMPLE_RT_DEPTH(Input3, sampleUV2)); float2 velocity2 = Input1.SampleLevel(SamplerPointClamp, sampleUV2, 0).xy; velocity2 = ClampVelocity(velocity2); float velocityLength2 = length(velocity2); diff --git a/Source/Shaders/MultiScaler.shader b/Source/Shaders/MultiScaler.shader index ed3138567..828483198 100644 --- a/Source/Shaders/MultiScaler.shader +++ b/Source/Shaders/MultiScaler.shader @@ -9,7 +9,7 @@ float2 Padding; META_CB_END // Use linear sampling (less texture fetches required) -#define SAMPLE(rt, texCoord) SAMPLE_RT_LINEAR(rt, texCoord) +#define SAMPLE_BLUR(rt, texCoord) SAMPLE_RT_LINEAR(rt, texCoord) Texture2D Input : register(t0); @@ -26,7 +26,7 @@ float PS_HalfDepth(Quad_VS2PS input) #endif { // Load 4 depth values (2x2 quad) - float4 depths = TextureGatherRed(Input, SamplerPointClamp, input.TexCoord); + float4 depths = TextureGatherDepth(Input, input.TexCoord); #if HZB_CLOSEST return min(depths.x, min(depths.y, 
min(depths.z, depths.w))); @@ -50,7 +50,7 @@ float4 PS_Blur5(Quad_VS2PS input) : SV_Target0 0.35294118 }; - float4 color = SAMPLE(Input, input.TexCoord) * weights[0]; + float4 color = SAMPLE_BLUR(Input, input.TexCoord) * weights[0]; UNROLL for (int i = 1; i < 2; i++) @@ -61,8 +61,8 @@ float4 PS_Blur5(Quad_VS2PS input) : SV_Target0 float2 texCoordOffset = float2(0, offsets[i]) * TexelSize; #endif - color += (SAMPLE(Input, input.TexCoord + texCoordOffset) - + SAMPLE(Input, input.TexCoord - texCoordOffset)) + color += (SAMPLE_BLUR(Input, input.TexCoord + texCoordOffset) + + SAMPLE_BLUR(Input, input.TexCoord - texCoordOffset)) * weights[i]; } @@ -86,7 +86,7 @@ float4 PS_Blur9(Quad_VS2PS input) : SV_Target0 0.07027027 }; - float4 color = SAMPLE(Input, input.TexCoord) * weights[0]; + float4 color = SAMPLE_BLUR(Input, input.TexCoord) * weights[0]; UNROLL for (int i = 1; i < 3; i++) @@ -97,8 +97,8 @@ float4 PS_Blur9(Quad_VS2PS input) : SV_Target0 float2 texCoordOffset = float2(0, offsets[i]) * TexelSize; #endif - color += (SAMPLE(Input, input.TexCoord + texCoordOffset) - + SAMPLE(Input, input.TexCoord - texCoordOffset)) + color += (SAMPLE_BLUR(Input, input.TexCoord + texCoordOffset) + + SAMPLE_BLUR(Input, input.TexCoord - texCoordOffset)) * weights[i]; } @@ -124,7 +124,7 @@ float4 PS_Blur13(Quad_VS2PS input) : SV_Target0 0.01038136 }; - float4 color = SAMPLE(Input, input.TexCoord) * weights[0]; + float4 color = SAMPLE_BLUR(Input, input.TexCoord) * weights[0]; UNROLL for (int i = 1; i < 4; i++) @@ -135,8 +135,8 @@ float4 PS_Blur13(Quad_VS2PS input) : SV_Target0 float2 texCoordOffset = float2(0, offsets[i]) * TexelSize; #endif - color += (SAMPLE(Input, input.TexCoord + texCoordOffset) - + SAMPLE(Input, input.TexCoord - texCoordOffset)) + color += (SAMPLE_BLUR(Input, input.TexCoord + texCoordOffset) + + SAMPLE_BLUR(Input, input.TexCoord - texCoordOffset)) * weights[i]; } diff --git a/Source/Shaders/Quad.shader b/Source/Shaders/Quad.shader index a97109fde..815649ec8 100644 --- 
a/Source/Shaders/Quad.shader +++ b/Source/Shaders/Quad.shader @@ -70,7 +70,7 @@ Texture2D Source : register(t0); META_PS(true, FEATURE_LEVEL_ES2) float PS_DepthCopy(Quad_VS2PS input) : SV_Depth { - return Source.SampleLevel(SamplerPointClamp, input.TexCoord * Color.xy + Color.zw, 0).r; + return SAMPLE_RT_DEPTH(Source, input.TexCoord * Color.xy + Color.zw); } #endif diff --git a/Source/Shaders/Reflections.shader b/Source/Shaders/Reflections.shader index 11478c7d1..5a06853b8 100644 --- a/Source/Shaders/Reflections.shader +++ b/Source/Shaders/Reflections.shader @@ -31,7 +31,7 @@ META_VS_IN_ELEMENT(POSITION, 0, R32G32B32_FLOAT, 0, ALIGN, PER_VERTEX, 0, true) Model_VS2PS VS_Model(ModelInput_PosOnly input) { Model_VS2PS output; - output.Position = mul(float4(input.Position.xyz, 1), WVP); + output.Position = PROJECT_POINT(float4(input.Position.xyz, 1), WVP); output.ScreenPos = output.Position; return output; } diff --git a/Source/Shaders/SSAO.shader b/Source/Shaders/SSAO.shader index 91ddf50cb..28b7d39db 100644 --- a/Source/Shaders/SSAO.shader +++ b/Source/Shaders/SSAO.shader @@ -234,7 +234,7 @@ META_PS(true, FEATURE_LEVEL_ES2) void PS_PrepareDepthsHalf(in float4 inPos : SV_POSITION, out float out0 : SV_Target0, out float out1 : SV_Target1) { int3 baseCoord = int3(int2(inPos.xy) * InputDepthScale, 0); - float a = g_DepthSource.Load(baseCoord, int2(0, 0)).x; + float a = g_DepthSource.Load(baseCoord).x; float d = g_DepthSource.Load(baseCoord, int2(1, 1)).x; GBufferData gBufferData = GetGBufferData(); @@ -325,7 +325,7 @@ float3 LoadNormal(int2 pos) { float3 normalEncoded = g_NormalmapSource.Load(int3(pos, 0)).xyz; float3 normalWS = DecodeNormal(normalEncoded); - float3 normalVS = mul(normalWS, (float3x3)ViewMatrix); + float3 normalVS = PROJECT_POINT(normalWS, (float3x3)ViewMatrix); return normalVS; } @@ -333,7 +333,7 @@ float3 LoadNormal(int2 pos, int2 offset) { float3 normalEncoded = g_NormalmapSource.Load(int3(pos, 0), offset).xyz; float3 normalWS = 
DecodeNormal(normalEncoded); - float3 normalVS = mul(normalWS, (float3x3)ViewMatrix); + float3 normalVS = PROJECT_POINT(normalWS, (float3x3)ViewMatrix); return normalVS; } @@ -720,21 +720,21 @@ float2 SampleBlurred(float4 inPos, float2 coord) // Edge-sensitive blur META_PS(true, FEATURE_LEVEL_ES2) -float2 PS_SmartBlur(in float4 inPos : SV_POSITION, in float2 inUV : TEXCOORD0) : SV_Target +float2 PS_SmartBlur(in float4 inPos : SV_POSITION, in noperspective float2 inUV : TEXCOORD0) : SV_Target { return SampleBlurred(inPos, inUV); } // Edge-sensitive blur (wider kernel) META_PS(true, FEATURE_LEVEL_ES2) -float2 PS_SmartBlurWide(in float4 inPos : SV_POSITION, in float2 inUV : TEXCOORD0) : SV_Target +float2 PS_SmartBlurWide(in float4 inPos : SV_POSITION, in noperspective float2 inUV : TEXCOORD0) : SV_Target { return SampleBlurredWide(inPos, inUV); } // Edge-ignorant blur in x and y directions, 9 pixels touched (for the lowest quality level 0) META_PS(true, FEATURE_LEVEL_ES2) -float2 PS_NonSmartBlur(in float4 inPos : SV_POSITION, in float2 inUV : TEXCOORD0) : SV_Target +float2 PS_NonSmartBlur(in float4 inPos : SV_POSITION, in noperspective float2 inUV : TEXCOORD0) : SV_Target { float2 halfPixel = HalfViewportPixelSize * 0.5f; @@ -751,7 +751,7 @@ float2 PS_NonSmartBlur(in float4 inPos : SV_POSITION, in float2 inUV : TEXCOORD0 // Edge-ignorant blur & apply (for the lowest quality level 0) META_PS(true, FEATURE_LEVEL_ES2) -float4 PS_Apply(in float4 inPos : SV_POSITION, in float2 inUV : TEXCOORD0) : SV_Target +float4 PS_Apply(in float4 inPos : SV_POSITION, in noperspective float2 inUV : TEXCOORD0) : SV_Target { float a = g_FinalSSAO.SampleLevel(SamplerLinearClamp, float3(inUV.xy, 0), 0.0).x; float b = g_FinalSSAO.SampleLevel(SamplerLinearClamp, float3(inUV.xy, 1), 0.0).x; @@ -765,7 +765,7 @@ float4 PS_Apply(in float4 inPos : SV_POSITION, in float2 inUV : TEXCOORD0) : SV_ // Edge-ignorant blur & apply, skipping half pixels in checkerboard pattern META_PS(true, 
FEATURE_LEVEL_ES2) -float4 PS_ApplyHalf(in float4 inPos : SV_POSITION, in float2 inUV : TEXCOORD0) : SV_Target +float4 PS_ApplyHalf(in float4 inPos : SV_POSITION, in noperspective float2 inUV : TEXCOORD0) : SV_Target { float a = g_FinalSSAO.SampleLevel(SamplerLinearClamp, float3(inUV.xy, 0), 0.0).x; float d = g_FinalSSAO.SampleLevel(SamplerLinearClamp, float3(inUV.xy, 3), 0.0).x; diff --git a/Source/Shaders/SSR.hlsl b/Source/Shaders/SSR.hlsl index e1949cd5a..a5f6679b1 100644 --- a/Source/Shaders/SSR.hlsl +++ b/Source/Shaders/SSR.hlsl @@ -18,7 +18,7 @@ float2 ClipToUv(float2 clipPos) // go into clip space (-1:1 from bottom/left to up/right) float3 ProjectWorldToClip(float3 wsPos, float4x4 viewProjectionMatrix) { - float4 clipPos = mul(float4(wsPos, 1), viewProjectionMatrix); + float4 clipPos = PROJECT_POINT(float4(wsPos, 1), viewProjectionMatrix); return clipPos.xyz / clipPos.w; } @@ -74,7 +74,7 @@ float3 TraceScreenSpaceReflection( #endif // Calculate view space normal vector - float3 normalVS = mul(gBuffer.Normal, (float3x3)viewMatrix); + float3 normalVS = PROJECT_POINT(gBuffer.Normal, (float3x3)viewMatrix); float3 reflectVS = normalize(reflect(gBuffer.ViewPos, normalVS)); if (reflectVS.z < 0.001f) return 0; // Ray goes towards the view @@ -116,7 +116,7 @@ float3 TraceScreenSpaceReflection( while (currSampleIndex < numSamples) { // Sample depth buffer and calculate depth difference - float currSample = SAMPLE_RT_LOAD(depthBuffer, currOffset.xy).r; + float currSample = SAMPLE_RT_DEPTH(depthBuffer, currOffset.xy); float depthDiff = currOffset.z - currSample; // Check intersection diff --git a/Source/Shaders/Shadows.shader b/Source/Shaders/Shadows.shader index c76183824..ae3c22694 100644 --- a/Source/Shaders/Shadows.shader +++ b/Source/Shaders/Shadows.shader @@ -37,8 +37,8 @@ float RayCastScreenSpaceShadow(GBufferData gBufferData, GBufferSample gBuffer, f #endif float distanceFade = 1 - saturate(pow(length(gBuffer.WorldPos - gBufferData.ViewPos) / 
ContactShadowsDistance, 2)); float maxShadowLength = gBufferData.InvProjectionMatrix[1][1] * gBuffer.ViewPos.z * rayLength * distanceFade; - float4 rayStartCS = mul(float4(rayStartWS, 1), ViewProjectionMatrix); - float4 rayEndCS = mul(float4(rayStartWS + rayDirWS * maxShadowLength, 1), ViewProjectionMatrix); + float4 rayStartCS = PROJECT_POINT(float4(rayStartWS, 1), ViewProjectionMatrix); + float4 rayEndCS = PROJECT_POINT(float4(rayStartWS + rayDirWS * maxShadowLength, 1), ViewProjectionMatrix); float4 rayStepCS = (rayEndCS - rayStartCS) / maxSteps; float4 rayCS = rayStartCS + rayStepCS; float lightAmountMax = 0; @@ -65,7 +65,7 @@ META_VS_IN_ELEMENT(POSITION, 0, R32G32B32_FLOAT, 0, 0, PER_VERTEX, 0, true) Model_VS2PS VS_Model(ModelInput_PosOnly input) { Model_VS2PS output; - output.Position = mul(float4(input.Position.xyz, 1), WVP); + output.Position = PROJECT_POINT(float4(input.Position.xyz, 1), WVP); output.ScreenPos = output.Position; return output; } diff --git a/Source/Shaders/ShadowsSampling.hlsl b/Source/Shaders/ShadowsSampling.hlsl index ac181732b..f30952f87 100644 --- a/Source/Shaders/ShadowsSampling.hlsl +++ b/Source/Shaders/ShadowsSampling.hlsl @@ -17,17 +17,19 @@ #include "./Flax/Random.hlsl" #endif -#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 +#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5 || defined(WGSL) #define SAMPLE_SHADOW_MAP(shadowMap, shadowUV, sceneDepth) shadowMap.SampleCmpLevelZero(ShadowSamplerLinear, shadowUV, sceneDepth) #define SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowUV, texelOffset, sceneDepth) shadowMap.SampleCmpLevelZero(ShadowSamplerLinear, shadowUV, sceneDepth, texelOffset) #else #define SAMPLE_SHADOW_MAP(shadowMap, shadowUV, sceneDepth) (sceneDepth < shadowMap.SampleLevel(SamplerLinearClamp, shadowUV, 0).r) #define SAMPLE_SHADOW_MAP_OFFSET(shadowMap, shadowUV, texelOffset, sceneDepth) (sceneDepth < shadowMap.SampleLevel(SamplerLinearClamp, shadowUV, 0, texelOffset).r) #endif -#if VULKAN || FEATURE_LEVEL < FEATURE_LEVEL_SM5 -#define 
SAMPLE_SHADOW_MAP_SAMPLER SamplerPointClamp +#if defined(WGSL) +#define LOAD_SHADOW_MAP(shadowMap, shadowUV) SAMPLE_RT_DEPTH(shadowMap, shadowUV) +#elif VULKAN || FEATURE_LEVEL < FEATURE_LEVEL_SM5 +#define LOAD_SHADOW_MAP(shadowMap, shadowUV) shadowMap.SampleLevel(SamplerPointClamp, shadowUV, 0).r #else -#define SAMPLE_SHADOW_MAP_SAMPLER SamplerLinearClamp +#define LOAD_SHADOW_MAP(shadowMap, shadowUV) shadowMap.SampleLevel(SamplerLinearClamp, shadowUV, 0).r #endif float4 GetShadowMask(ShadowSample shadow) @@ -236,7 +238,7 @@ ShadowSample SampleDirectionalLightShadowCascade(LightData light, Buffer { float opacity = gBuffer.CustomData.a; shadowMapUV = GetLightShadowAtlasUV(shadow, shadowTile, gBuffer.WorldPos, shadowPosition); - float shadowMapDepth = shadowMap.SampleLevel(SAMPLE_SHADOW_MAP_SAMPLER, shadowMapUV, 0).r; + float shadowMapDepth = LOAD_SHADOW_MAP(shadowMap, shadowMapUV); result.TransmissionShadow = CalculateSubsurfaceOcclusion(opacity, shadowPosition.z, shadowMapDepth); result.TransmissionShadow = PostProcessShadow(shadow, result.TransmissionShadow); } @@ -374,7 +376,7 @@ ShadowSample SampleLocalLightShadow(LightData light, Buffer shadowsBuffe { float opacity = gBuffer.CustomData.a; shadowMapUV = GetLightShadowAtlasUV(shadow, shadowTile, gBuffer.WorldPos, shadowPosition); - float shadowMapDepth = shadowMap.SampleLevel(SAMPLE_SHADOW_MAP_SAMPLER, shadowMapUV, 0).r; + float shadowMapDepth = LOAD_SHADOW_MAP(shadowMap, shadowMapUV); result.TransmissionShadow = CalculateSubsurfaceOcclusion(opacity, shadowPosition.z, shadowMapDepth); result.TransmissionShadow = PostProcessShadow(shadow, result.TransmissionShadow); } diff --git a/Source/Shaders/Sky.shader b/Source/Shaders/Sky.shader index ea2012b26..9faef41f7 100644 --- a/Source/Shaders/Sky.shader +++ b/Source/Shaders/Sky.shader @@ -32,7 +32,7 @@ MaterialInput VS(ModelInput_PosOnly input) MaterialInput output; // Compute vertex position - output.Position = mul(float4(input.Position.xyz, 1), WorldViewProjection); 
+ output.Position = PROJECT_POINT(float4(input.Position.xyz, 1), WorldViewProjection); output.ScreenPos = output.Position; return output; @@ -47,7 +47,7 @@ GBufferOutput PS_Sky(MaterialInput input) // Calculate view vector (unproject at the far plane) GBufferData gBufferData = GetGBufferData(); float4 clipPos = float4(input.ScreenPos.xy / input.ScreenPos.w, 1.0, 1.0); - clipPos = mul(clipPos, InvViewProjection); + clipPos = PROJECT_POINT(clipPos, InvViewProjection); float3 worldPos = clipPos.xyz / clipPos.w; float3 viewVector = normalize(worldPos - gBufferData.ViewPos); diff --git a/Source/Shaders/TAA.shader b/Source/Shaders/TAA.shader index d841cb889..fd779ca4e 100644 --- a/Source/Shaders/TAA.shader +++ b/Source/Shaders/TAA.shader @@ -35,7 +35,7 @@ float4 PS(Quad_VS2PS input) : SV_Target0 float2 velocity = SAMPLE_RT_LINEAR(MotionVectors, input.TexCoord).xy; float velocityLength = length(velocity); float2 prevUV = input.TexCoord - velocity; - float prevDepth = LinearizeZ(GBuffer, SAMPLE_RT_LOAD(Depth, prevUV).r); + float prevDepth = LinearizeZ(GBuffer, SAMPLE_RT_DEPTH(Depth, prevUV)); // Find the closest pixel in 3x3 neighborhood float currentDepth = 1; @@ -55,7 +55,7 @@ float4 PS(Quad_VS2PS input) : SV_Target0 neighborhoodMax = max(neighborhoodMax, neighbor); neighborhoodSum += neighbor; - float neighborDepth = LinearizeZ(GBuffer, SAMPLE_RT_LOAD(Depth, sampleUV).r); + float neighborDepth = LinearizeZ(GBuffer, SAMPLE_RT_DEPTH(Depth, sampleUV)); float depthDiff = abs(max(neighborDepth - prevDepth, 0)); minDepthDiff = min(minDepthDiff, depthDiff); if (x == 0 && y == 0)