Fix WebGPU error when sampling a depth texture in a shader, which requires the explicit UnfilterableFloat sampler type

This commit is contained in:
Wojtek Figat
2026-02-26 16:59:25 +01:00
parent 63f19aa4d6
commit b36c757753
11 changed files with 167 additions and 134 deletions

View File

@@ -760,7 +760,7 @@ void GPUContextWebGPU::OnDrawCall()
if (_pipelineDirty)
{
_pipelineDirty = false;
WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey) : nullptr;
WGPURenderPipeline pipeline = _pipelineState ? _pipelineState->GetPipeline(_pipelineKey, _shaderResources) : nullptr;
wgpuRenderPassEncoderSetPipeline(_renderPass, pipeline);
RENDER_STAT_PS_STATE_CHANGE();

View File

@@ -3,6 +3,7 @@
#if GRAPHICS_API_WEBGPU
#include "GPUPipelineStateWebGPU.h"
#include "GPUTextureWebGPU.h"
#include "GPUVertexLayoutWebGPU.h"
#include "RenderToolsWebGPU.h"
#include "Engine/Core/Log.h"
@@ -157,13 +158,14 @@ void GPUPipelineStateWebGPU::OnReleaseGPU()
// Computes the hash of a pipeline-state cache key by combining the hashes of its packed 64-bit words.
uint32 GetHash(const GPUPipelineStateWebGPU::Key& key)
{
    // Key must be exactly 3x uint64 so Packed[] aliases the whole struct (see Key::Packed[3] in the header);
    // update this assert and the hash combines below together whenever the Key layout changes.
    static_assert(sizeof(GPUPipelineStateWebGPU::Key) == sizeof(uint64) * 3, "Invalid PSO key size.");
    uint32 hash = GetHash(key.Packed[0]);
    CombineHash(hash, GetHash(key.Packed[1]));
    CombineHash(hash, GetHash(key.Packed[2]));
    return hash;
}
WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key)
WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
{
WGPURenderPipeline pipeline;
if (_pipelines.TryGet(key, pipeline))
@@ -174,6 +176,10 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key)
ZoneText(_debugName.Get(), _debugName.Count() - 1);
#endif
// Lazy-init layout (cannot do it during Init as texture samplers that access eg. depth need to explicitly use UnfilterableFloat)
if (!PipelineDesc.layout)
InitLayout(shaderResources);
// Build final pipeline description
_depthStencilDesc.format = (WGPUTextureFormat)key.DepthStencilFormat;
PipelineDesc.multisample.count = key.MultiSampleCount;
@@ -245,6 +251,119 @@ WGPURenderPipeline GPUPipelineStateWebGPU::GetPipeline(const Key& key)
return pipeline;
}
// Builds the WebGPU pipeline layout: one bind group layout per descriptor group used by the shaders.
// Called lazily (from GetPipeline) rather than during Init because the texture sample type of some
// bindings - e.g. depth buffers that must use UnfilterableFloat - is only known once the actual
// shader resource views are bound.
// shaderResources - the currently bound SRVs, indexed by slot; used to pick the per-binding texture sample type.
// On failure this logs/crashes and returns early, leaving PipelineDesc.layout null.
void GPUPipelineStateWebGPU::InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED])
{
// Count the biggest bind group entries (for all shaders) to allocate reused memory
int32 maxEntriesCount = 0;
for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++)
{
auto descriptors = BindGroupDescriptors[groupIndex];
if (descriptors && maxEntriesCount < descriptors->DescriptorTypesCount)
maxEntriesCount = (int32)descriptors->DescriptorTypesCount;
}
// Single scratch array reused for every group (cleared per group below)
Array<WGPUBindGroupLayoutEntry, InlinedAllocation<8>> entries;
entries.Resize(maxEntriesCount);
// Setup bind groups
WGPUBindGroupLayoutEntry* entriesPtr = entries.Get();
for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++)
{
auto descriptors = BindGroupDescriptors[groupIndex];
if (!descriptors || descriptors->DescriptorTypesCount == 0)
continue;
int32 entriesCount = descriptors->DescriptorTypesCount;
Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount);
// Group 0 is the vertex-shader group; all other groups are fragment-shader visible
auto visibility = groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment;
for (int32 index = 0; index < entriesCount; index++)
{
auto& descriptor = descriptors->DescriptorTypes[index];
auto& entry = entriesPtr[index];
entry.binding = descriptor.Binding;
entry.bindingArraySize = descriptor.Count;
entry.visibility = visibility;
// Translate the SPIR-V/Vulkan-style descriptor into a WebGPU bind group layout entry
switch (descriptor.DescriptorType)
{
case VK_DESCRIPTOR_TYPE_SAMPLER:
// Undefined lets the implementation infer the sampler binding type from the shader
entry.sampler.type = WGPUSamplerBindingType_Undefined;
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
entry.texture.sampleType = WGPUTextureSampleType_Undefined;
if (shaderResources[descriptor.Slot])
{
// Hack to use the sample type directly from the view which allows to fix incorrect Depth Buffer reading that allows only manual Load when UnfilterableFloat is used (see SAMPLE_RT_LOAD)
auto ptr = (GPUResourceViewPtrWebGPU*)shaderResources[descriptor.Slot]->GetNativePtr();
if (ptr && ptr->TextureView)
entry.texture.sampleType = ptr->TextureView->SampleType;
}
switch (descriptor.ResourceType)
{
case SpirvShaderResourceType::Texture1D:
entry.texture.viewDimension = WGPUTextureViewDimension_1D;
break;
case SpirvShaderResourceType::Texture2D:
entry.texture.viewDimension = WGPUTextureViewDimension_2D;
break;
case SpirvShaderResourceType::Texture3D:
entry.texture.viewDimension = WGPUTextureViewDimension_3D;
break;
case SpirvShaderResourceType::TextureCube:
entry.texture.viewDimension = WGPUTextureViewDimension_Cube;
break;
case SpirvShaderResourceType::Texture1DArray:
CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write)
break;
case SpirvShaderResourceType::Texture2DArray:
entry.texture.viewDimension = WGPUTextureViewDimension_2DArray;
break;
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
// Intentional fall-through: the dynamic variant only adds a dynamic offset on top of the base storage buffer setup
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV)
entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
else
entry.buffer.type = WGPUBufferBindingType_Storage;
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
// Intentional fall-through: the dynamic variant only adds a dynamic offset on top of the base uniform buffer setup
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
entry.buffer.type = WGPUBufferBindingType_Uniform;
break;
default:
#if GPU_ENABLE_DIAGNOSTICS
LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(_debugName.Get(), _debugName.Count() - 1));
#else
CRASH;
#endif
return;
}
}
// Create a bind group layout
WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT;
bindGroupLayoutDesc.entryCount = entriesCount;
bindGroupLayoutDesc.entries = entriesPtr;
BindGroupLayouts[groupIndex] = wgpuDeviceCreateBindGroupLayout(_device->Device, &bindGroupLayoutDesc);
}
// Create the pipeline layout
WGPUPipelineLayoutDescriptor layoutDesc = WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
layoutDesc.label = PipelineDesc.label;
#endif
layoutDesc.bindGroupLayoutCount = GPUBindGroupsWebGPU::GraphicsMax;
layoutDesc.bindGroupLayouts = BindGroupLayouts;
PipelineDesc.layout = wgpuDeviceCreatePipelineLayout(_device->Device, &layoutDesc);
if (!PipelineDesc.layout)
{
LOG(Error, "wgpuDeviceCreatePipelineLayout failed");
return;
}
}
bool GPUPipelineStateWebGPU::IsValid() const
{
return _memoryUsage != 0;
@@ -338,108 +457,6 @@ bool GPUPipelineStateWebGPU::Init(const Description& desc)
_fragmentDesc.module = PS->ShaderModule;
}
// Count the biggest bind group entries (for all shaders) to allocate reused memory
int32 maxEntriesCount = 0;
for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++)
{
auto descriptors = BindGroupDescriptors[groupIndex];
if (descriptors && maxEntriesCount < descriptors->DescriptorTypesCount)
maxEntriesCount = (int32)descriptors->DescriptorTypesCount;
}
Array<WGPUBindGroupLayoutEntry, InlinedAllocation<8>> entries;
entries.Resize(maxEntriesCount);
// Setup bind groups
WGPUBindGroupLayoutEntry* entriesPtr = entries.Get();
for (int32 groupIndex = 0; groupIndex < ARRAY_COUNT(BindGroupDescriptors); groupIndex++)
{
auto descriptors = BindGroupDescriptors[groupIndex];
if (!descriptors || descriptors->DescriptorTypesCount == 0)
continue;
int32 entriesCount = descriptors->DescriptorTypesCount;
Platform::MemoryClear(entries.Get(), sizeof(WGPUBindGroupLayoutEntry) * entriesCount);
auto visibility = groupIndex == 0 ? WGPUShaderStage_Vertex : WGPUShaderStage_Fragment;
for (int32 index = 0; index < entriesCount; index++)
{
auto& descriptor = descriptors->DescriptorTypes[index];
auto& entry = entriesPtr[index];
entry.binding = descriptor.Binding;
entry.bindingArraySize = descriptor.Count;
entry.visibility = visibility;
switch (descriptor.DescriptorType)
{
case VK_DESCRIPTOR_TYPE_SAMPLER:
entry.sampler.type = WGPUSamplerBindingType_Undefined;
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
entry.texture.sampleType = WGPUTextureSampleType_Undefined;
switch (descriptor.ResourceType)
{
case SpirvShaderResourceType::Texture1D:
entry.texture.viewDimension = WGPUTextureViewDimension_1D;
break;
case SpirvShaderResourceType::Texture2D:
entry.texture.viewDimension = WGPUTextureViewDimension_2D;
break;
case SpirvShaderResourceType::Texture3D:
entry.texture.viewDimension = WGPUTextureViewDimension_3D;
break;
case SpirvShaderResourceType::TextureCube:
entry.texture.viewDimension = WGPUTextureViewDimension_Cube;
break;
case SpirvShaderResourceType::Texture1DArray:
CRASH; // Not supported TODO: add error at compile time (in ShaderCompilerWebGPU::Write)
break;
case SpirvShaderResourceType::Texture2DArray:
entry.texture.viewDimension = WGPUTextureViewDimension_2DArray;
break;
}
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
if (descriptor.BindingType == SpirvShaderResourceBindingType::SRV)
entry.buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
else
entry.buffer.type = WGPUBufferBindingType_Storage;
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
entry.buffer.hasDynamicOffset = true;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
entry.buffer.type = WGPUBufferBindingType_Uniform;
break;
default:
#if GPU_ENABLE_DIAGNOSTICS
LOG(Fatal, "Unknown descriptor type: {} used as {} in '{}'", (uint32)descriptor.DescriptorType, (uint32)descriptor.BindingType, String(_debugName.Get(), _debugName.Count() - 1));
#else
CRASH;
#endif
return true;
}
}
// Create a bind group layout
WGPUBindGroupLayoutDescriptor bindGroupLayoutDesc = WGPU_BIND_GROUP_LAYOUT_DESCRIPTOR_INIT;
bindGroupLayoutDesc.entryCount = entriesCount;
bindGroupLayoutDesc.entries = entriesPtr;
BindGroupLayouts[groupIndex] = wgpuDeviceCreateBindGroupLayout(_device->Device, &bindGroupLayoutDesc);
}
// Create the pipeline layout
WGPUPipelineLayoutDescriptor layoutDesc = WGPU_PIPELINE_LAYOUT_DESCRIPTOR_INIT;
#if GPU_ENABLE_RESOURCE_NAMING
layoutDesc.label = PipelineDesc.label;
#endif
layoutDesc.bindGroupLayoutCount = GPUBindGroupsWebGPU::GraphicsMax;
layoutDesc.bindGroupLayouts = BindGroupLayouts;
PipelineDesc.layout = wgpuDeviceCreatePipelineLayout(_device->Device, &layoutDesc);
if (!PipelineDesc.layout)
{
LOG(Error, "wgpuDeviceCreatePipelineLayout failed");
return true;
}
_memoryUsage = 1;
return GPUPipelineState::Init(desc);
}

View File

@@ -28,7 +28,7 @@ public:
uint8 RenderTargetFormats[GPU_MAX_RT_BINDED];
class GPUVertexLayoutWebGPU* VertexLayout;
};
uint64 Packed[4];
uint64 Packed[3];
};
FORCE_INLINE bool operator==(const Key& other) const
@@ -62,7 +62,11 @@ public:
}
public:
WGPURenderPipeline GetPipeline(const Key& key);
// Gets the pipeline for the given rendering state. Pipelines are cached and reused for the same key.
WGPURenderPipeline GetPipeline(const Key& key, GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
private:
void InitLayout(GPUResourceView* shaderResources[GPU_MAX_SR_BINDED]);
public:
// [GPUPipelineState]

View File

@@ -58,6 +58,24 @@ void GPUTextureViewWebGPU::Create(WGPUTexture texture, WGPUTextureViewDescriptor
}
Format = desc ? desc->format : wgpuTextureGetFormat(texture);
switch (Format)
{
case WGPUTextureFormat_Depth16Unorm:
case WGPUTextureFormat_Depth24Plus:
case WGPUTextureFormat_Depth24PlusStencil8:
case WGPUTextureFormat_Depth32Float:
case WGPUTextureFormat_Depth32FloatStencil8:
// https://www.w3.org/TR/webgpu/#depth-formats
SampleType = WGPUTextureSampleType_UnfilterableFloat;
break;
case WGPUTextureFormat_Stencil8:
// https://www.w3.org/TR/webgpu/#depth-formats
SampleType = WGPUTextureSampleType_Uint;
break;
default:
SampleType = WGPUTextureSampleType_Undefined;
break;
}
}
void GPUTextureViewWebGPU::Release()

View File

@@ -37,6 +37,7 @@ public:
bool ReadOnly = false;
uint32 DepthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
WGPUTextureFormat Format = WGPUTextureFormat_Undefined;
WGPUTextureSampleType SampleType = WGPUTextureSampleType_Undefined;
GPUResourceViewPtrWebGPU Ptr;
public:

View File

@@ -136,6 +136,18 @@ SamplerComparisonState ShadowSamplerLinear : register(s5);
// General purpose macros
#define SAMPLE_RT(rt, texCoord) rt.SampleLevel(SamplerPointClamp, texCoord, 0)
#define SAMPLE_RT_LINEAR(rt, texCoord) rt.SampleLevel(SamplerLinearClamp, texCoord, 0)
#if defined(WGSL)
// WebGPU doesn't allow to sample depth texture with regular sampler, need to use Load instead of Sample and get texture size for UV to pixel coordinate conversion
// WebGPU forbids sampling a depth texture with a filtering sampler, so instead of Sample
// we query the texture dimensions, convert the normalized UV into an integer pixel
// coordinate, and fetch the texel directly via Load (mip level 0).
float4 LoadTextureWGSL(Texture2D tex, float2 uv)
{
    uint2 dimensions;
    tex.GetDimensions(dimensions.x, dimensions.y);
    uint3 pixelCoord = uint3(dimensions * uv, 0);
    return tex.Load(pixelCoord);
}
#define SAMPLE_RT_LOAD(rt, texCoord) LoadTextureWGSL(rt, texCoord)
#else
#define SAMPLE_RT_LOAD(rt, texCoord) SAMPLE_RT(rt, texCoord)
#endif
#define HDR_CLAMP_MAX 65472.0
#define PI 3.1415926535897932

View File

@@ -34,7 +34,7 @@ META_PERMUTATION_1(VOLUMETRIC_FOG=1)
float4 PS_Fog(Quad_VS2PS input) : SV_Target0
{
// Get world space position at given pixel coordinate
float rawDepth = SAMPLE_RT(Depth, input.TexCoord).r;
float rawDepth = SAMPLE_RT_LOAD(Depth, input.TexCoord).r;
GBufferData gBufferData = GetGBufferData();
float3 viewPos = GetViewPos(gBufferData, input.TexCoord, rawDepth);
float3 worldPos = mul(float4(viewPos, 1), gBufferData.InvViewMatrix).xyz;

View File

@@ -56,7 +56,7 @@ float3 GetWorldPos(GBufferData gBuffer, float2 uv, float deviceDepth)
// Samples the raw (non-linearized) device depth at the given screen-space UV.
// Uses SAMPLE_RT_LOAD so WebGPU depth textures (UnfilterableFloat) are read via Load instead of Sample.
float SampleZ(float2 uv)
{
    return SAMPLE_RT_LOAD(Depth, uv).r;
}
// Sample linear depth
@@ -125,25 +125,6 @@ GBufferSample SampleGBuffer(GBufferData gBuffer, float2 uv)
return result;
}
// Sample GBuffer (fast - only few parameters are being sampled)
// Decodes only the normal, shading model and reconstructed positions from GBuffer1,
// skipping the full material fetch for cheaper shaders. Other GBufferSample fields
// are left uninitialized - callers must not read them.
GBufferSample SampleGBufferFast(GBufferData gBuffer, float2 uv)
{
GBufferSample result;
// Sample GBuffer
float4 gBuffer1 = SAMPLE_RT(GBuffer1, uv);
// Decode normal and shading model
result.Normal = DecodeNormal(gBuffer1.rgb);
// Alpha channel appears to encode the shading model index in [0,1]; *3.999 keeps the cast below 4 at a==1 -- TODO confirm encoding
result.ShadingModel = (int)(gBuffer1.a * 3.999);
// Calculate view space position
result.ViewPos = GetViewPos(gBuffer, uv);
result.WorldPos = mul(float4(result.ViewPos, 1), gBuffer.InvViewMatrix).xyz;
return result;
}
#if defined(USE_GBUFFER_CUSTOM_DATA)
// Sample GBuffer custom data only

View File

@@ -204,7 +204,7 @@ float4 PS_MotionBlur(Quad_VS2PS input) : SV_Target
// Sample pixel depth
GBufferData gBufferData = GetGBufferData();
float pixelDepth = LinearizeZ(gBufferData, SAMPLE_RT(Input3, input.TexCoord).x);
float pixelDepth = LinearizeZ(gBufferData, SAMPLE_RT_LOAD(Input3, input.TexCoord).x);
// Calculate noise to make it look better with less samples per pixel
float noise = FullscreenGradientNoise(input.TexCoord);
@@ -229,12 +229,12 @@ float4 PS_MotionBlur(Quad_VS2PS input) : SV_Target
float weight1 = 1;
float weight2 = 1;
#else
float depth1 = LinearizeZ(gBufferData, SAMPLE_RT(Input3, sampleUV1).x);
float depth1 = LinearizeZ(gBufferData, SAMPLE_RT_LOAD(Input3, sampleUV1).x);
float2 velocity1 = Input1.SampleLevel(SamplerPointClamp, sampleUV1, 0).xy;
velocity1 = ClampVelocity(velocity1);
float velocityLength1 = length(velocity1);
float depth2 = LinearizeZ(gBufferData, SAMPLE_RT(Input3, sampleUV2).x);
float depth2 = LinearizeZ(gBufferData, SAMPLE_RT_LOAD(Input3, sampleUV2).x);
float2 velocity2 = Input1.SampleLevel(SamplerPointClamp, sampleUV2, 0).xy;
velocity2 = ClampVelocity(velocity2);
float velocityLength2 = length(velocity2);

View File

@@ -116,7 +116,7 @@ float3 TraceScreenSpaceReflection(
while (currSampleIndex < numSamples)
{
// Sample depth buffer and calculate depth difference
float currSample = SAMPLE_RT(depthBuffer, currOffset.xy).r;
float currSample = SAMPLE_RT_LOAD(depthBuffer, currOffset.xy).r;
float depthDiff = currOffset.z - currSample;
// Check intersection

View File

@@ -35,7 +35,7 @@ float4 PS(Quad_VS2PS input) : SV_Target0
float2 velocity = SAMPLE_RT_LINEAR(MotionVectors, input.TexCoord).xy;
float velocityLength = length(velocity);
float2 prevUV = input.TexCoord - velocity;
float prevDepth = LinearizeZ(GBuffer, SAMPLE_RT(Depth, prevUV).r);
float prevDepth = LinearizeZ(GBuffer, SAMPLE_RT_LOAD(Depth, prevUV).r);
// Find the closest pixel in 3x3 neighborhood
float currentDepth = 1;
@@ -55,7 +55,7 @@ float4 PS(Quad_VS2PS input) : SV_Target0
neighborhoodMax = max(neighborhoodMax, neighbor);
neighborhoodSum += neighbor;
float neighborDepth = LinearizeZ(GBuffer, SAMPLE_RT(Depth, sampleUV).r);
float neighborDepth = LinearizeZ(GBuffer, SAMPLE_RT_LOAD(Depth, sampleUV).r);
float depthDiff = abs(max(neighborDepth - prevDepth, 0));
minDepthDiff = min(minDepthDiff, depthDiff);
if (x == 0 && y == 0)