// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. #define USE_VERTEX_COLOR 0 #include "./Flax/Common.hlsl" #include "./Flax/MaterialCommon.hlsl" #include "./Flax/SH.hlsl" // This config must match C++ code #define HEMISPHERES_RESOLUTION 64 #define NUM_SH_TARGETS 3 META_CB_BEGIN(0, Data) float4 LightmapArea; float4x4 WorldMatrix; float4x4 ToTangentSpace; float FinalWeight; uint TexelAdress; uint AtlasSize; float TerrainChunkSizeLOD0; float4 HeightmapUVScaleBias; float3 WorldInvScale; float Dummy1; META_CB_END #define USED_TEXELS_BIAS 0.001f #define BACKGROUND_TEXELS_MARK -1.0f struct RenderCacheVSOutput { float4 Position : SV_Position; float3 WorldPosition : TEXCOORD0; float3 WorldNormal : TEXCOORD1; }; struct RenderCachePSOutput { float4 WorldPosition : SV_Target0; float4 WorldNormal : SV_Target1; }; META_VS(true, FEATURE_LEVEL_SM5) META_VS_IN_ELEMENT(POSITION, 0, R32G32B32_FLOAT, 0, 0, PER_VERTEX, 0, true) META_VS_IN_ELEMENT(TEXCOORD, 0, R16G16_FLOAT, 1, 0, PER_VERTEX, 0, true) META_VS_IN_ELEMENT(NORMAL, 0, R10G10B10A2_UNORM, 1, ALIGN, PER_VERTEX, 0, true) META_VS_IN_ELEMENT(TANGENT, 0, R10G10B10A2_UNORM, 1, ALIGN, PER_VERTEX, 0, true) META_VS_IN_ELEMENT(TEXCOORD, 1, R16G16_FLOAT, 1, ALIGN, PER_VERTEX, 0, true) RenderCacheVSOutput VS_RenderCacheModel(ModelInput input) { RenderCacheVSOutput output; // Calculate vertex world position output.WorldPosition = mul(float4(input.Position.xyz, 1), WorldMatrix).xyz; // Unpack and transform vertex tangent frame vectors to world space float3 normal = normalize(input.Normal.xyz * 2.0 - 1.0); output.WorldNormal = mul(normal, (float3x3)WorldMatrix); // Transform lightmap UV to clip-space float2 lightmapUV = input.LightmapUV * LightmapArea.zw + LightmapArea.xy; lightmapUV.y = 1.0 - lightmapUV.y; lightmapUV.xy = lightmapUV.xy * 2.0 - 1.0; output.Position = float4(lightmapUV, 0, 1); return output; } // Must match structure defined in TerrainManager.cpp struct TerrainVertexInput { float2 TexCoord : TEXCOORD0; float4 Morph : TEXCOORD1; }; #if defined(_VS_RenderCacheTerrain) Texture2D Heightmap : register(t0); // Removes the scale vector from the local to world transformation matrix float3x3 RemoveScaleFromLocalToWorld(float3x3 localToWorld) { localToWorld[0] *= WorldInvScale.x; localToWorld[1] *= WorldInvScale.y; localToWorld[2] *= WorldInvScale.z; return localToWorld; } float3x3 CalcTangentToWorld(float4x4 world, float3x3 tangentToLocal) { float3x3 localToWorld = RemoveScaleFromLocalToWorld((float3x3)world); return mul(tangentToLocal, localToWorld); } META_VS(true, FEATURE_LEVEL_SM5) META_VS_IN_ELEMENT(TEXCOORD, 0, R32G32_FLOAT, 0, ALIGN, PER_VERTEX, 0, true) META_VS_IN_ELEMENT(TEXCOORD, 1, R8G8B8A8_UNORM, 0, ALIGN, PER_VERTEX, 0, true) RenderCacheVSOutput VS_RenderCacheTerrain(TerrainVertexInput input) { RenderCacheVSOutput output; // Sample heightmap float2 heightmapUVs = input.TexCoord * HeightmapUVScaleBias.xy + HeightmapUVScaleBias.zw; float4 heightmapValue = Heightmap.SampleLevel(SamplerPointClamp, heightmapUVs, 0); bool isHole = (heightmapValue.b + heightmapValue.a) >= 1.9f; float height = (float)((int)(heightmapValue.x * 255.0) + ((int)(heightmapValue.y * 255) << 8)) / 65535.0; // Extract normal and the holes mask float2 normalTemp = float2(heightmapValue.b, heightmapValue.a) * 2.0f - 1.0f; float3 normal = float3(normalTemp.x, sqrt(1.0 - saturate(dot(normalTemp, normalTemp))), normalTemp.y); normal = normalize(normal); if (isHole) { normal = float3(0, 1, 0); } // Construct vertex position float2 positionXZ = input.TexCoord * TerrainChunkSizeLOD0; float3 position = float3(positionXZ.x, height, positionXZ.y); // Calculate vertex world position output.WorldPosition = mul(float4(position, 1), WorldMatrix).xyz; // Compute world space normal vector float3x3 tangentToLocal = CalcTangentBasisFromWorldNormal(normal); float3x3 tangentToWorld = CalcTangentToWorld(WorldMatrix, tangentToLocal); output.WorldNormal = tangentToWorld[2]; // Transform lightmap UV to clip-space float2 texCoord = input.TexCoord; float2 lightmapUV = texCoord * LightmapArea.zw + LightmapArea.xy; lightmapUV.y = 1.0 - lightmapUV.y; lightmapUV.xy = lightmapUV.xy * 2.0 - 1.0; output.Position = float4(lightmapUV, 0, 1); return output; } #endif META_PS(true, FEATURE_LEVEL_SM5) RenderCachePSOutput PS_RenderCache(RenderCacheVSOutput input) { RenderCachePSOutput output; // Just pass interpolated values to the output render targets output.WorldPosition = float4(input.WorldPosition, 0); output.WorldNormal = float4(normalize(input.WorldNormal), 0); return output; } #if defined(_PS_BlurCache) Texture2D WorldNormalsCache : register(t0); Texture2D WorldPositionsCache : register(t1); META_PS(true, FEATURE_LEVEL_SM5) RenderCachePSOutput PS_BlurCache(Quad_VS2PS input) { RenderCachePSOutput output; output.WorldNormal = WorldNormalsCache.SampleLevel(SamplerPointClamp, input.TexCoord, 0); output.WorldPosition = WorldPositionsCache.SampleLevel(SamplerPointClamp, input.TexCoord, 0); // TODO: check if we need to use that filter - this will add more hemispheres for rendering /* // Check if pixel isn't empty if(length(output.WorldNormal.xyz) > 0.1) return output; // Simple box filter (using only valid samples) const float blurRadius = 2.0f; float offset = 1.0f / AtlasSize; float weight = 0; float3 totalNormal = 0; float3 totalPosition = 0; for(float x = -blurRadius; x <= blurRadius; x++) { for(float y = -blurRadius; y <= blurRadius; y++) { float2 sampleUV = input.TexCoord + float2(x, y) * offset; float3 normal = WorldNormalsCache.SampleLevel(SamplerPointClamp, sampleUV, 0).xyz; float3 position = WorldPositionsCache.SampleLevel(SamplerPointClamp, sampleUV, 0).xyz; // Check if pixel isn't empty if(length(normal) > 0.1) { totalNormal += normal; totalPosition += position; weight++; } } } // Calculate avg values weight = 1.0 / max(weight, 1); output.WorldPosition = float4(totalPosition * weight, 0); output.WorldNormal = float4(normalize(totalNormal * weight), 0); */ return output; } #elif defined(_CS_Integrate) Texture2D RadianceMap : register(t0); RWBuffer OutputBuffer : register(u0); // Shared memory for summing H-Basis coefficients for a row groupshared float3 RowHBasis[HEMISPHERES_RESOLUTION][4]; // Performs the initial integration/weighting for each pixel and sums together all SH coefficients for a row. // The integration is based on the "Projection from Cube Maps" section of Peter Pike Sloan's "Stupid Spherical Harmonics Tricks". META_CS(true, FEATURE_LEVEL_SM5) [numthreads(HEMISPHERES_RESOLUTION, 1, 1)] void CS_Integrate(uint3 GroupID : SV_GroupID, uint3 GroupThreadID : SV_GroupThreadID) { const int2 location = int2(GroupThreadID.x, GroupID.y); // Sample radiance float3 radiance = RadianceMap.Load(int3(location, 0)).rgb; // Calculate the location in [-1, 1] texture space float u = (location.x / float(HEMISPHERES_RESOLUTION)) * 2.0f - 1.0f; float v = -((location.y / float(HEMISPHERES_RESOLUTION)) * 2.0f - 1.0f); // Calculate weight float squaredUVs = 1 + u * u + v * v; float weight = 4 / (sqrt(squaredUVs) * squaredUVs); // Extract direction from texel uv float3 dirVS = normalize(float3(u, v, 1.0f)); float3 dirTS = mul(dirVS, (float3x3)ToTangentSpace); // Project onto SH float3 sh[9]; ProjectOntoSH3(dirTS, radiance * weight, sh); // Convert to H-Basis float3 hBasis[4]; ConvertSH3ToHBasis(sh, hBasis); // Store in shared memory RowHBasis[GroupThreadID.x][0] = hBasis[0]; RowHBasis[GroupThreadID.x][1] = hBasis[1]; RowHBasis[GroupThreadID.x][2] = hBasis[2]; RowHBasis[GroupThreadID.x][3] = hBasis[3]; GroupMemoryBarrierWithGroupSync(); // Sum the coefficients for the row [unroll(HEMISPHERES_RESOLUTION)] for (uint s = HEMISPHERES_RESOLUTION / 2; s > 0; s >>= 1) { if (GroupThreadID.x < s) { RowHBasis[GroupThreadID.x][0] += RowHBasis[GroupThreadID.x + s][0]; RowHBasis[GroupThreadID.x][1] += RowHBasis[GroupThreadID.x + s][1]; RowHBasis[GroupThreadID.x][2] += RowHBasis[GroupThreadID.x + s][2]; RowHBasis[GroupThreadID.x][3] += RowHBasis[GroupThreadID.x + s][3]; } GroupMemoryBarrierWithGroupSync(); } // Have the first thread write out to the output texture if (GroupThreadID.x == 0) { UNROLL for (uint i = 0; i < NUM_SH_TARGETS; i++) { float4 packed = float4(RowHBasis[0][0][i], RowHBasis[0][1][i], RowHBasis[0][2][i], RowHBasis[0][3][i]); OutputBuffer[GroupID.y + HEMISPHERES_RESOLUTION * i] = packed; } } } #elif defined(_CS_Reduction) Buffer InputBuffer : register(t0); RWBuffer OutputBuffer : register(u0); // Shared memory for reducing H-Basis coefficients groupshared float4 ColumnHBasis[HEMISPHERES_RESOLUTION][3]; // Reduces H-basis to a 1x1 buffer META_CS(true, FEATURE_LEVEL_SM5) [numthreads(HEMISPHERES_RESOLUTION, 1, 1)] void CS_Reduction(uint3 GroupID : SV_GroupID, uint3 GroupThreadID : SV_GroupThreadID) { const int2 location = int2(GroupThreadID.x, GroupID.y); // Store in shared memory ColumnHBasis[location.x][location.y] = InputBuffer[location.x + HEMISPHERES_RESOLUTION * location.y]; GroupMemoryBarrierWithGroupSync(); // Sum the coefficients for the column [unroll(HEMISPHERES_RESOLUTION)] for (uint s = HEMISPHERES_RESOLUTION / 2; s > 0; s >>= 1) { if (GroupThreadID.x < s) ColumnHBasis[location.x][location.y] += ColumnHBasis[location.x + s][location.y]; GroupMemoryBarrierWithGroupSync(); } // Have the first thread write out to the output buffer if (GroupThreadID.x == 0 && GroupThreadID.z == 0) { float4 output = ColumnHBasis[location.x][location.y]; // Note: we add some bias to indicate that this texel has been used output = output * FinalWeight + USED_TEXELS_BIAS; output = clamp(output, 0, 10000); OutputBuffer[TexelAdress + location.y] = output; } } #elif defined(_CS_BlurEmpty) Buffer InputBuffer : register(t0); RWBuffer OutputBuffer : register(u0); // Blur empty lightmap texels to reduce artifacts (blurs only holes and sets -1 to pixels that are not using lightmap - no data) META_CS(true, FEATURE_LEVEL_SM5) [numthreads(1, 1, 1)] void CS_BlurEmpty(uint3 GroupID : SV_GroupID, uint3 GroupThreadID : SV_GroupThreadID) { if (GroupID.x >= AtlasSize || GroupID.y > AtlasSize) return; const int2 location = int2(GroupID.x, GroupID.y); const uint texelAdress = (location.y * AtlasSize + location.x) * NUM_SH_TARGETS; // TODO: use more threads to sample lightmap and final therad make it blur // Simple box filter (using only valid samples) const int blurRadius = 2; float weight = 0; float4 total0 = 0; float4 total1 = 0; float4 total2 = 0; for (int x = -blurRadius; x <= blurRadius; x++) { for (int y = -blurRadius; y <= blurRadius; y++) { int2 sampleLocation = location + int2(x, y); uint sampleAdress = (clamp(sampleLocation.y, 0, AtlasSize - 1) * AtlasSize + clamp(sampleLocation.x, 0, AtlasSize - 1)) * NUM_SH_TARGETS; float4 sample0 = InputBuffer[sampleAdress + 0]; float4 sample1 = InputBuffer[sampleAdress + 1]; float4 sample2 = InputBuffer[sampleAdress + 2]; if (any(sample0)) { total0 += sample0 - USED_TEXELS_BIAS; total1 += sample1 - USED_TEXELS_BIAS; total2 += sample2 - USED_TEXELS_BIAS; weight++; } } } // Check if pixel has invalid value /*float4 lightmap0 = InputBuffer[texelAdress + 0]; if (any(lightmap0)) { // Discard sampling results total0 = lightmap0 - USED_TEXELS_BIAS; total1 = InputBuffer[texelAdress + 1] - USED_TEXELS_BIAS; total2 = InputBuffer[texelAdress + 2] - USED_TEXELS_BIAS; } else*/ if (weight > 0.0001f) { // Calculate avg values and save results weight = 1.0 / weight; total0 *= weight; total1 *= weight; total2 *= weight; } else { // No data - use wide gaussian blur for the background in total0 = BACKGROUND_TEXELS_MARK; total1 = BACKGROUND_TEXELS_MARK; total2 = BACKGROUND_TEXELS_MARK; } // Save results #if 1 OutputBuffer[texelAdress + 0] = total0; OutputBuffer[texelAdress + 1] = total1; OutputBuffer[texelAdress + 2] = total2; #else OutputBuffer[texelAdress + 0] = InputBuffer[texelAdress + 0]; OutputBuffer[texelAdress + 1] = InputBuffer[texelAdress + 1]; OutputBuffer[texelAdress + 2] = InputBuffer[texelAdress + 2]; #endif } #elif defined(_CS_Dilate) Buffer InputBuffer : register(t0); RWBuffer OutputBuffer : register(u0); // Fills the empty lightmap texels with blurred data of the surroundings texels (uses only valid ones) META_CS(true, FEATURE_LEVEL_SM5) [numthreads(1, 1, 1)] void CS_Dilate(uint3 GroupID : SV_GroupID, uint3 GroupThreadID : SV_GroupThreadID) { if (GroupID.x >= AtlasSize || GroupID.y > AtlasSize) return; const int2 location = int2(GroupID.x, GroupID.y); const uint texelAdress = (location.y * AtlasSize + location.x) * NUM_SH_TARGETS; // Copy data float4 lightmap0 = InputBuffer[texelAdress + 0]; float4 lightmap1 = InputBuffer[texelAdress + 1]; float4 lightmap2 = InputBuffer[texelAdress + 2]; OutputBuffer[texelAdress + 0] = lightmap0; OutputBuffer[texelAdress + 1] = lightmap1; OutputBuffer[texelAdress + 2] = lightmap2; // Check if pixel has valid value if (abs(lightmap0.r - BACKGROUND_TEXELS_MARK) > 0.001f) return; float total = 0; float4 total0 = 0; float4 total1 = 0; float4 total2 = 0; const int OffsetX[] = { -1, 0, 1, -1, 0, 1, -1, 0, 1, }; const int OffsetY[] = { -1, -1, -1, 0, 0, 0, 1, 1, 1, }; UNROLL for (int sampleIndex = 0; sampleIndex < 9; sampleIndex++) { int2 sampleLocation = location + int2(OffsetX[sampleIndex], OffsetY[sampleIndex]); if (sampleLocation.x >= 0 && sampleLocation.x < AtlasSize && sampleLocation.y >= 0 && sampleLocation.y < AtlasSize) { uint sampleAdress = (sampleLocation.y * AtlasSize + sampleLocation.x) * NUM_SH_TARGETS; float4 sample0 = InputBuffer[sampleAdress + 0]; float4 sample1 = InputBuffer[sampleAdress + 1]; float4 sample2 = InputBuffer[sampleAdress + 2]; // Use only valid texels if (abs(sample0.r - BACKGROUND_TEXELS_MARK) > 0.001f) { total0 += sample0; total1 += sample1; total2 += sample2; total++; } } } if (total > 0) { total = 1.0f / total; OutputBuffer[texelAdress + 0] = total0 * total; OutputBuffer[texelAdress + 1] = total1 * total; OutputBuffer[texelAdress + 2] = total2 * total; } } #elif defined(_CS_Finalize) RWBuffer OutputBuffer : register(u0); // Cleanups the lightmap data by removing the invalid texels to be just pure black META_CS(true, FEATURE_LEVEL_SM5) [numthreads(1, 1, 1)] void CS_Finalize(uint3 GroupID : SV_GroupID, uint3 GroupThreadID : SV_GroupThreadID) { if (GroupID.x >= AtlasSize || GroupID.y > AtlasSize) return; const int2 location = int2(GroupID.x, GroupID.y); const uint texelAdress = (location.y * AtlasSize + location.x) * NUM_SH_TARGETS; // Check if pixel has valid value if (abs(OutputBuffer[texelAdress].r - BACKGROUND_TEXELS_MARK) > 0.001f) return; // Make it black float4 clearColor = float4(0, 0, 0, 0); OutputBuffer[texelAdress + 0] = clearColor; OutputBuffer[texelAdress + 1] = clearColor; OutputBuffer[texelAdress + 2] = clearColor; } #endif