Refactor draw calls drawing to use single objects buffer and better materials batching

This commit is contained in:
Wojtek Figat
2024-07-02 00:52:22 +02:00
parent 78f3248ac9
commit 08ef7c93ea
35 changed files with 491 additions and 442 deletions

View File

@@ -116,6 +116,7 @@ void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instan
ASSERT_LOW_LAYER(key.Mat);
e->DrawCall.Material = key.Mat;
e->DrawCall.Surface.Lightmap = EnumHasAnyFlags(_staticFlags, StaticFlags::Lightmap) && _scene ? _scene->LightmapsData.GetReadyLightmap(key.Lightmap) : nullptr;
e->DrawCall.Surface.GeometrySize = key.Geo->GetBox().GetSize();
}
// Add instance to the draw batch
@@ -124,13 +125,8 @@ void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instan
const Transform transform = _transform.LocalToWorld(instance.Transform);
const Float3 translation = transform.Translation - renderContext.View.Origin;
Matrix::Transformation(transform.Scale, transform.Orientation, translation, world);
instanceData.InstanceOrigin = Float3(world.M41, world.M42, world.M43);
instanceData.PerInstanceRandom = instance.Random;
instanceData.InstanceTransform1 = Float3(world.M11, world.M12, world.M13);
instanceData.LODDitherFactor = lodDitherFactor;
instanceData.InstanceTransform2 = Float3(world.M21, world.M22, world.M23);
instanceData.InstanceTransform3 = Float3(world.M31, world.M32, world.M33);
instanceData.InstanceLightmapArea = Half4(instance.Lightmap.UVsArea);
constexpr float worldDeterminantSign = 1.0f;
instanceData.Store(world, world, instance.Lightmap.UVsArea, drawCall.DrawCall.Surface.GeometrySize, instance.Random, worldDeterminantSign, lodDitherFactor);
}
}
@@ -456,6 +452,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr
continue;
drawCall.DrawCall.Material = material;
drawCall.DrawCall.Surface.GeometrySize = mesh.GetBox().GetSize();
}
}
@@ -479,18 +476,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr
mesh.GetDrawCallGeometry(batch.DrawCall);
batch.DrawCall.InstanceCount = 1;
auto& firstInstance = batch.Instances[0];
batch.DrawCall.ObjectPosition = firstInstance.InstanceOrigin;
batch.DrawCall.PerInstanceRandom = firstInstance.PerInstanceRandom;
auto lightmapArea = firstInstance.InstanceLightmapArea.ToFloat4();
batch.DrawCall.Surface.LightmapUVsArea = *(Rectangle*)&lightmapArea;
batch.DrawCall.Surface.LODDitherFactor = firstInstance.LODDitherFactor;
batch.DrawCall.World.SetRow1(Float4(firstInstance.InstanceTransform1, 0.0f));
batch.DrawCall.World.SetRow2(Float4(firstInstance.InstanceTransform2, 0.0f));
batch.DrawCall.World.SetRow3(Float4(firstInstance.InstanceTransform3, 0.0f));
batch.DrawCall.World.SetRow4(Float4(firstInstance.InstanceOrigin, 1.0f));
batch.DrawCall.Surface.PrevWorld = batch.DrawCall.World;
batch.DrawCall.Surface.GeometrySize = mesh.GetBox().GetSize();
batch.DrawCall.WorldDeterminantSign = 1;
firstInstance.Load(batch.DrawCall);
if (EnumHasAnyFlags(drawModes, DrawPass::Forward))
{
@@ -499,15 +485,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr
for (int32 j = 0; j < batch.Instances.Count(); j++)
{
auto& instance = batch.Instances[j];
drawCall.ObjectPosition = instance.InstanceOrigin;
drawCall.PerInstanceRandom = instance.PerInstanceRandom;
lightmapArea = instance.InstanceLightmapArea.ToFloat4();
drawCall.Surface.LightmapUVsArea = *(Rectangle*)&lightmapArea;
drawCall.Surface.LODDitherFactor = instance.LODDitherFactor;
drawCall.World.SetRow1(Float4(instance.InstanceTransform1, 0.0f));
drawCall.World.SetRow2(Float4(instance.InstanceTransform2, 0.0f));
drawCall.World.SetRow3(Float4(instance.InstanceTransform3, 0.0f));
drawCall.World.SetRow4(Float4(instance.InstanceOrigin, 1.0f));
instance.Load(drawCall);
const int32 drawCallIndex = renderContext.List->DrawCalls.Add(drawCall);
renderContext.List->DrawCallsLists[(int32)DrawCallsListType::Forward].Indices.Add(drawCallIndex);
}

View File

@@ -29,7 +29,7 @@ void DecalMaterialShader::Bind(BindParameters& params)
// Prepare
auto context = params.GPUContext;
auto& view = params.RenderContext.View;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
Span<byte> cb(_cbData.Get(), _cbData.Count());
ASSERT_LOW_LAYER(cb.Length() >= sizeof(DecalMaterialShaderData));
auto materialData = reinterpret_cast<DecalMaterialShaderData*>(cb.Get());

View File

@@ -3,7 +3,6 @@
#include "DeferredMaterialShader.h"
#include "MaterialShaderFeatures.h"
#include "MaterialParams.h"
#include "Engine/Core/Math/Matrix3x4.h"
#include "Engine/Graphics/RenderBuffers.h"
#include "Engine/Graphics/RenderView.h"
#include "Engine/Renderer/DrawCall.h"
@@ -17,16 +16,6 @@
#include "Engine/Graphics/GPULimits.h"
#include "Engine/Graphics/RenderTask.h"
// Per-material constants written by DeferredMaterialShader::Bind at the start of the material constant buffer data.
PACK_STRUCT(struct DeferredMaterialShaderData {
Matrix3x4 WorldMatrix; // Filled via SetMatrixTranspose(drawCall.World)
Matrix3x4 PrevWorldMatrix; // Filled via SetMatrixTranspose(drawCall.Surface.PrevWorld)
Float2 Dummy0; // Never written by Bind; presumably alignment padding - TODO confirm against the shader-side layout
float LODDitherFactor; // Copied from drawCall.Surface.LODDitherFactor
float PerInstanceRandom; // Copied from drawCall.PerInstanceRandom
Float3 GeometrySize; // Copied from drawCall.Surface.GeometrySize
float WorldDeterminantSign; // Copied from drawCall.WorldDeterminantSign
});
DrawPass DeferredMaterialShader::GetDrawModes() const
{
return DrawPass::Depth | DrawPass::GBuffer | DrawPass::GlobalSurfaceAtlas | DrawPass::MotionVectors | DrawPass::QuadOverdraw;
@@ -39,22 +28,17 @@ bool DeferredMaterialShader::CanUseLightmap() const
bool DeferredMaterialShader::CanUseInstancing(InstancingHandler& handler) const
{
handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, SurfaceDrawCallHandler::WriteDrawCall, };
handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, };
return true;
}
void DeferredMaterialShader::Bind(BindParameters& params)
{
//PROFILE_CPU();
// Prepare
auto context = params.GPUContext;
auto& view = params.RenderContext.View;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
Span<byte> cb(_cbData.Get(), _cbData.Count());
ASSERT_LOW_LAYER(cb.Length() >= sizeof(DeferredMaterialShaderData));
auto materialData = reinterpret_cast<DeferredMaterialShaderData*>(cb.Get());
cb = Span<byte>(cb.Get() + sizeof(DeferredMaterialShaderData), cb.Length() - sizeof(DeferredMaterialShaderData));
int32 srv = 2;
int32 srv = 3;
// Setup features
const bool useLightmap = _info.BlendMode == MaterialBlendMode::Opaque && LightmapFeature::Bind(params, cb, srv);
@@ -68,28 +52,19 @@ void DeferredMaterialShader::Bind(BindParameters& params)
bindMeta.CanSampleDepth = false;
bindMeta.CanSampleGBuffer = false;
MaterialParams::Bind(params.ParamsLink, bindMeta);
context->BindSR(0, params.ObjectBuffer);
// Setup material constants
{
materialData->WorldMatrix.SetMatrixTranspose(drawCall.World);
materialData->PrevWorldMatrix.SetMatrixTranspose(drawCall.Surface.PrevWorld);
materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign;
materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;
materialData->GeometrySize = drawCall.Surface.GeometrySize;
}
// Check if is using mesh skinning
// Check if using mesh skinning
const bool useSkinning = drawCall.Surface.Skinning != nullptr;
bool perBoneMotionBlur = false;
if (useSkinning)
{
// Bind skinning buffer
ASSERT(drawCall.Surface.Skinning->IsReady());
context->BindSR(0, drawCall.Surface.Skinning->BoneMatrices->View());
context->BindSR(1, drawCall.Surface.Skinning->BoneMatrices->View());
if (drawCall.Surface.Skinning->PrevBoneMatrices && drawCall.Surface.Skinning->PrevBoneMatrices->IsAllocated())
{
context->BindSR(1, drawCall.Surface.Skinning->PrevBoneMatrices->View());
context->BindSR(2, drawCall.Surface.Skinning->PrevBoneMatrices->View());
perBoneMotionBlur = true;
}
}
@@ -116,8 +91,8 @@ void DeferredMaterialShader::Bind(BindParameters& params)
else
cullMode = CullMode::Normal;
}
ASSERT_LOW_LAYER(!(useSkinning && params.DrawCallsCount > 1)); // No support for instancing skinned meshes
const auto cache = params.DrawCallsCount == 1 ? &_cache : &_cacheInstanced;
ASSERT_LOW_LAYER(!(useSkinning && params.Instanced)); // No support for instancing skinned meshes
const auto cache = params.Instanced ? &_cacheInstanced : &_cache;
PipelineStateCache* psCache = cache->GetPS(view.Pass, useLightmap, useSkinning, perBoneMotionBlur);
ASSERT(psCache);
GPUPipelineState* state = psCache->GetPS(cullMode, wireframe);

View File

@@ -37,7 +37,7 @@ void DeformableMaterialShader::Bind(BindParameters& params)
// Prepare
auto context = params.GPUContext;
auto& view = params.RenderContext.View;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
Span<byte> cb(_cbData.Get(), _cbData.Count());
ASSERT_LOW_LAYER(cb.Length() >= sizeof(DeformableMaterialShaderData));
auto materialData = reinterpret_cast<DeformableMaterialShaderData*>(cb.Get());

View File

@@ -3,7 +3,6 @@
#include "ForwardMaterialShader.h"
#include "MaterialShaderFeatures.h"
#include "MaterialParams.h"
#include "Engine/Core/Math/Matrix3x4.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPULimits.h"
@@ -18,16 +17,6 @@
#include "Engine/Renderer/Lightmaps.h"
#endif
// Per-material constants written by ForwardMaterialShader::Bind at the start of the material constant buffer data.
PACK_STRUCT(struct ForwardMaterialShaderData {
Matrix3x4 WorldMatrix; // Filled via SetMatrixTranspose(drawCall.World)
Matrix3x4 PrevWorldMatrix; // Filled via SetMatrixTranspose(drawCall.Surface.PrevWorld)
Float2 Dummy0; // Never written by Bind; presumably alignment padding - TODO confirm against the shader-side layout
float LODDitherFactor; // Copied from drawCall.Surface.LODDitherFactor
float PerInstanceRandom; // Copied from drawCall.PerInstanceRandom
Float3 GeometrySize; // Copied from drawCall.Surface.GeometrySize
float WorldDeterminantSign; // Copied from drawCall.WorldDeterminantSign
});
DrawPass ForwardMaterialShader::GetDrawModes() const
{
return _drawModes;
@@ -35,7 +24,7 @@ DrawPass ForwardMaterialShader::GetDrawModes() const
bool ForwardMaterialShader::CanUseInstancing(InstancingHandler& handler) const
{
handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, SurfaceDrawCallHandler::WriteDrawCall, };
handler = { SurfaceDrawCallHandler::GetHash, SurfaceDrawCallHandler::CanBatch, };
return true;
}
@@ -44,12 +33,9 @@ void ForwardMaterialShader::Bind(BindParameters& params)
// Prepare
auto context = params.GPUContext;
auto& view = params.RenderContext.View;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
Span<byte> cb(_cbData.Get(), _cbData.Count());
ASSERT_LOW_LAYER(cb.Length() >= sizeof(ForwardMaterialShaderData));
auto materialData = reinterpret_cast<ForwardMaterialShaderData*>(cb.Get());
cb = Span<byte>(cb.Get() + sizeof(ForwardMaterialShaderData), cb.Length() - sizeof(ForwardMaterialShaderData));
int32 srv = 2;
int32 srv = 3;
// Setup features
if ((_info.FeaturesFlags & MaterialFeaturesFlags::GlobalIllumination) != MaterialFeaturesFlags::None)
@@ -65,24 +51,15 @@ void ForwardMaterialShader::Bind(BindParameters& params)
bindMeta.CanSampleDepth = GPUDevice::Instance->Limits.HasReadOnlyDepth;
bindMeta.CanSampleGBuffer = true;
MaterialParams::Bind(params.ParamsLink, bindMeta);
context->BindSR(0, params.ObjectBuffer);
// Check if is using mesh skinning
// Check if using mesh skinning
const bool useSkinning = drawCall.Surface.Skinning != nullptr;
if (useSkinning)
{
// Bind skinning buffer
ASSERT(drawCall.Surface.Skinning->IsReady());
context->BindSR(0, drawCall.Surface.Skinning->BoneMatrices->View());
}
// Setup material constants
{
materialData->WorldMatrix.SetMatrixTranspose(drawCall.World);
materialData->PrevWorldMatrix.SetMatrixTranspose(drawCall.Surface.PrevWorld);
materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign;
materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;
materialData->GeometrySize = drawCall.Surface.GeometrySize;
context->BindSR(1, drawCall.Surface.Skinning->BoneMatrices->View());
}
// Bind constants
@@ -107,8 +84,8 @@ void ForwardMaterialShader::Bind(BindParameters& params)
else
cullMode = CullMode::Normal;
}
ASSERT_LOW_LAYER(!(useSkinning && params.DrawCallsCount > 1)); // No support for instancing skinned meshes
const auto cacheObj = params.DrawCallsCount == 1 ? &_cache : &_cacheInstanced;
ASSERT_LOW_LAYER(!(useSkinning && params.Instanced)); // No support for instancing skinned meshes
const auto cacheObj = params.Instanced ? &_cacheInstanced : &_cache;
PipelineStateCache* psCache = cacheObj->GetPS(view.Pass, useSkinning);
ASSERT(psCache);
GPUPipelineState* state = psCache->GetPS(cullMode, wireframe);

View File

@@ -8,6 +8,7 @@ struct MaterialParamsLink;
class GPUShader;
class GPUContext;
class GPUTextureView;
class GPUBufferView;
class GPUConstantBuffer;
class RenderBuffers;
class SceneRenderTask;
@@ -120,7 +121,6 @@ public:
{
void (*GetHash)(const DrawCall& drawCall, uint32& batchKey);
bool (*CanBatch)(const DrawCall& a, const DrawCall& b, DrawPass pass);
void (*WriteDrawCall)(struct InstanceData* instanceData, const DrawCall& drawCall);
};
/// <summary>
@@ -131,7 +131,7 @@ public:
virtual bool CanUseInstancing(InstancingHandler& handler) const
{
#if BUILD_DEBUG
handler = { nullptr, nullptr, nullptr };
handler = { nullptr, nullptr };
#endif
return false;
}
@@ -144,11 +144,12 @@ public:
{
GPUContext* GPUContext;
const RenderContext& RenderContext;
const DrawCall* FirstDrawCall;
int32 DrawCallsCount;
GPUBufferView* ObjectBuffer = nullptr;
const ::DrawCall* DrawCall = nullptr;
MaterialParamsLink* ParamsLink = nullptr;
void* CustomData = nullptr;
float TimeParam;
bool Instanced = false;
/// <summary>
/// The input scene color. It's optional and used in forward/postFx rendering.
@@ -156,11 +157,12 @@ public:
GPUTextureView* Input = nullptr;
BindParameters(::GPUContext* context, const ::RenderContext& renderContext);
BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const DrawCall& drawCall);
BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const DrawCall* firstDrawCall, int32 drawCallsCount);
BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const ::DrawCall& drawCall, bool instanced = false);
// Per-view shared constant buffer (see ViewData in MaterialCommon.hlsl).
static GPUConstantBuffer* PerViewConstants;
// Per-draw shared constant buffer bound at slot 2 (sized by MaterialShaderDataPerDraw).
static GPUConstantBuffer* PerDrawConstants;
// Binds the shared per-view constant buffer at slot 1 (see ViewData in MaterialCommon.hlsl)
void BindViewData();

View File

@@ -38,31 +38,21 @@ GPU_CB_STRUCT(MaterialShaderDataPerView {
IMaterial::BindParameters::BindParameters(::GPUContext* context, const ::RenderContext& renderContext)
: GPUContext(context)
, RenderContext(renderContext)
, FirstDrawCall(nullptr)
, DrawCallsCount(0)
, TimeParam(Time::Draw.UnscaledTime.GetTotalSeconds())
{
}
IMaterial::BindParameters::BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const DrawCall& drawCall)
IMaterial::BindParameters::BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const ::DrawCall& drawCall, bool instanced)
: GPUContext(context)
, RenderContext(renderContext)
, FirstDrawCall(&drawCall)
, DrawCallsCount(1)
, TimeParam(Time::Draw.UnscaledTime.GetTotalSeconds())
{
}
IMaterial::BindParameters::BindParameters(::GPUContext* context, const ::RenderContext& renderContext, const DrawCall* firstDrawCall, int32 drawCallsCount)
: GPUContext(context)
, RenderContext(renderContext)
, FirstDrawCall(firstDrawCall)
, DrawCallsCount(drawCallsCount)
, DrawCall(&drawCall)
, TimeParam(Time::Draw.UnscaledTime.GetTotalSeconds())
, Instanced(instanced)
{
}
GPUConstantBuffer* IMaterial::BindParameters::PerViewConstants = nullptr;
GPUConstantBuffer* IMaterial::BindParameters::PerDrawConstants = nullptr;
void IMaterial::BindParameters::BindViewData()
{
@@ -70,6 +60,7 @@ void IMaterial::BindParameters::BindViewData()
if (!PerViewConstants)
{
PerViewConstants = GPUDevice::Instance->CreateConstantBuffer(sizeof(MaterialShaderDataPerView), TEXT("PerViewConstants"));
PerDrawConstants = GPUDevice::Instance->CreateConstantBuffer(sizeof(MaterialShaderDataPerDraw), TEXT("PerDrawConstants"));
}
// Setup data

View File

@@ -10,13 +10,19 @@
/// <summary>
/// Current materials shader version.
/// </summary>
#define MATERIAL_GRAPH_VERSION 166
#define MATERIAL_GRAPH_VERSION 167
class Material;
class GPUShader;
class GPUConstantBuffer;
class MemoryReadStream;
// Draw pipeline constant buffer (with per-draw constants at slot 2)
// The shared PerDrawConstants GPU buffer is created with sizeof(MaterialShaderDataPerDraw).
GPU_CB_STRUCT(MaterialShaderDataPerDraw {
Float3 DrawPadding; // Set to Float3::Zero by the draw loop; presumably pads the struct to 16 bytes - TODO confirm
uint32 DrawObjectIndex; // NOTE(review): looks like the index of the drawn object inside the objects buffer - confirm against the shader
});
/// <summary>
/// Represents material shader that can be used to render objects, visuals or effects. Contains a dedicated shader.
/// </summary>

View File

@@ -16,7 +16,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, Span<by
{
auto cache = params.RenderContext.List;
auto& view = params.RenderContext.View;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
auto& data = *(Data*)cb.Get();
ASSERT_LOW_LAYER(cb.Length() >= sizeof(Data));
const int32 envProbeShaderRegisterIndex = srv + 0;
@@ -118,8 +118,7 @@ void ForwardShadingFeature::Bind(MaterialShader::BindParameters& params, Span<by
bool LightmapFeature::Bind(MaterialShader::BindParameters& params, Span<byte>& cb, int32& srv)
{
auto& drawCall = *params.FirstDrawCall;
ASSERT_LOW_LAYER(cb.Length() >= sizeof(Data));
auto& drawCall = *params.DrawCall;
const bool useLightmap = EnumHasAnyFlags(params.RenderContext.View.Flags, ViewFlags::GI)
#if USE_EDITOR
@@ -134,13 +133,15 @@ bool LightmapFeature::Bind(MaterialShader::BindParameters& params, Span<byte>& c
params.GPUContext->BindSR(srv + 0, lightmap0);
params.GPUContext->BindSR(srv + 1, lightmap1);
params.GPUContext->BindSR(srv + 2, lightmap2);
// Set lightmap data
auto& data = *(Data*)cb.Get();
data.LightmapArea = drawCall.Features.LightmapUVsArea;
}
else
{
// Free texture slots
params.GPUContext->UnBindSR(srv + 0);
params.GPUContext->UnBindSR(srv + 1);
params.GPUContext->UnBindSR(srv + 2);
}
cb = Span<byte>(cb.Get() + sizeof(Data), cb.Length() - sizeof(Data));
srv += SRVs;
return useLightmap;
}

View File

@@ -63,11 +63,6 @@ struct LightmapFeature : MaterialShaderFeature
{
enum { SRVs = 3 };
PACK_STRUCT(struct Data
{
Rectangle LightmapArea;
});
static bool Bind(MaterialShader::BindParameters& params, Span<byte>& cb, int32& srv);
#if USE_EDITOR
static void Generate(GeneratorData& data);

View File

@@ -48,7 +48,7 @@ void ParticleMaterialShader::Bind(BindParameters& params)
// Prepare
auto context = params.GPUContext;
auto& view = params.RenderContext.View;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
const uint32 sortedIndicesOffset = drawCall.Particle.Module->SortedIndicesOffset;
Span<byte> cb(_cbData.Get(), _cbData.Count());
ASSERT_LOW_LAYER(cb.Length() >= sizeof(ParticleMaterialShaderData));

View File

@@ -28,6 +28,7 @@ PACK_STRUCT(struct TerrainMaterialShaderData {
Float4 NeighborLOD; // Per component LOD index for chunk neighbors ordered: top, left, right, bottom
Float2 OffsetUV; // Offset applied to the texture coordinates (used to implement seamless UVs based on chunk location relative to terrain root)
Float2 Dummy0;
Float4 LightmapArea;
});
DrawPass TerrainMaterialShader::GetDrawModes() const
@@ -45,7 +46,7 @@ void TerrainMaterialShader::Bind(BindParameters& params)
// Prepare
auto context = params.GPUContext;
auto& view = params.RenderContext.View;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
Span<byte> cb(_cbData.Get(), _cbData.Count());
ASSERT_LOW_LAYER(cb.Length() >= sizeof(TerrainMaterialShaderData));
auto materialData = reinterpret_cast<TerrainMaterialShaderData*>(cb.Get());
@@ -83,6 +84,7 @@ void TerrainMaterialShader::Bind(BindParameters& params)
materialData->HeightmapUVScaleBias = drawCall.Terrain.HeightmapUVScaleBias;
materialData->NeighborLOD = drawCall.Terrain.NeighborLOD;
materialData->OffsetUV = drawCall.Terrain.OffsetUV;
materialData->LightmapArea = *(Float4*)&drawCall.Terrain.LightmapUVsArea;
}
// Bind terrain textures

View File

@@ -37,7 +37,7 @@ void VolumeParticleMaterialShader::Bind(BindParameters& params)
// Prepare
auto context = params.GPUContext;
const RenderView& view = params.RenderContext.View;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
Span<byte> cb(_cbData.Get(), _cbData.Count());
ASSERT_LOW_LAYER(cb.Length() >= sizeof(VolumeParticleMaterialShaderData));
auto materialData = reinterpret_cast<VolumeParticleMaterialShaderData*>(cb.Get());

View File

@@ -382,7 +382,7 @@ void Mesh::Render(GPUContext* context) const
context->BindVB(ToSpan((GPUBuffer**)_vertexBuffers, 3));
context->BindIB(_indexBuffer);
context->DrawIndexedInstanced(_triangles * 3, 1, 0, 0, 0);
context->DrawIndexed(_triangles * 3);
}
void Mesh::Draw(const RenderContext& renderContext, MaterialBase* material, const Matrix& world, StaticFlags flags, bool receiveDecals, DrawPass drawModes, float perInstanceRandom, int8 sortOrder) const

View File

@@ -7,7 +7,6 @@
#include "Engine/Content/Content.h"
#include "Engine/Content/Assets/Model.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Renderer/DrawCall.h"
#include "Engine/Renderer/RenderList.h"
#include "Engine/Renderer/GBufferPass.h"
@@ -47,7 +46,7 @@ void LODPreviewMaterialShader::Bind(BindParameters& params)
{
// Find the LOD that produced this draw call
int32 lodIndex = 0;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
const ModelLOD* drawCallModelLod;
if (GBufferPass::IndexBufferToModelLOD.TryGet(drawCall.Geometry.IndexBuffer, drawCallModelLod))
{

View File

@@ -108,7 +108,7 @@ void LightmapUVsDensityMaterialShader::Bind(BindParameters& params)
{
// Prepare
auto context = params.GPUContext;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
// Setup
auto shader = _shader->GetShader();

View File

@@ -48,7 +48,7 @@ DrawPass MaterialComplexityMaterialShader::WrapperShader::GetDrawModes() const
void MaterialComplexityMaterialShader::WrapperShader::Bind(BindParameters& params)
{
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
// Get original material from the draw call
IMaterial* material = nullptr;

View File

@@ -61,7 +61,7 @@ void VertexColorsMaterialShader::Bind(BindParameters& params)
{
// Prepare
auto context = params.GPUContext;
auto& drawCall = *params.FirstDrawCall;
auto& drawCall = *params.DrawCall;
// Setup
auto shader = _shader->GetShader();

View File

@@ -509,7 +509,7 @@ void GBufferPass::DrawDecals(RenderContext& renderContext, GPUTextureView* light
drawCall.World = decal.World;
decal.Material->Bind(bindParams);
// TODO: use hardware instancing
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, 1, 0, 0, 0);
context->DrawIndexed(drawCall.Draw.IndicesCount);
}
context->ResetSR();

View File

@@ -849,6 +849,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// Clear draw calls list
renderContextTiles.List->DrawCalls.Clear();
renderContextTiles.List->BatchedDrawCalls.Clear();
renderContextTiles.List->ObjectBuffer.Clear();
drawCallsListGBuffer.Indices.Clear();
drawCallsListGBuffer.PreBatchedDrawCalls.Clear();
drawCallsListGBufferNoDecals.Indices.Clear();

View File

@@ -3,6 +3,7 @@
#include "RenderList.h"
#include "Engine/Core/Collections/Sorting.h"
#include "Engine/Graphics/Materials/IMaterial.h"
#include "Engine/Graphics/Materials/MaterialShader.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPUDevice.h"
@@ -21,6 +22,7 @@ static_assert(sizeof(DrawCall) <= 288, "Too big draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Terrain), "Wrong draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Particle), "Wrong draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Custom), "Wrong draw call data size.");
static_assert(sizeof(ShaderObjectData) == sizeof(Float4) * ARRAY_COUNT(ShaderObjectData::Raw), "Wrong object data.");
namespace
{
@@ -34,6 +36,40 @@ namespace
CriticalSection MemPoolLocker;
}
void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, float lodDitherFactor)
{
Half4 lightmapUVsAreaPacked(*(Float4*)&lightmapUVsArea);
Float2 lightmapUVsAreaPackedAliased = *(Float2*)&lightmapUVsAreaPacked;
Raw[0] = Float4(worldMatrix.M11, worldMatrix.M12, worldMatrix.M13, worldMatrix.M41);
Raw[1] = Float4(worldMatrix.M21, worldMatrix.M22, worldMatrix.M23, worldMatrix.M42);
Raw[2] = Float4(worldMatrix.M31, worldMatrix.M32, worldMatrix.M33, worldMatrix.M43);
Raw[3] = Float4(prevWorldMatrix.M11, prevWorldMatrix.M12, prevWorldMatrix.M13, prevWorldMatrix.M41);
Raw[4] = Float4(prevWorldMatrix.M21, prevWorldMatrix.M22, prevWorldMatrix.M23, prevWorldMatrix.M42);
Raw[5] = Float4(prevWorldMatrix.M31, prevWorldMatrix.M32, prevWorldMatrix.M33, prevWorldMatrix.M43);
Raw[6] = Float4(geometrySize, perInstanceRandom);
Raw[7] = Float4(worldDeterminantSign, lodDitherFactor, lightmapUVsAreaPackedAliased.X, lightmapUVsAreaPackedAliased.Y);
// TODO: pack WorldDeterminantSign and LODDitherFactor
}
// Unpacks the eight Float4 rows of Raw back into separate values (inverse of the layout written by Store).
void ShaderObjectData::Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Rectangle& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, float& lodDitherFactor) const
{
    // Current world matrix: rows 0-2 hold the 3-component rows, their W components form the fourth row
    const Float4 translation(Raw[0].W, Raw[1].W, Raw[2].W, 1.0f);
    worldMatrix.SetRow1(Float4(Float3(Raw[0]), 0.0f));
    worldMatrix.SetRow2(Float4(Float3(Raw[1]), 0.0f));
    worldMatrix.SetRow3(Float4(Float3(Raw[2]), 0.0f));
    worldMatrix.SetRow4(translation);

    // Previous world matrix: rows 3-5, same arrangement
    const Float4 prevTranslation(Raw[3].W, Raw[4].W, Raw[5].W, 1.0f);
    prevWorldMatrix.SetRow1(Float4(Float3(Raw[3]), 0.0f));
    prevWorldMatrix.SetRow2(Float4(Float3(Raw[4]), 0.0f));
    prevWorldMatrix.SetRow3(Float4(Float3(Raw[5]), 0.0f));
    prevWorldMatrix.SetRow4(prevTranslation);

    // Row 6: geometry size + per-instance random
    const Float4& sizeAndRandom = Raw[6];
    geometrySize = Float3(sizeAndRandom);
    perInstanceRandom = sizeAndRandom.W;

    // Row 7: determinant sign, LOD dither factor and the half-packed lightmap area
    const Float4& misc = Raw[7];
    worldDeterminantSign = misc.X;
    lodDitherFactor = misc.Y;

    // The two last floats carry the raw Half4 storage; reinterpret it and expand back to full floats
    Float2 areaBits(misc.Z, misc.W);
    Half4 areaHalfs(*(Half4*)&areaBits);
    *(Float4*)&lightmapUVsArea = areaHalfs.ToFloat4();
}
bool RenderLightData::CanRenderShadow(const RenderView& view) const
{
bool result = false;
@@ -406,7 +442,8 @@ RenderList::RenderList(const SpawnParams& params)
, AtmosphericFog(nullptr)
, Fog(nullptr)
, Blendable(32)
, _instanceBuffer(1024 * sizeof(InstanceData), sizeof(InstanceData), TEXT("Instance Buffer"))
, ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Bufffer"))
, _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"))
{
}
@@ -439,6 +476,7 @@ void RenderList::Clear()
Settings = PostProcessSettings();
Blendable.Clear();
_instanceBuffer.Clear();
ObjectBuffer.Clear();
}
struct PackedSortKey
@@ -480,18 +518,6 @@ FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall&
drawCall.SortKey = key.Data;
}
// Checks whether two draw calls can be merged into a single instanced batch for the given pass.
FORCE_INLINE bool CanBatchDrawCalls(const DrawCall& a, const DrawCall& b, DrawPass pass)
{
    // Both materials have to support instancing (the handlers are only read after these calls succeed)
    IMaterial::InstancingHandler handlerA, handlerB;
    if (!a.Material->CanUseInstancing(handlerA))
        return false;
    if (!b.Material->CanUseInstancing(handlerB))
        return false;

    // Draw calls with zero instance count are never batched
    if (a.InstanceCount == 0 || b.InstanceCount == 0)
        return false;

    // Both materials must share the same batching callback and that callback must accept the pair
    if (handlerA.CanBatch != handlerB.CanBatch)
        return false;
    if (!handlerA.CanBatch(a, b, pass))
        return false;

    // World determinant signs have to match (their product must be positive)
    return a.WorldDeterminantSign * b.WorldDeterminantSign > 0;
}
void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals, int8 sortOrder)
{
#if ENABLE_ASSERTION_LOW_LAYERS
@@ -586,9 +612,32 @@ void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawP
}
}
void RenderList::BuildObjectsBuffer()
{
int32 count = DrawCalls.Count();
for (const auto& e : BatchedDrawCalls)
count += e.Instances.Count();
ObjectBuffer.Clear();
if (count == 0)
return;
PROFILE_CPU();
ObjectBuffer.Data.Resize(count * sizeof(ShaderObjectData));
auto* src = (const DrawCall*)DrawCalls.Get();
auto* dst = (ShaderObjectData*)ObjectBuffer.Data.Get();
for (int32 i = 0; i < DrawCalls.Count(); i++)
{
dst->Store(src[i]);
dst++;
}
int32 startIndex = DrawCalls.Count();
for (auto& batch : BatchedDrawCalls)
{
batch.ObjectsStartIndex = startIndex;
Platform::MemoryCopy(dst, batch.Instances.Get(), batch.Instances.Count() * sizeof(ShaderObjectData));
dst += batch.Instances.Count();
startIndex += batch.Instances.Count();
}
ZoneValue(ObjectBuffer.Data.Count() / 1024); // Objects Buffer size in kB
}
void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls, DrawPass pass, bool stable)
@@ -642,15 +691,24 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
const DrawCall& drawCall = drawCallsData[listData[i]];
int32 batchSize = 1;
int32 instanceCount = drawCall.InstanceCount;
// Check the following draw calls sequence to merge them
for (int32 j = i + 1; j < listSize; j++)
IMaterial::InstancingHandler drawCallHandler, otherHandler;
if (instanceCount != 0 && drawCall.Material->CanUseInstancing(drawCallHandler))
{
const DrawCall& other = drawCallsData[listData[j]];
if (!CanBatchDrawCalls(drawCall, other, pass))
break;
batchSize++;
instanceCount += other.InstanceCount;
// Check the following draw calls sequence to merge them
for (int32 j = i + 1; j < listSize; j++)
{
const DrawCall& other = drawCallsData[listData[j]];
const bool canBatch =
other.Material->CanUseInstancing(otherHandler) &&
other.InstanceCount != 0 &&
drawCallHandler.CanBatch == otherHandler.CanBatch &&
drawCallHandler.CanBatch(drawCall, other, pass) &&
drawCall.WorldDeterminantSign * other.WorldDeterminantSign > 0;
if (!canBatch)
break;
batchSize++;
instanceCount += other.InstanceCount;
}
}
DrawBatch batch;
@@ -677,72 +735,86 @@ FORCE_INLINE bool CanUseInstancing(DrawPass pass)
return pass == DrawPass::GBuffer || pass == DrawPass::Depth;
}
void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls, GPUTextureView* input)
// Returns true when both draw calls use the same index buffer, index range and vertex buffer bindings.
FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b)
{
    if (a->Geometry.IndexBuffer != b->Geometry.IndexBuffer)
        return false;
    if (a->Draw.IndicesCount != b->Draw.IndicesCount)
        return false;
    if (a->Draw.StartIndex != b->Draw.StartIndex)
        return false;

    // One compare spans VertexBuffers and VertexBuffersOffsets together
    // NOTE(review): relies on both arrays being adjacent inside DrawCall::Geometry - confirm against the struct declaration
    const auto compareSize = sizeof(a->Geometry.VertexBuffers) + sizeof(a->Geometry.VertexBuffersOffsets);
    return Platform::MemoryCompare(a->Geometry.VertexBuffers, b->Geometry.VertexBuffers, compareSize) == 0;
}
void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input)
{
if (list.IsEmpty())
return;
PROFILE_GPU_CPU("Drawing");
const auto* drawCallsData = drawCalls.Get();
const auto* drawCallsData = drawCallsList->DrawCalls.Get();
const auto* listData = list.Indices.Get();
const auto* batchesData = list.Batches.Get();
const auto context = GPUDevice::Instance->GetMainContext();
bool useInstancing = list.CanUseInstancing && CanUseInstancing(renderContext.View.Pass) && GPUDevice::Instance->Limits.HasInstancing;
TaaJitterRemoveContext taaJitterRemove(renderContext.View);
// Lazy-init objects buffer (if user didn't do it)
if (drawCallsList->ObjectBuffer.Data.IsEmpty())
{
drawCallsList->BuildObjectsBuffer();
drawCallsList->ObjectBuffer.Flush(context);
}
// Clear SR slots to prevent any resources binding issues (leftovers from the previous passes)
context->ResetSR();
// Prepare instance buffer
if (useInstancing)
{
int32 instancedBatchesCount = 0;
// Estimate the maximum amount of elements for all instanced draws
int32 instancesCount = 0;
for (int32 i = 0; i < list.Batches.Count(); i++)
{
auto& batch = batchesData[i];
const DrawBatch& batch = batchesData[i];
if (batch.BatchSize > 1)
instancedBatchesCount += batch.BatchSize;
instancesCount += batch.BatchSize;
}
for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++)
{
auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
if (batch.Instances.Count() > 1)
instancedBatchesCount += batch.Instances.Count();
const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
instancesCount += batch.Instances.Count();
}
if (instancedBatchesCount != 0)
if (instancesCount != 0)
{
PROFILE_CPU_NAMED("Build Instancing");
_instanceBuffer.Clear();
_instanceBuffer.Data.Resize(instancedBatchesCount * sizeof(InstanceData));
auto instanceData = (InstanceData*)_instanceBuffer.Data.Get();
_instanceBuffer.Data.Resize(instancesCount * sizeof(ShaderObjectDrawInstanceData));
auto instanceData = (ShaderObjectDrawInstanceData*)_instanceBuffer.Data.Get();
// Write to instance buffer
for (int32 i = 0; i < list.Batches.Count(); i++)
{
auto& batch = batchesData[i];
const DrawBatch& batch = batchesData[i];
if (batch.BatchSize > 1)
{
IMaterial::InstancingHandler handler;
drawCallsData[listData[batch.StartIndex]].Material->CanUseInstancing(handler);
for (int32 j = 0; j < batch.BatchSize; j++)
{
auto& drawCall = drawCallsData[listData[batch.StartIndex + j]];
handler.WriteDrawCall(instanceData, drawCall);
instanceData->ObjectIndex = listData[batch.StartIndex + j];
instanceData++;
}
}
}
for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++)
{
auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
if (batch.Instances.Count() > 1)
const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
for (int32 j = 0; j < batch.Instances.Count(); j++)
{
Platform::MemoryCopy(instanceData, batch.Instances.Get(), batch.Instances.Count() * sizeof(InstanceData));
instanceData += batch.Instances.Count();
instanceData->ObjectIndex = batch.ObjectsStartIndex + j;
instanceData++;
}
}
ASSERT((byte*)instanceData == _instanceBuffer.Data.end());
// Upload data
_instanceBuffer.Flush(context);
ZoneValue(instancesCount);
}
else
{
@@ -752,132 +824,122 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
}
// Execute draw calls
int32 draws = list.Batches.Count();
int32 materialBinds = list.Batches.Count();
MaterialBase::BindParameters bindParams(context, renderContext);
bindParams.ObjectBuffer = drawCallsList->ObjectBuffer.GetBuffer()->View();
bindParams.Input = input;
bindParams.BindViewData();
MaterialShaderDataPerDraw perDraw;
perDraw.DrawPadding = Float3::Zero;
GPUConstantBuffer* perDrawCB = IMaterial::BindParameters::PerDrawConstants;
context->BindCB(2, perDrawCB); // TODO: use rootSignature/pushConstants on D3D12/Vulkan
constexpr int32 vbMax = ARRAY_COUNT(DrawCall::Geometry.VertexBuffers);
if (useInstancing)
{
GPUBuffer* vb[vbMax + 1];
uint32 vbOffsets[vbMax + 1];
vb[3] = _instanceBuffer.GetBuffer(); // Pass object index in a vertex stream at slot 3 (used by VS in Surface.shader)
vbOffsets[3] = 0;
int32 instanceBufferOffset = 0;
GPUBuffer* vb[4];
uint32 vbOffsets[4];
for (int32 i = 0; i < list.Batches.Count(); i++)
{
auto& batch = batchesData[i];
const DrawCall& drawCall = drawCallsData[listData[batch.StartIndex]];
const DrawBatch& batch = batchesData[i];
uint32 drawCallIndex = listData[batch.StartIndex];
const DrawCall& drawCall = drawCallsData[drawCallIndex];
int32 vbCount = 0;
while (vbCount < ARRAY_COUNT(drawCall.Geometry.VertexBuffers) && drawCall.Geometry.VertexBuffers[vbCount])
bindParams.Instanced = batch.BatchSize != 1;
bindParams.DrawCall = &drawCall;
bindParams.DrawCall->Material->Bind(bindParams);
if (bindParams.Instanced)
{
vb[vbCount] = drawCall.Geometry.VertexBuffers[vbCount];
vbOffsets[vbCount] = drawCall.Geometry.VertexBuffersOffsets[vbCount];
vbCount++;
}
for (int32 j = vbCount; j < ARRAY_COUNT(drawCall.Geometry.VertexBuffers); j++)
{
vb[vbCount] = nullptr;
vbOffsets[vbCount] = 0;
}
// One or more draw calls per batch
const DrawCall* activeDraw = &drawCall;
int32 activeCount = 1;
for (int32 j = 1; j <= batch.BatchSize; j++)
{
if (j != batch.BatchSize && DrawsEqual(activeDraw, drawCallsData + listData[batch.StartIndex + j]))
{
// Group two draw calls into active draw call
activeCount++;
continue;
}
bindParams.FirstDrawCall = &drawCall;
bindParams.DrawCallsCount = batch.BatchSize;
drawCall.Material->Bind(bindParams);
// Draw whole active draw (instanced)
Platform::MemoryCopy(vb, activeDraw->Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers));
Platform::MemoryCopy(vbOffsets, activeDraw->Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets));
context->BindIB(activeDraw->Geometry.IndexBuffer);
context->BindVB(ToSpan(vb, ARRAY_COUNT(vb)), vbOffsets);
context->DrawIndexedInstanced(activeDraw->Draw.IndicesCount, activeCount, instanceBufferOffset, 0, activeDraw->Draw.StartIndex);
instanceBufferOffset += activeCount;
context->BindIB(drawCall.Geometry.IndexBuffer);
if (drawCall.InstanceCount == 0)
{
// No support for batching indirect draw calls
ASSERT_LOW_LAYER(batch.BatchSize == 1);
context->BindVB(ToSpan(vb, vbCount), vbOffsets);
context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
// Reset active draw
activeDraw = drawCallsData + listData[batch.StartIndex + j];
activeCount = 1;
}
}
else
{
if (batch.BatchSize == 1)
// Pass object index in constant buffer
perDraw.DrawObjectIndex = drawCallIndex;
context->UpdateCB(perDrawCB, &perDraw);
// Single-draw call batch
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
if (drawCall.InstanceCount == 0)
{
context->BindVB(ToSpan(vb, vbCount), vbOffsets);
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.InstanceCount, 0, 0, drawCall.Draw.StartIndex);
context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
}
else
{
vbCount = 3;
vb[vbCount] = _instanceBuffer.GetBuffer();
vbOffsets[vbCount] = 0;
vbCount++;
context->BindVB(ToSpan(vb, vbCount), vbOffsets);
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.InstanceCount, instanceBufferOffset, 0, drawCall.Draw.StartIndex);
instanceBufferOffset += batch.BatchSize;
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.InstanceCount, 0, 0, drawCall.Draw.StartIndex);
}
}
}
for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++)
{
auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
auto& drawCall = batch.DrawCall;
const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
const DrawCall& drawCall = batch.DrawCall;
int32 vbCount = 0;
while (vbCount < ARRAY_COUNT(drawCall.Geometry.VertexBuffers) && drawCall.Geometry.VertexBuffers[vbCount])
{
vb[vbCount] = drawCall.Geometry.VertexBuffers[vbCount];
vbOffsets[vbCount] = drawCall.Geometry.VertexBuffersOffsets[vbCount];
vbCount++;
}
for (int32 j = vbCount; j < ARRAY_COUNT(drawCall.Geometry.VertexBuffers); j++)
{
vb[vbCount] = nullptr;
vbOffsets[vbCount] = 0;
}
bindParams.FirstDrawCall = &drawCall;
bindParams.DrawCallsCount = batch.Instances.Count();
drawCall.Material->Bind(bindParams);
bindParams.Instanced = true;
bindParams.DrawCall = &drawCall;
bindParams.DrawCall->Material->Bind(bindParams);
Platform::MemoryCopy(vb, drawCall.Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers));
Platform::MemoryCopy(vbOffsets, drawCall.Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets));
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(vb, vbMax + 1), vbOffsets);
if (drawCall.InstanceCount == 0)
{
ASSERT_LOW_LAYER(batch.Instances.Count() == 1);
context->BindVB(ToSpan(vb, vbCount), vbOffsets);
context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
}
else
{
if (batch.Instances.Count() == 1)
{
context->BindVB(ToSpan(vb, vbCount), vbOffsets);
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.Instances.Count(), 0, 0, drawCall.Draw.StartIndex);
}
else
{
vbCount = 3;
vb[vbCount] = _instanceBuffer.GetBuffer();
vbOffsets[vbCount] = 0;
vbCount++;
context->BindVB(ToSpan(vb, vbCount), vbOffsets);
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.Instances.Count(), instanceBufferOffset, 0, drawCall.Draw.StartIndex);
instanceBufferOffset += batch.Instances.Count();
}
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.Instances.Count(), instanceBufferOffset, 0, drawCall.Draw.StartIndex);
instanceBufferOffset += batch.Instances.Count();
}
}
draws += list.PreBatchedDrawCalls.Count();
materialBinds += list.PreBatchedDrawCalls.Count();
}
else
{
bindParams.DrawCallsCount = 1;
for (int32 i = 0; i < list.Batches.Count(); i++)
{
auto& batch = batchesData[i];
const DrawBatch& batch = batchesData[i];
bindParams.DrawCall = drawCallsData + listData[batch.StartIndex];
bindParams.DrawCall->Material->Bind(bindParams);
for (int32 j = 0; j < batch.BatchSize; j++)
{
const DrawCall& drawCall = drawCalls[listData[batch.StartIndex + j]];
bindParams.FirstDrawCall = &drawCall;
drawCall.Material->Bind(bindParams);
perDraw.DrawObjectIndex = listData[batch.StartIndex + j];
context->UpdateCB(perDrawCB, &perDraw);
const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex];
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, 3), drawCall.Geometry.VertexBuffersOffsets);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
if (drawCall.InstanceCount == 0)
{
@@ -891,43 +953,38 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
}
for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++)
{
auto& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
auto drawCall = batch.DrawCall;
drawCall.ObjectRadius = 0.0f;
bindParams.FirstDrawCall = &drawCall;
const auto* instancesData = batch.Instances.Get();
const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
const DrawCall& drawCall = batch.DrawCall;
bindParams.DrawCall = &drawCall;
bindParams.DrawCall->Material->Bind(bindParams);
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
for (int32 j = 0; j < batch.Instances.Count(); j++)
{
auto& instance = instancesData[j];
drawCall.ObjectPosition = instance.InstanceOrigin;
drawCall.PerInstanceRandom = instance.PerInstanceRandom;
auto lightmapArea = instance.InstanceLightmapArea.ToFloat4();
drawCall.Surface.LightmapUVsArea = *(Rectangle*)&lightmapArea;
drawCall.Surface.LODDitherFactor = instance.LODDitherFactor;
drawCall.World.SetRow1(Float4(instance.InstanceTransform1, 0.0f));
drawCall.World.SetRow2(Float4(instance.InstanceTransform2, 0.0f));
drawCall.World.SetRow3(Float4(instance.InstanceTransform3, 0.0f));
drawCall.World.SetRow4(Float4(instance.InstanceOrigin, 1.0f));
drawCall.Material->Bind(bindParams);
perDraw.DrawObjectIndex = batch.ObjectsStartIndex + j;
context->UpdateCB(perDrawCB, &perDraw);
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, 3), drawCall.Geometry.VertexBuffersOffsets);
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex);
}
draws += batch.Instances.Count();
}
materialBinds += list.PreBatchedDrawCalls.Count();
if (list.Batches.IsEmpty() && list.Indices.Count() != 0)
{
// Draw calls list has nto been batched so execute draw calls separately
// Draw calls list has not been batched so execute draw calls separately
for (int32 j = 0; j < list.Indices.Count(); j++)
{
const DrawCall& drawCall = drawCalls[listData[j]];
bindParams.FirstDrawCall = &drawCall;
perDraw.DrawObjectIndex = listData[j];
context->UpdateCB(perDrawCB, &perDraw);
const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex];
bindParams.DrawCall = &drawCall;
drawCall.Material->Bind(bindParams);
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, 3), drawCall.Geometry.VertexBuffersOffsets);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
if (drawCall.InstanceCount == 0)
{
@@ -938,10 +995,10 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex);
}
}
draws += list.Indices.Count();
materialBinds += list.Indices.Count();
}
}
ZoneValue(draws);
ZoneValue(materialBinds); // Material shaders bindings count
}
void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, uint32& batchKey)
@@ -971,14 +1028,3 @@ bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b, Draw
}
return false;
}
void SurfaceDrawCallHandler::WriteDrawCall(InstanceData* instanceData, const DrawCall& drawCall)
{
instanceData->InstanceOrigin = Float3(drawCall.World.M41, drawCall.World.M42, drawCall.World.M43);
instanceData->PerInstanceRandom = drawCall.PerInstanceRandom;
instanceData->InstanceTransform1 = Float3(drawCall.World.M11, drawCall.World.M12, drawCall.World.M13);
instanceData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
instanceData->InstanceTransform2 = Float3(drawCall.World.M21, drawCall.World.M22, drawCall.World.M23);
instanceData->InstanceTransform3 = Float3(drawCall.World.M31, drawCall.World.M32, drawCall.World.M33);
instanceData->InstanceLightmapArea = Half4(drawCall.Surface.LightmapUVsArea);
}

View File

@@ -239,7 +239,8 @@ struct DrawBatch
struct BatchedDrawCall
{
DrawCall DrawCall;
Array<struct InstanceData, RendererAllocation> Instances;
uint16 ObjectsStartIndex = 0; // Index of the instances start in the ObjectsBuffer (set internally).
Array<struct ShaderObjectData, RendererAllocation> Instances;
};
/// <summary>
@@ -413,6 +414,11 @@ public:
/// </summary>
Float3 FrustumCornersVs[8];
/// <summary>
/// Objects buffer that contains ShaderObjectData for each DrawCall.
/// </summary>
DynamicTypedBuffer ObjectBuffer;
private:
DynamicVertexBuffer _instanceBuffer;
@@ -517,6 +523,11 @@ public:
/// <param name="sortOrder">Object sorting key.</param>
void AddDrawCall(const RenderContextBatch& renderContextBatch, DrawPass drawModes, StaticFlags staticFlags, ShadowsCastingMode shadowsMode, const BoundingSphere& bounds, DrawCall& drawCall, bool receivesDecals = true, int8 sortOrder = 0);
/// <summary>
/// Writes all draw calls into large objects buffer (used for random-access object data access on a GPU). Can be executed in async.
/// </summary>
void BuildObjectsBuffer();
/// <summary>
/// Sorts the collected draw calls list.
/// </summary>
@@ -549,7 +560,7 @@ public:
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
API_FUNCTION() FORCE_INLINE void ExecuteDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, DrawCallsListType listType, GPUTextureView* input = nullptr)
{
ExecuteDrawCalls(renderContext, DrawCallsLists[(int32)listType], DrawCalls, input);
ExecuteDrawCalls(renderContext, DrawCallsLists[(int32)listType], this, input);
}
/// <summary>
@@ -560,7 +571,7 @@ public:
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
FORCE_INLINE void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, GPUTextureView* input = nullptr)
{
ExecuteDrawCalls(renderContext, list, DrawCalls, input);
ExecuteDrawCalls(renderContext, list, this, input);
}
/// <summary>
@@ -568,28 +579,43 @@ public:
/// </summary>
/// <param name="renderContext">The rendering context.</param>
/// <param name="list">The collected draw calls indices list.</param>
/// <param name="drawCalls">The collected draw calls list.</param>
/// <param name="drawCallsList">The collected draw calls list owner.</param>
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls, GPUTextureView* input);
void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input);
};
/// <summary>
/// Represents data per instance element used for instanced rendering.
/// Represents a single object information for GPU rendering.
/// </summary>
PACK_STRUCT(struct FLAXENGINE_API InstanceData
GPU_CB_STRUCT(ShaderObjectData
{
Float3 InstanceOrigin;
float PerInstanceRandom;
Float3 InstanceTransform1;
float LODDitherFactor;
Float3 InstanceTransform2;
Float3 InstanceTransform3;
Half4 InstanceLightmapArea;
Float4 Raw[8];
void FLAXENGINE_API Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom = 0.0f, float worldDeterminantSign = 1.0f, float lodDitherFactor = 0.0f);
void FLAXENGINE_API Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Rectangle& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, float& lodDitherFactor) const;
FORCE_INLINE void Store(const DrawCall& drawCall)
{
Store(drawCall.World, drawCall.Surface.PrevWorld, drawCall.Surface.LightmapUVsArea, drawCall.Surface.GeometrySize, drawCall.PerInstanceRandom, drawCall.WorldDeterminantSign, drawCall.Surface.LODDitherFactor);
}
FORCE_INLINE void Load(DrawCall& drawCall) const
{
Load(drawCall.World, drawCall.Surface.PrevWorld, drawCall.Surface.LightmapUVsArea, drawCall.Surface.GeometrySize, drawCall.PerInstanceRandom, drawCall.WorldDeterminantSign, drawCall.Surface.LODDitherFactor);
drawCall.ObjectPosition = drawCall.World.GetTranslation();
}
});
/// <summary>
/// Represents data passed to Vertex Shader used for instanced rendering (per-instance element).
/// One element is written per instance into the instance vertex buffer by RenderList::ExecuteDrawCalls,
/// so the vertex shader receives only an index instead of the full per-instance transform data.
/// </summary>
PACK_STRUCT(struct ShaderObjectDrawInstanceData
{
// Index of the object to draw. ExecuteDrawCalls stores either the draw call index taken from the sorted indices list
// or, for pre-batched draw calls, BatchedDrawCall::ObjectsStartIndex plus the instance index within the batch.
// The shader side declares the matching vertex input as `uint ObjectIndex : ATTRIBUTE0`.
uint32 ObjectIndex;
});
struct SurfaceDrawCallHandler
{
static void GetHash(const DrawCall& drawCall, uint32& batchKey);
static bool CanBatch(const DrawCall& a, const DrawCall& b, DrawPass pass);
static void WriteDrawCall(InstanceData* instanceData, const DrawCall& drawCall);
};

View File

@@ -428,6 +428,9 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
// Sort draw calls
{
PROFILE_CPU_NAMED("Sort Draw Calls");
// TODO: run all of these functions in async via jobs
for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
renderContextBatch.Contexts[i].List->BuildObjectsBuffer();
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBuffer);
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBufferNoDecals);
renderContext.List->SortDrawCalls(renderContext, true, DrawCallsListType::Forward);
@@ -440,6 +443,11 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth);
shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth);
}
{
PROFILE_CPU_NAMED("FlushObjectsBuffer");
for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
renderContextBatch.Contexts[i].List->ObjectBuffer.Flush(context);
}
}
// Get the light accumulation buffer

View File

@@ -1390,7 +1390,7 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch)
if (!shadowContextStatic.List->DrawCallsLists[(int32)DrawCallsListType::Depth].IsEmpty() || !shadowContextStatic.List->ShadowDepthDrawCallsList.IsEmpty())
{
shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, DrawCallsListType::Depth);
shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List, nullptr);
tile.HasStaticGeometry = true;
}
}
@@ -1452,7 +1452,7 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch)
// Draw objects depth
auto& shadowContext = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++];
shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List, nullptr);
if (atlasLight.HasStaticShadowContext)
{
auto& shadowContextStatic = renderContextBatch.Contexts[atlasLight.ContextIndex + contextIndex++];
@@ -1462,7 +1462,7 @@ void ShadowsPass::RenderShadowMaps(RenderContextBatch& renderContextBatch)
{
// Draw static objects directly to the shadow map
shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, DrawCallsListType::Depth);
shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
shadowContextStatic.List->ExecuteDrawCalls(shadowContextStatic, shadowContextStatic.List->ShadowDepthDrawCallsList, renderContext.List, nullptr);
}
tile.HasStaticGeometry = true;
}

View File

@@ -408,7 +408,6 @@ void VolumetricFogPass::Render(RenderContext& renderContext)
InitCircleBuffer();
MaterialBase::BindParameters bindParams(context, renderContext);
bindParams.DrawCallsCount = 1;
CustomData customData;
customData.Shader = _shader->GetShader();
customData.GridSize = cache.GridSize;
@@ -435,7 +434,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext)
// Setup material shader data
customData.ParticleIndex = drawCall.Particle.VolumetricFog.ParticleIndex;
bindParams.FirstDrawCall = &drawCall;
bindParams.DrawCall = &drawCall;
drawCall.Material->Bind(bindParams);
// Setup volumetric shader data

View File

@@ -461,7 +461,7 @@ bool MaterialGenerator::Generate(WriteStream& source, MaterialInfo& materialInfo
switch (baseLayer->Domain)
{
case MaterialDomain::Surface:
srv = 2; // Skinning Bones + Prev Bones
srv = 3; // Objects + Skinning Bones + Prev Bones
break;
case MaterialDomain::Decal:
srv = 1; // Depth buffer

View File

@@ -58,6 +58,9 @@
#ifndef USE_PER_VIEW_CONSTANTS
#define USE_PER_VIEW_CONSTANTS 0
#endif
#ifndef USE_PER_DRAW_CONSTANTS
#define USE_PER_DRAW_CONSTANTS 0
#endif
#ifndef MATERIAL_TESSELLATION
#define MATERIAL_TESSELLATION MATERIAL_TESSELLATION_NONE
#endif
@@ -68,6 +71,65 @@
#define PER_BONE_MOTION_BLUR 0
#endif
// Object properties
// Unpacked per-object data used by material shaders. Filled either by LoadObject (from the objects buffer)
// or by the LoadObjectFromCB macro (from constant buffer variables).
// Note: the member order here is unrelated to the packed order read by LoadObject.
struct ObjectData
{
// Object world matrix (LoadObject rebuilds it from packed vectors 0-2: xyz hold rows 0-2, the w components form row 3)
float4x4 WorldMatrix;
// World matrix counterpart named 'Prev' (rebuilt by LoadObject the same way from packed vectors 3-5)
float4x4 PrevWorldMatrix;
// Geometry size (packed vector 6 xyz)
float3 GeometrySize;
// Sign of the world matrix determinant (packed vector 7 x)
float WorldDeterminantSign;
// LOD dithering factor (packed vector 7 y)
float LODDitherFactor;
// Per-instance random value (packed vector 6 w)
float PerInstanceRandom;
// Lightmap UVs area (LoadObject unpacks it from four half-precision values stored in packed vector 7 zw)
float4 LightmapArea;
};
// Unpacks two half-precision floats stored in a single 32-bit value:
// the low 16 bits become the X component and the high 16 bits become the Y component.
float2 UnpackHalf2(uint xy)
{
    uint lowBits = xy & 0xffff;
    uint highBits = xy >> 16;
    float2 unpacked;
    unpacked.x = f16tof32(lowBits);
    unpacked.y = f16tof32(highBits);
    return unpacked;
}
// Reads a single object from the global objects buffer and unpacks it into ObjectData.
// Every object occupies 8 consecutive float4 elements starting at objectIndex * 8.
ObjectData LoadObject(Buffer<float4> objectsBuffer, uint objectIndex)
{
    // The packed layout below has to stay in sync with ShaderObjectData::Store
    const uint firstVector = objectIndex * 8;

    // Vectors 0-2: world matrix (xyz = rows 0-2, w components = row 3)
    float4 world0 = objectsBuffer.Load(firstVector + 0);
    float4 world1 = objectsBuffer.Load(firstVector + 1);
    float4 world2 = objectsBuffer.Load(firstVector + 2);

    // Vectors 3-5: previous world matrix (same packing as the world matrix)
    float4 prevWorld0 = objectsBuffer.Load(firstVector + 3);
    float4 prevWorld1 = objectsBuffer.Load(firstVector + 4);
    float4 prevWorld2 = objectsBuffer.Load(firstVector + 5);

    // Vector 6: geometry size + per-instance random, vector 7: misc scalars + packed lightmap area
    float4 sizeAndRandom = objectsBuffer.Load(firstVector + 6);
    float4 miscData = objectsBuffer.Load(firstVector + 7);

    ObjectData result = (ObjectData)0;

    result.WorldMatrix[0] = float4(world0.xyz, 0.0f);
    result.WorldMatrix[1] = float4(world1.xyz, 0.0f);
    result.WorldMatrix[2] = float4(world2.xyz, 0.0f);
    result.WorldMatrix[3] = float4(world0.w, world1.w, world2.w, 1.0f);

    result.PrevWorldMatrix[0] = float4(prevWorld0.xyz, 0.0f);
    result.PrevWorldMatrix[1] = float4(prevWorld1.xyz, 0.0f);
    result.PrevWorldMatrix[2] = float4(prevWorld2.xyz, 0.0f);
    result.PrevWorldMatrix[3] = float4(prevWorld0.w, prevWorld1.w, prevWorld2.w, 1.0f);

    result.GeometrySize = sizeAndRandom.xyz;
    result.PerInstanceRandom = sizeAndRandom.w;

    result.WorldDeterminantSign = miscData.x;
    result.LODDitherFactor = miscData.y;

    // Lightmap area is stored as four halfs packed into the raw bits of the last two floats
    result.LightmapArea.xy = UnpackHalf2(asuint(miscData.z));
    result.LightmapArea.zw = UnpackHalf2(asuint(miscData.w));

    return result;
}
// Loads the object data from the constant buffer into the variable
// Expands to a sequence of statements (not a single expression): 'var' is zeroed first and then every ObjectData member
// is assigned from the identically named identifier (WorldMatrix, PrevWorldMatrix, GeometrySize, PerInstanceRandom,
// WorldDeterminantSign, LODDitherFactor, LightmapArea), so all of those must be visible at the expansion site.
// Both matrices are passed through ToMatrix4x4 (defined elsewhere); the other values are copied as-is.
// Because several statements are emitted, do not use it as the unbraced body of an if/for.
#define LoadObjectFromCB(var) \
var = (ObjectData)0; \
var.WorldMatrix = ToMatrix4x4(WorldMatrix); \
var.PrevWorldMatrix = ToMatrix4x4(PrevWorldMatrix); \
var.GeometrySize = GeometrySize; \
var.PerInstanceRandom = PerInstanceRandom; \
var.WorldDeterminantSign = WorldDeterminantSign; \
var.LODDitherFactor = LODDitherFactor; \
var.LightmapArea = LightmapArea;
// Material properties
struct Material
{
@@ -110,6 +172,15 @@ cbuffer ViewData : register(b1)
};
#endif
// Draw pipeline constant buffer (with per-draw constants at slot 2)
// Declared only when USE_PER_DRAW_CONSTANTS is non-zero (it defaults to 0 earlier in this file).
// The CPU side (RenderList::ExecuteDrawCalls) binds its per-draw constant buffer at slot 2 and, for non-instanced draws,
// updates it before each draw with zeroed padding and the index of the object to render.
#if USE_PER_DRAW_CONSTANTS
cbuffer DrawData : register(b2)
{
// Unused padding placed in front of the index (written as zero by the CPU side)
float3 DrawPadding;
// Index of the object being drawn (the value that instanced draws receive via the ObjectIndex vertex attribute instead)
uint DrawObjectIndex;
};
#endif
struct ModelInput
{
float3 Position : POSITION;
@@ -121,11 +192,7 @@ struct ModelInput
half4 Color : COLOR;
#endif
#if USE_INSTANCING
float4 InstanceOrigin : ATTRIBUTE0; // .w contains PerInstanceRandom
float4 InstanceTransform1 : ATTRIBUTE1; // .w contains LODDitherFactor
float3 InstanceTransform2 : ATTRIBUTE2;
float3 InstanceTransform3 : ATTRIBUTE3;
half4 InstanceLightmapArea : ATTRIBUTE4;
uint ObjectIndex : ATTRIBUTE0;
#endif
};
@@ -133,11 +200,7 @@ struct ModelInput_PosOnly
{
float3 Position : POSITION;
#if USE_INSTANCING
float4 InstanceOrigin : ATTRIBUTE0; // .w contains PerInstanceRandom
float4 InstanceTransform1 : ATTRIBUTE1; // .w contains LODDitherFactor
float3 InstanceTransform2 : ATTRIBUTE2;
float3 InstanceTransform3 : ATTRIBUTE3;
half4 InstanceLightmapArea : ATTRIBUTE4;
uint ObjectIndex : ATTRIBUTE0;
#endif
};