Optimize Foliage rendering with manual draw calls submission

This commit is contained in:
Wojtek Figat
2021-07-02 00:13:54 +02:00
parent fe2e17e353
commit 44c4ab6580
3 changed files with 186 additions and 7 deletions

View File

@@ -13,6 +13,9 @@ struct FoliageInstance;
// Enables using single quad-tree acceleration structure per foliage actor, otherwise will use quad-tree per foliage type to optimize drawing performance at a cost of higher memory usage.
#define FOLIAGE_USE_SINGLE_QUAD_TREE 0
// Enables manual draw call batching instead of the automated generic solution in RenderList. Boosts performance for large foliage. Only used when FOLIAGE_USE_SINGLE_QUAD_TREE is disabled.
#define FOLIAGE_USE_DRAW_CALLS_BATCHING 1
// Size of the instance allocation chunks (number of instances per allocated page)
#define FOLIAGE_INSTANCE_CHUNKS_SIZE (4096*4)

View File

@@ -7,6 +7,11 @@
#include "Engine/Core/Random.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Graphics/RenderTask.h"
#if !FOLIAGE_USE_SINGLE_QUAD_TREE && FOLIAGE_USE_DRAW_CALLS_BATCHING
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Renderer/RenderList.h"
#endif
#include "Engine/Level/SceneQuery.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Serialization/Serialization.h"
@@ -74,6 +79,171 @@ void Foliage::AddToCluster(ChunkedArray<FoliageCluster, FOLIAGE_CLUSTER_CHUNKS_S
}
}
#if !FOLIAGE_USE_SINGLE_QUAD_TREE && FOLIAGE_USE_DRAW_CALLS_BATCHING
// Submits a single-instance draw call for every drawable mesh of the given model LOD.
//   renderContext   - rendering context; provides the view (shadow pass mask) and the output render list.
//   instance        - foliage instance to draw; supplies world matrix, lightmap data and per-instance random value.
//   type            - foliage type of the instance; supplies per-material-slot entries and the cached draw modes.
//   model           - model asset used by the foliage type; supplies the material slots.
//   modelLod        - model LOD whose meshes are submitted.
//   lodDitherFactor - LOD dithering factor stored in the draw call (used for LOD cross-fade).
// A mesh that cannot be drawn (hidden slot, uninitialized mesh, unusable material or no matching draw pass)
// is skipped on its own; the remaining meshes of the LOD are still submitted.
void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instance, FoliageType& type, Model* model, const ModelLOD& modelLod, float lodDitherFactor)
{
    for (const auto& mesh : modelLod.Meshes)
    {
        // Skip only this mesh when its material slot entry is hidden or the mesh is not ready yet
        const auto& entry = type.Entries[mesh.GetMaterialSlotIndex()];
        if (!entry.Visible || !mesh.IsInitialized())
            continue;

        // Combine shadow casting settings of the foliage entry and the model slot, then mask the cached draw modes of the type with the passes allowed by the view
        const MaterialSlot& slot = model->MaterialSlots[mesh.GetMaterialSlotIndex()];
        const auto shadowsMode = static_cast<ShadowsCastingMode>(entry.ShadowsMode & slot.ShadowsMode);
        const auto drawModes = static_cast<DrawPass>(type._drawModes & renderContext.View.GetShadowsDrawPassMask(shadowsMode));

        // Select material (entry override first, then model slot, then the device default)
        MaterialBase* material;
        if (entry.Material && entry.Material->IsLoaded())
            material = entry.Material;
        else if (slot.Material && slot.Material->IsLoaded())
            material = slot.Material;
        else
            material = GPUDevice::Instance->GetDefaultMaterial();

        // Skip only this mesh when there is nothing to draw it with; other meshes may use a different slot and still be drawable
        if (!material || !material->IsSurface() || drawModes == DrawPass::None)
            continue;

        // Submit draw call
        DrawCall drawCall;
        mesh.GetDrawCallGeometry(drawCall);
        drawCall.InstanceCount = 1;
        drawCall.Material = material;
        drawCall.World = instance.World;
        drawCall.ObjectPosition = drawCall.World.GetTranslation();
        drawCall.Surface.GeometrySize = mesh.GetBox().GetSize();
        // The previous-frame transform is set to the current one
        drawCall.Surface.PrevWorld = instance.World;
        // Lightmap is bound only when the actor has the Lightmap static flag
        drawCall.Surface.Lightmap = _staticFlags & StaticFlags::Lightmap ? _scene->LightmapsData.GetReadyLightmap(instance.Lightmap.TextureIndex) : nullptr;
        drawCall.Surface.LightmapUVsArea = instance.Lightmap.UVsArea;
        drawCall.Surface.Skinning = nullptr;
        drawCall.Surface.LODDitherFactor = lodDitherFactor;
        // NOTE(review): the determinant sign is hard-coded, which presumably assumes non-mirrored instance transforms - confirm
        drawCall.WorldDeterminantSign = 1;
        drawCall.PerInstanceRandom = instance.Random;
        renderContext.List->AddDrawCall(drawModes, _staticFlags, drawCall, entry.ReceiveDecals);
    }
}
// Recursively draws the given foliage cluster for a single foliage type.
// Nodes with children forward the call to every child whose bounds intersect the culling frustum.
// Leaf nodes cull their instances one by one (distance + frustum), update the per-instance LOD transition state
// and submit the visible instances via DrawInstance.
//   renderContext - rendering context; provides the view used for culling and LOD selection.
//   cluster       - cluster to draw (dereferenced without a null check).
//   type          - foliage type the cluster belongs to; provides the model to draw.
void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, FoliageType& type)
{
// Skip clusters that are too far from view
if (Vector3::Distance(renderContext.View.Position, cluster->TotalBoundsSphere.Center) - cluster->TotalBoundsSphere.Radius > cluster->MaxCullDistance)
return;
//DebugDraw::DrawBox(cluster->Bounds, Color::Red);
// Draw visible children
if (cluster->Children[0])
{
// Don't store instances in non-leaf nodes
ASSERT_LOW_LAYER(cluster->Instances.IsEmpty());
// Only the first child is null-checked above; the macro dereferences all four children
// NOTE(review): assumes a node with a first child always has all four children - confirm against the cluster building code
#define DRAW_CLUSTER(idx) \
if (renderContext.View.CullingFrustum.Intersects(cluster->Children[idx]->TotalBounds)) \
DrawCluster(renderContext, cluster->Children[idx], type)
DRAW_CLUSTER(0);
DRAW_CLUSTER(1);
DRAW_CLUSTER(2);
DRAW_CLUSTER(3);
#undef DRAW_CLUSTER
}
else
{
// Draw visible instances
const auto frame = Engine::FrameCount;
const auto model = type.Model.Get();
for (int32 i = 0; i < cluster->Instances.Count(); i++)
{
auto& instance = *cluster->Instances[i];
// Per-instance culling: within the instance cull distance and intersecting the view frustum
if (Vector3::Distance(renderContext.View.Position, instance.Bounds.Center) - instance.Bounds.Radius < instance.CullDistance &&
renderContext.View.CullingFrustum.Intersects(instance.Bounds))
{
// Equals the current frame when the draw state was last updated in the previous frame
const auto modelFrame = instance.DrawState.PrevFrame + 1;
// Select a proper LOD index (model may be culled)
int32 lodIndex = RenderTools::ComputeModelLOD(model, instance.Bounds.Center, instance.Bounds.Radius, renderContext);
if (lodIndex == -1)
{
// Handling model fade-out transition
if (modelFrame == frame && instance.DrawState.PrevLOD != -1)
{
// Check if start transition
// (a LODTransition of 255 is used as the "no transition in progress" value; 0 starts a new one)
if (instance.DrawState.LODTransition == 255)
{
instance.DrawState.LODTransition = 0;
}
// NOTE(review): presumably advances the transition counter towards 255 - confirm in RenderTools
RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition);
// Check if end transition
if (instance.DrawState.LODTransition == 255)
{
// Transition finished: lodIndex is -1 here, so the instance is remembered as culled
instance.DrawState.PrevLOD = lodIndex;
}
else
{
// Still fading out: keep drawing the previous LOD with the current transition progress as the dither factor
const auto prevLOD = model->ClampLODIndex(instance.DrawState.PrevLOD);
const float normalizedProgress = static_cast<float>(instance.DrawState.LODTransition) * (1.0f / 255.0f);
DrawInstance(renderContext, instance, type, model, model->LODs[prevLOD], normalizedProgress);
}
}
instance.DrawState.PrevFrame = frame;
continue;
}
// Apply the view LOD bias and clamp the result to the LOD range of the model
lodIndex += renderContext.View.ModelLODBias;
lodIndex = model->ClampLODIndex(lodIndex);
// Check if it's the new frame and could update the drawing state (note: model instance could be rendered many times per frame to different viewports)
if (modelFrame == frame)
{
// Check if start transition
if (instance.DrawState.PrevLOD != lodIndex && instance.DrawState.LODTransition == 255)
{
instance.DrawState.LODTransition = 0;
}
RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition);
// Check if end transition
if (instance.DrawState.LODTransition == 255)
{
instance.DrawState.PrevLOD = lodIndex;
}
}
// Check if there was a gap between frames in drawing this model instance
else if (modelFrame < frame || instance.DrawState.PrevLOD == -1)
{
// Reset state
instance.DrawState.PrevLOD = lodIndex;
instance.DrawState.LODTransition = 255;
}
// Draw
if (instance.DrawState.PrevLOD == lodIndex)
{
// No transition in progress: draw the selected LOD without dithering
DrawInstance(renderContext, instance, type, model, model->LODs[lodIndex], 0.0f);
}
else if (instance.DrawState.PrevLOD == -1)
{
// Previously culled: fade the selected LOD in (dither factor goes from 1 down to 0 as the progress increases)
const float normalizedProgress = static_cast<float>(instance.DrawState.LODTransition) * (1.0f / 255.0f);
DrawInstance(renderContext, instance, type, model, model->LODs[lodIndex], 1.0f - normalizedProgress);
}
else
{
// Cross-fade: draw both the previous and the selected LOD during the transition
// NOTE(review): the negative dither factor for the selected LOD is presumably interpreted by the material as the inverted dither pattern - confirm
const auto prevLOD = model->ClampLODIndex(instance.DrawState.PrevLOD);
const float normalizedProgress = static_cast<float>(instance.DrawState.LODTransition) * (1.0f / 255.0f);
DrawInstance(renderContext, instance, type, model, model->LODs[prevLOD], normalizedProgress);
DrawInstance(renderContext, instance, type, model, model->LODs[lodIndex], normalizedProgress - 1.0f);
}
//DebugDraw::DrawSphere(instance.Bounds, Color::YellowGreen);
// Remember the frame in which the draw state was last updated
instance.DrawState.PrevFrame = frame;
}
}
}
}
#else
void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, Mesh::DrawInfo& draw)
{
// Skip clusters that are too far from view
@@ -115,7 +285,7 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster,
instance.DrawState.PrevWorld = instance.World;
// Draw model
draw.Lightmap = GetScene()->LightmapsData.GetReadyLightmap(instance.Lightmap.TextureIndex);
draw.Lightmap = _scene->LightmapsData.GetReadyLightmap(instance.Lightmap.TextureIndex);
draw.LightmapUVs = &instance.Lightmap.UVsArea;
draw.Buffer = &type.Entries;
draw.World = &instance.World;
@@ -133,6 +303,8 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster,
}
}
#endif
int32 Foliage::GetInstancesCount() const
{
return Instances.Count();
@@ -640,13 +812,11 @@ void Foliage::Draw(RenderContext& renderContext)
const auto drawModes = static_cast<DrawPass>(type.DrawModes & view.Pass & (int32)view.GetShadowsDrawPassMask(type.ShadowsMode));
type._canDraw = type.IsReady() && drawModes != DrawPass::None;
type._drawModes = drawModes;
if (type._canDraw)
{
for (int32 j = 0; j < type.Entries.Count(); j++)
{
auto& e = type.Entries[j];
e.ReceiveDecals = type.ReceiveDecals != 0;
e.ShadowsMode = type.ShadowsMode;
}
@@ -654,21 +824,22 @@ void Foliage::Draw(RenderContext& renderContext)
}
// Draw visible clusters
#if FOLIAGE_USE_SINGLE_QUAD_TREE
Mesh::DrawInfo draw;
draw.Flags = GetStaticFlags();
draw.DrawModes = (DrawPass)(DrawPass::Default & view.Pass);
draw.LODBias = 0;
draw.ForcedLOD = -1;
draw.VertexColors = nullptr;
#if FOLIAGE_USE_SINGLE_QUAD_TREE
if (Root)
DrawCluster(renderContext, Root, draw);
#else
for (auto& type : FoliageTypes)
{
if (!type.Root)
continue;
DrawCluster(renderContext, type.Root, draw);
if (type.Root && type._canDraw && type.Model->CanBeRendered())
{
DrawCluster(renderContext, type.Root, type);
}
}
#endif
}

View File

@@ -152,7 +152,12 @@ public:
private:
void AddToCluster(ChunkedArray<FoliageCluster, FOLIAGE_CLUSTER_CHUNKS_SIZE>& clusters, FoliageCluster* cluster, FoliageInstance& instance);
#if !FOLIAGE_USE_SINGLE_QUAD_TREE && FOLIAGE_USE_DRAW_CALLS_BATCHING
// Submits draw calls for the meshes of the given model LOD for a single foliage instance (manual draw calls path).
void DrawInstance(RenderContext& renderContext, FoliageInstance& instance, FoliageType& type, Model* model, const ModelLOD& modelLod, float lodDitherFactor);
// Recursively draws the cluster and its children for the given foliage type (manual draw calls path).
void DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, FoliageType& type);
#else
// Recursively draws the cluster and its children using the shared mesh drawing info.
void DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, Mesh::DrawInfo& draw);
#endif
public: