Optimize world matrix storage for drawable objects to use Matrix3x4 instead of full matrix

This commit is contained in:
Wojtek Figat
2024-03-26 11:29:01 +01:00
parent cdbb2cc813
commit 2dfb1058b2
15 changed files with 134 additions and 105 deletions

View File

@@ -2,6 +2,7 @@
#include "Matrix.h"
#include "Matrix3x3.h"
#include "Matrix3x4.h"
#include "Vector2.h"
#include "Quaternion.h"
#include "Transform.h"
@@ -887,3 +888,39 @@ Float4 Matrix::TransformPosition(const Matrix& m, const Float4& v)
m.Values[0][3] * v.Raw[0] + m.Values[1][3] * v.Raw[1] + m.Values[2][3] * v.Raw[2] + m.Values[3][3] * v.Raw[3]
);
}
// Copies the first 12 floats of the source matrix raw storage into this 3x4 matrix.
// The remaining 4 floats of the source (indices 12-15) are not read.
void Matrix3x4::SetMatrix(const Matrix& m)
{
    const float* source = m.Raw;
    float* target = Raw;
    for (int i = 0; i < 12; i++)
        target[i] = source[i];
}
// Fills this 3x4 matrix with the transposed source matrix: the element at (row, column)
// of this matrix is read from the element at (column, row) of the source, where the source
// raw storage is addressed with a stride of 4 floats per row.
// Source raw indices 3, 7, 11 and 15 are never read.
void Matrix3x4::SetMatrixTranspose(const Matrix& m)
{
    const float* source = m.Raw;
    float* target = Raw;
    for (int row = 0; row < 3; row++)
    {
        for (int column = 0; column < 4; column++)
            target[row * 4 + column] = source[column * 4 + row];
    }
}

View File

@@ -9,43 +9,14 @@
/// </summary>
struct FLAXENGINE_API Matrix3x4
{
float M[3][4];
void SetMatrix(const Matrix& m)
union
{
const float* src = m.Raw;
float* dst = &M[0][0];
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
dst[4] = src[4];
dst[5] = src[5];
dst[6] = src[6];
dst[7] = src[7];
dst[8] = src[8];
dst[9] = src[9];
dst[10] = src[10];
dst[11] = src[11];
}
float Values[3][4];
float Raw[12];
};
void SetMatrixTranspose(const Matrix& m)
{
const float* src = m.Raw;
float* dst = &M[0][0];
dst[0] = src[0];
dst[1] = src[4];
dst[2] = src[8];
dst[3] = src[12];
dst[4] = src[1];
dst[5] = src[5];
dst[6] = src[9];
dst[7] = src[13];
dst[8] = src[2];
dst[9] = src[6];
dst[10] = src[10];
dst[11] = src[14];
}
void SetMatrix(const Matrix& m);
void SetMatrixTranspose(const Matrix& m);
};
template<>

View File

@@ -3,6 +3,7 @@
#include "DeferredMaterialShader.h"
#include "MaterialShaderFeatures.h"
#include "MaterialParams.h"
#include "Engine/Core/Math/Matrix3x4.h"
#include "Engine/Graphics/RenderBuffers.h"
#include "Engine/Graphics/RenderView.h"
#include "Engine/Renderer/DrawCall.h"
@@ -17,8 +18,8 @@
#include "Engine/Graphics/RenderTask.h"
PACK_STRUCT(struct DeferredMaterialShaderData {
Matrix WorldMatrix;
Matrix PrevWorldMatrix;
Matrix3x4 WorldMatrix;
Matrix3x4 PrevWorldMatrix;
Float2 Dummy0;
float LODDitherFactor;
float PerInstanceRandom;
@@ -70,8 +71,8 @@ void DeferredMaterialShader::Bind(BindParameters& params)
// Setup material constants
{
Matrix::Transpose(drawCall.World, materialData->WorldMatrix);
Matrix::Transpose(drawCall.Surface.PrevWorld, materialData->PrevWorldMatrix);
materialData->WorldMatrix.SetMatrixTranspose(drawCall.World);
materialData->PrevWorldMatrix.SetMatrixTranspose(drawCall.Surface.PrevWorld);
materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign;
materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;

View File

@@ -3,6 +3,7 @@
#include "ForwardMaterialShader.h"
#include "MaterialShaderFeatures.h"
#include "MaterialParams.h"
#include "Engine/Core/Math/Matrix3x4.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPULimits.h"
@@ -18,8 +19,8 @@
#endif
PACK_STRUCT(struct ForwardMaterialShaderData {
Matrix WorldMatrix;
Matrix PrevWorldMatrix;
Matrix3x4 WorldMatrix;
Matrix3x4 PrevWorldMatrix;
Float2 Dummy0;
float LODDitherFactor;
float PerInstanceRandom;
@@ -76,8 +77,8 @@ void ForwardMaterialShader::Bind(BindParameters& params)
// Setup material constants
{
Matrix::Transpose(drawCall.World, materialData->WorldMatrix);
Matrix::Transpose(drawCall.Surface.PrevWorld, materialData->PrevWorldMatrix);
materialData->WorldMatrix.SetMatrixTranspose(drawCall.World);
materialData->PrevWorldMatrix.SetMatrixTranspose(drawCall.Surface.PrevWorld);
materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign;
materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;

View File

@@ -10,7 +10,7 @@
/// <summary>
/// Current materials shader version.
/// </summary>
#define MATERIAL_GRAPH_VERSION 161
#define MATERIAL_GRAPH_VERSION 162
class Material;
class GPUShader;

View File

@@ -3,6 +3,7 @@
#include "ParticleMaterialShader.h"
#include "MaterialShaderFeatures.h"
#include "MaterialParams.h"
#include "Engine/Core/Math/Matrix3x4.h"
#include "Engine/Renderer/DrawCall.h"
#include "Engine/Renderer/RenderList.h"
#include "Engine/Graphics/RenderView.h"
@@ -15,7 +16,7 @@
#include "Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.h"
PACK_STRUCT(struct ParticleMaterialShaderData {
Matrix WorldMatrix;
Matrix3x4 WorldMatrix;
uint32 SortedIndicesOffset;
float PerInstanceRandom;
int32 ParticleStride;
@@ -34,7 +35,7 @@ PACK_STRUCT(struct ParticleMaterialShaderData {
int32 RibbonTwistOffset;
int32 RibbonFacingVectorOffset;
uint32 RibbonSegmentCount;
Matrix WorldMatrixInverseTransposed;
Matrix3x4 WorldMatrixInverseTransposed;
});
DrawPass ParticleMaterialShader::GetDrawModes() const
@@ -101,7 +102,7 @@ void ParticleMaterialShader::Bind(BindParameters& params)
static StringView ParticleScaleOffset(TEXT("Scale"));
static StringView ParticleModelFacingModeOffset(TEXT("ModelFacingMode"));
Matrix::Transpose(drawCall.World, materialData->WorldMatrix);
materialData->WorldMatrix.SetMatrixTranspose(drawCall.World);
materialData->SortedIndicesOffset = drawCall.Particle.Particles->GPU.SortedIndices && params.RenderContext.View.Pass != DrawPass::Depth ? sortedIndicesOffset : 0xFFFFFFFF;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;
materialData->ParticleStride = drawCall.Particle.Particles->Stride;
@@ -113,7 +114,9 @@ void ParticleMaterialShader::Bind(BindParameters& params)
materialData->RotationOffset = drawCall.Particle.Particles->Layout->FindAttributeOffset(ParticleRotationOffset, ParticleAttribute::ValueTypes::Float3, -1);
materialData->ScaleOffset = drawCall.Particle.Particles->Layout->FindAttributeOffset(ParticleScaleOffset, ParticleAttribute::ValueTypes::Float3, -1);
materialData->ModelFacingModeOffset = drawCall.Particle.Particles->Layout->FindAttributeOffset(ParticleModelFacingModeOffset, ParticleAttribute::ValueTypes::Int, -1);
Matrix::Invert(drawCall.World, materialData->WorldMatrixInverseTransposed);
Matrix worldMatrixInverseTransposed;
Matrix::Invert(drawCall.World, worldMatrixInverseTransposed);
materialData->WorldMatrixInverseTransposed.SetMatrix(worldMatrixInverseTransposed);
}
// Select pipeline state based on current pass and render mode

View File

@@ -3,6 +3,7 @@
#include "TerrainMaterialShader.h"
#include "MaterialShaderFeatures.h"
#include "MaterialParams.h"
#include "Engine/Core/Math/Matrix3x4.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPULimits.h"
#include "Engine/Graphics/GPUDevice.h"
@@ -16,7 +17,7 @@
#include "Engine/Terrain/TerrainPatch.h"
PACK_STRUCT(struct TerrainMaterialShaderData {
Matrix WorldMatrix;
Matrix3x4 WorldMatrix;
Float3 WorldInvScale;
float WorldDeterminantSign;
float PerInstanceRandom;
@@ -66,7 +67,7 @@ void TerrainMaterialShader::Bind(BindParameters& params)
// Setup material constants
{
Matrix::Transpose(drawCall.World, materialData->WorldMatrix);
materialData->WorldMatrix.SetMatrixTranspose(drawCall.World);
const float scaleX = Float3(drawCall.World.M11, drawCall.World.M12, drawCall.World.M13).Length();
const float scaleY = Float3(drawCall.World.M21, drawCall.World.M22, drawCall.World.M23).Length();
const float scaleZ = Float3(drawCall.World.M31, drawCall.World.M32, drawCall.World.M33).Length();

View File

@@ -3,6 +3,7 @@
#include "VolumeParticleMaterialShader.h"
#include "MaterialShaderFeatures.h"
#include "MaterialParams.h"
#include "Engine/Core/Math/Matrix3x4.h"
#include "Engine/Renderer/DrawCall.h"
#include "Engine/Renderer/VolumetricFogPass.h"
#include "Engine/Renderer/RenderList.h"
@@ -16,8 +17,8 @@
PACK_STRUCT(struct VolumeParticleMaterialShaderData {
Matrix InverseViewProjectionMatrix;
Matrix WorldMatrix;
Matrix WorldMatrixInverseTransposed;
Matrix3x4 WorldMatrix;
Matrix3x4 WorldMatrixInverseTransposed;
Float3 GridSize;
float PerInstanceRandom;
float Dummy0;
@@ -76,8 +77,10 @@ void VolumeParticleMaterialShader::Bind(BindParameters& params)
// Setup material constants
{
Matrix::Transpose(view.IVP, materialData->InverseViewProjectionMatrix);
Matrix::Transpose(drawCall.World, materialData->WorldMatrix);
Matrix::Invert(drawCall.World, materialData->WorldMatrixInverseTransposed);
materialData->WorldMatrix.SetMatrixTranspose(drawCall.World);
Matrix worldMatrixInverseTransposed;
Matrix::Invert(drawCall.World, worldMatrixInverseTransposed);
materialData->WorldMatrixInverseTransposed.SetMatrix(worldMatrixInverseTransposed);
materialData->GridSize = customData->GridSize;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;
materialData->VolumetricFogMaxDistance = customData->VolumetricFogMaxDistance;

View File

@@ -3,6 +3,7 @@
#include "GlobalSignDistanceFieldPass.h"
#include "RenderList.h"
#include "Engine/Core/Math/Vector3.h"
#include "Engine/Core/Math/Matrix3x4.h"
#include "Engine/Core/Collections/HashSet.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Content/Content.h"
@@ -39,8 +40,8 @@ static_assert(GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT % 4 == 0, "Must be multiple o
PACK_STRUCT(struct ObjectRasterizeData
{
Matrix WorldToVolume; // TODO: use 3x4 matrix
Matrix VolumeToWorld; // TODO: use 3x4 matrix
Matrix3x4 WorldToVolume;
Matrix3x4 VolumeToWorld;
Float3 VolumeToUVWMul;
float MipOffset;
Float3 VolumeToUVWAdd;
@@ -670,15 +671,15 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
// Add object data for the GPU buffer
uint16 dataIndex = _objectsBufferCount++;
ObjectRasterizeData objectData;
Matrix localToWorldM, worldToLocal, volumeToWorld;
Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorldM);
Matrix::Invert(localToWorldM, worldToLocal);
Matrix localToWorld, worldToLocal, volumeToWorld;
Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorld);
Matrix::Invert(localToWorld, worldToLocal);
BoundingBox localVolumeBounds(object.SDF->LocalBoundsMin, object.SDF->LocalBoundsMax);
Float3 volumeLocalBoundsExtent = localVolumeBounds.GetSize() * 0.5f;
Matrix worldToVolume = worldToLocal * Matrix::Translation(-(localVolumeBounds.Minimum + volumeLocalBoundsExtent));
Matrix::Invert(worldToVolume, volumeToWorld);
Matrix::Transpose(worldToVolume, objectData.WorldToVolume);
Matrix::Transpose(volumeToWorld, objectData.VolumeToWorld);
objectData.WorldToVolume.SetMatrixTranspose(worldToVolume);
objectData.VolumeToWorld.SetMatrixTranspose(volumeToWorld);
objectData.VolumeLocalBoundsExtent = volumeLocalBoundsExtent;
objectData.VolumeToUVWMul = object.SDF->LocalToUVWMul;
objectData.VolumeToUVWAdd = object.SDF->LocalToUVWAdd + (localVolumeBounds.Minimum + volumeLocalBoundsExtent) * object.SDF->LocalToUVWMul;
@@ -702,11 +703,11 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
// Add object data for the GPU buffer
uint16 dataIndex = _objectsBufferCount++;
ObjectRasterizeData objectData;
Matrix localToWorldM, worldToLocal;
Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorldM);
Matrix::Invert(localToWorldM, worldToLocal);
Matrix::Transpose(worldToLocal, objectData.WorldToVolume);
Matrix::Transpose(localToWorldM, objectData.VolumeToWorld);
Matrix localToWorld, worldToLocal;
Matrix::Transformation(object.LocalToWorld.Scale, object.LocalToWorld.Orientation, object.LocalToWorld.Translation - _sdfData->Origin, localToWorld);
Matrix::Invert(localToWorld, worldToLocal);
objectData.WorldToVolume.SetMatrixTranspose(worldToLocal);
objectData.VolumeToWorld.SetMatrixTranspose(localToWorld);
objectData.VolumeToUVWMul = Float3(object.LocalToUV.X, 1.0f, object.LocalToUV.Y);
objectData.VolumeToUVWAdd = Float3(object.LocalToUV.Z, 0.0f, object.LocalToUV.W);
objectData.MipOffset = (float)_cascadeIndex * 0.5f; // Use lower-quality mip for far cascades