From 1161a4114dde2fe0de23cc920a64c64c06f171f4 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 26 Sep 2024 18:56:22 +0200 Subject: [PATCH] Minor tweaks to SIMD codebase --- Source/Engine/Core/Math/BoundingFrustum.h | 18 +++++----- Source/Engine/Core/SIMD.h | 40 ++++++++++++++++++----- Source/Engine/Platform/Defines.h | 1 + 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/Source/Engine/Core/Math/BoundingFrustum.h b/Source/Engine/Core/Math/BoundingFrustum.h index f98bcafae..554019079 100644 --- a/Source/Engine/Core/Math/BoundingFrustum.h +++ b/Source/Engine/Core/Math/BoundingFrustum.h @@ -101,7 +101,7 @@ public: } /// - /// Gets the far plane of the BoundingFrustum. + /// Gets the far plane of the frustum. /// FORCE_INLINE Plane GetFar() const { @@ -109,7 +109,7 @@ public: } /// - /// Gets the left plane of the BoundingFrustum. + /// Gets the left plane of the frustum. /// FORCE_INLINE Plane GetLeft() const { @@ -117,7 +117,7 @@ public: } /// - /// Gets the right plane of the BoundingFrustum. + /// Gets the right plane of the frustum. /// FORCE_INLINE Plane GetRight() const { @@ -125,7 +125,7 @@ public: } /// - /// Gets the top plane of the BoundingFrustum. + /// Gets the top plane of the frustum. /// FORCE_INLINE Plane GetTop() const { @@ -133,7 +133,7 @@ public: } /// - /// Gets the bottom plane of the BoundingFrustum. + /// Gets the bottom plane of the frustum. /// FORCE_INLINE Plane GetBottom() const { @@ -230,17 +230,17 @@ public: ContainmentType Contains(const BoundingSphere& sphere) const; /// - /// Checks whether the current BoundingFrustum intersects a BoundingSphere. + /// Checks whether the current frustum intersects a sphere. /// /// The sphere. - /// True if the current BoundingFrustum intersects a BoundingSphere, otherwise false. + /// True if the current frustum intersects a sphere, otherwise false. bool Intersects(const BoundingSphere& sphere) const; /// - /// Checks whether the current BoundingFrustum intersects a BoundingBox. 
+ /// Checks whether the current frustum intersects a box. /// /// The box - /// True if the current BoundingFrustum intersects a BoundingBox, otherwise false. + /// True if the current frustum intersects a box, otherwise false. FORCE_INLINE bool Intersects(const BoundingBox& box) const { return CollisionsHelper::FrustumContainsBox(*this, box) != ContainmentType::Disjoint; diff --git a/Source/Engine/Core/SIMD.h b/Source/Engine/Core/SIMD.h index 0ce58f7e7..b1d0db3a7 100644 --- a/Source/Engine/Core/SIMD.h +++ b/Source/Engine/Core/SIMD.h @@ -13,7 +13,7 @@ #if PLATFORM_SIMD_SSE2 -// Vector of four floating point values stored in vector register. +// Vector of four floating point values stored in a vector register. typedef __m128 SimdVector4; namespace SIMD @@ -28,9 +28,15 @@ namespace SIMD return _mm_set_ps(w, z, y, x); } - FORCE_INLINE SimdVector4 Load(const void* src) + FORCE_INLINE SimdVector4 Load(const float* __restrict src) { - return _mm_load_ps((const float*)(src)); + return _mm_loadu_ps(src); + } + + FORCE_INLINE SimdVector4 LoadAligned(const float* __restrict src) + { + ASSERT_LOW_LAYER(((uintptr)src & 15) == 0); + return _mm_load_ps(src); } FORCE_INLINE SimdVector4 Splat(float value) @@ -38,9 +44,15 @@ namespace SIMD return _mm_set_ps1(value); } - FORCE_INLINE void Store(void* dst, SimdVector4 src) + FORCE_INLINE void Store(float* __restrict dst, SimdVector4 src) { - _mm_store_ps((float*)dst, src); + _mm_storeu_ps(dst, src); + } + + FORCE_INLINE void StoreAligned(float* __restrict dst, SimdVector4 src) + { + ASSERT_LOW_LAYER(((uintptr)dst & 15) == 0); + _mm_store_ps(dst, src); } FORCE_INLINE int MoveMask(SimdVector4 a) @@ -113,7 +125,12 @@ namespace SIMD return { x, y, z, w }; } - FORCE_INLINE SimdVector4 Load(const void* src) + FORCE_INLINE SimdVector4 Load(const float* __restrict src) + { + return *(const SimdVector4*)src; + } + + FORCE_INLINE SimdVector4 LoadAligned(const float* __restrict src) { return *(const SimdVector4*)src; } @@ -123,10 +140,15 @@ 
namespace SIMD return { value, value, value, value }; } - FORCE_INLINE void Store(void* dst, SimdVector4 src) - { + FORCE_INLINE void Store(float* __restrict dst, SimdVector4 src) + { (*(SimdVector4*)dst) = src; - } + } + + FORCE_INLINE void StoreAligned(float* __restrict dst, SimdVector4 src) + { + (*(SimdVector4*)dst) = src; + } FORCE_INLINE int MoveMask(SimdVector4 a) { diff --git a/Source/Engine/Platform/Defines.h b/Source/Engine/Platform/Defines.h index 40b250983..1f12f63d2 100644 --- a/Source/Engine/Platform/Defines.h +++ b/Source/Engine/Platform/Defines.h @@ -228,6 +228,7 @@ API_ENUM() enum class ArchitectureType #if defined(_M_PPC) || defined(__CELLOS_LV2__) #define PLATFORM_SIMD_VMX 1 #endif +#define PLATFORM_SIMD (PLATFORM_SIMD_SSE2 || PLATFORM_SIMD_SSE3 || PLATFORM_SIMD_SSE4 || PLATFORM_SIMD_NEON || PLATFORM_SIMD_VMX) // Unicode text macro #if !defined(TEXT)