// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. #pragma once #include "Engine/Platform/Platform.h" #if PLATFORM_SIMD_SSE2 #ifdef _WIN32 #include #endif #else #include #endif #if PLATFORM_SIMD_SSE2 // Vector of four floating point values stored in a vector register. typedef __m128 SimdVector4; namespace SIMD { FORCE_INLINE SimdVector4 Load(float xyzw) { return _mm_set1_ps(xyzw); } FORCE_INLINE SimdVector4 Load(float x, float y, float z, float w) { return _mm_set_ps(w, z, y, x); } FORCE_INLINE SimdVector4 Load(const float* __restrict src) { return _mm_loadu_ps(src); } FORCE_INLINE SimdVector4 LoadAligned(const float* __restrict src) { ASSERT_LOW_LAYER(((uintptr)src & 15) == 0); return _mm_load_ps(src); } FORCE_INLINE SimdVector4 Splat(float value) { return _mm_set_ps1(value); } FORCE_INLINE void Store(float* __restrict dst, SimdVector4 src) { _mm_storeu_ps(dst, src); } FORCE_INLINE void StoreAligned(float* __restrict dst, SimdVector4 src) { ASSERT_LOW_LAYER(((uintptr)dst & 15) == 0); _mm_store_ps(dst, src); } FORCE_INLINE int MoveMask(SimdVector4 a) { return _mm_movemask_ps(a); } FORCE_INLINE SimdVector4 Add(SimdVector4 a, SimdVector4 b) { return _mm_add_ps(a, b); } FORCE_INLINE SimdVector4 Sub(SimdVector4 a, SimdVector4 b) { return _mm_sub_ps(a, b); } FORCE_INLINE SimdVector4 Mul(SimdVector4 a, SimdVector4 b) { return _mm_mul_ps(a, b); } FORCE_INLINE SimdVector4 Div(SimdVector4 a, SimdVector4 b) { return _mm_div_ps(a, b); } FORCE_INLINE SimdVector4 Rcp(SimdVector4 a) { return _mm_rcp_ps(a); } FORCE_INLINE SimdVector4 Sqrt(SimdVector4 a) { return _mm_sqrt_ps(a); } FORCE_INLINE SimdVector4 Rsqrt(SimdVector4 a) { return _mm_rsqrt_ps(a); } FORCE_INLINE SimdVector4 Min(SimdVector4 a, SimdVector4 b) { return _mm_min_ps(a, b); } FORCE_INLINE SimdVector4 Max(SimdVector4 a, SimdVector4 b) { return _mm_max_ps(a, b); } } #else struct SimdVector4 { float X, Y, Z, W; }; namespace SIMD { FORCE_INLINE SimdVector4 Load(float xyzw) { return { xyzw, xyzw, xyzw, xyzw }; } FORCE_INLINE SimdVector4 Load(float x, float y, float z, float w) { return { x, y, z, w }; } FORCE_INLINE SimdVector4 Load(const float* __restrict src) { return *(const SimdVector4*)src; } FORCE_INLINE SimdVector4 LoadAligned(const float* __restrict src) { return *(const SimdVector4*)src; } FORCE_INLINE SimdVector4 Splat(float value) { return { value, value, value, value }; } FORCE_INLINE void Store(float* __restrict dst, SimdVector4 src) { (*(SimdVector4*)dst) = src; } FORCE_INLINE void StoreAligned(float* __restrict dst, SimdVector4 src) { (*(SimdVector4*)dst) = src; } FORCE_INLINE int MoveMask(SimdVector4 a) { return (a.W < 0 ? (1 << 3) : 0) | (a.Z < 0 ? (1 << 2) : 0) | (a.Y < 0 ? (1 << 1) : 0) | (a.X < 0 ? 1 : 0); } FORCE_INLINE SimdVector4 Add(SimdVector4 a, SimdVector4 b) { return { a.X + b.X, a.Y + b.Y, a.Z + b.Z, a.W + b.W }; } FORCE_INLINE SimdVector4 Sub(SimdVector4 a, SimdVector4 b) { return { a.X - b.X, a.Y - b.Y, a.Z - b.Z, a.W - b.W }; } FORCE_INLINE SimdVector4 Mul(SimdVector4 a, SimdVector4 b) { return { a.X * b.X, a.Y * b.Y, a.Z * b.Z, a.W * b.W }; } FORCE_INLINE SimdVector4 Div(SimdVector4 a, SimdVector4 b) { return { a.X / b.X, a.Y / b.Y, a.Z / b.Z, a.W / b.W }; } FORCE_INLINE SimdVector4 Rcp(SimdVector4 a) { return { 1 / a.X, 1 / a.Y, 1 / a.Z, 1 / a.W }; } FORCE_INLINE SimdVector4 Sqrt(SimdVector4 a) { return { (float)sqrt(a.X), (float)sqrt(a.Y), (float)sqrt(a.Z), (float)sqrt(a.W) }; } FORCE_INLINE SimdVector4 Rsqrt(SimdVector4 a) { return { 1 / (float)sqrt(a.X), 1 / (float)sqrt(a.Y), 1 / (float)sqrt(a.Z), 1 / (float)sqrt(a.W) }; } FORCE_INLINE SimdVector4 Min(SimdVector4 a, SimdVector4 b) { return { a.X < b.X ? a.X : b.X, a.Y < b.Y ? a.Y : b.Y, a.Z < b.Z ? a.Z : b.Z, a.W < b.W ? a.W : b.W }; } FORCE_INLINE SimdVector4 Max(SimdVector4 a, SimdVector4 b) { return { a.X > b.X ? a.X : b.X, a.Y > b.Y ? a.Y : b.Y, a.Z > b.Z ? a.Z : b.Z, a.W > b.W ? a.W : b.W }; } } #endif