_matrix3x3 multiply unrolling
This commit is contained in:
@@ -5,6 +5,9 @@
|
||||
#include "Quaternion.h"
|
||||
#include "../Types/String.h"
|
||||
|
||||
#include <intrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
const Matrix3x3 Matrix3x3::Zero(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
|
||||
const Matrix3x3 Matrix3x3::Identity(
|
||||
1.0f, 0.0f, 0.0f,
|
||||
@@ -139,6 +142,67 @@ void Matrix3x3::Multiply(const Matrix3x3& left, float right, Matrix3x3& result)
|
||||
|
||||
void Matrix3x3::Multiply(const Matrix3x3& left, const Matrix3x3& right, Matrix3x3& result)
|
||||
{
|
||||
|
||||
#if true
|
||||
{
    // Naive row-major 3x3 product: result = left * right.
    //
    // The nine sums are accumulated into a local scratch array and copied into
    // result.Raw only after every input element has been read. Writing straight
    // into result.Raw (as the previous version did through a __restrict pointer)
    // produces a wrong product whenever `result` is the same object as `left` or
    // `right`, because already-overwritten elements get read back as inputs.
    // The reference implementation in the #else branch is alias-safe for the same
    // reason: it builds a temporary Matrix3x3 before assigning it to `result`.
    float product[9];

    for (int rowC = 0; rowC < 3; ++rowC) {
        // Row `rowC` of the left operand is contiguous in memory.
        const float* const leftRow = left.Raw + rowC * 3;

        for (int colC = 0; colC < 3; ++colC) {
            // Independent, local accumulator for one output element.
            float accumulate = 0;

            // Column `colC` of the right operand is strided (non-contiguous), so
            // it is indexed explicitly. The inner dimension is only 3; the former
            // 4-wide manually unrolled loop had the bound `pos < 3 - 4` and never
            // executed, so it has been removed.
            for (int pos = 0; pos < 3; ++pos) {
                accumulate += leftRow[pos] * right.Raw[pos * 3 + colC];
            }

            product[rowC * 3 + colC] = accumulate;
        }
    }

    // Publish the finished product; the inputs are no longer needed at this point.
    for (int i = 0; i < 9; ++i) {
        result.Raw[i] = product[i];
    }
}
|
||||
#else
|
||||
|
||||
/*
|
||||
__m256i vec_multi_res = _mm256_setzero_si256();
|
||||
__m256i vec_mat1 = _mm256_setzero_si256();
|
||||
__m256i vec_mat2 = _mm256_setzero_si256();
|
||||
|
||||
int i, j, k;
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
for (j = 0; j < 3; ++j)
|
||||
{
|
||||
//Stores one element in mat1 and use it in all computations needed before proceeding
|
||||
//Stores as vector to increase computations per cycle
|
||||
vec_mat1 = _mm256_set1_epi32(left.Values[j][i]);
|
||||
|
||||
for (k = 0; k < 3; k += 8)
|
||||
{
|
||||
vec_mat2 = _mm256_loadu_si256((__m256i*) & right.Values[k][j]); //Stores row of second matrix (eight in each iteration)
|
||||
vec_multi_res = _mm256_loadu_si256((__m256i*) & result.Values[k][i]); //Loads the result matrix row as a vector
|
||||
vec_multi_res = _mm256_add_epi32(vec_multi_res, _mm256_mullo_epi32(vec_mat1, vec_mat2));//Multiplies the vectors and adds to the result vector
|
||||
|
||||
_mm256_storeu_si256((__m256i*) & result.Values[k][i], vec_multi_res); //Stores the result vector into the result array
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
result = Matrix3x3(
|
||||
left.M11 * right.M11 + left.M12 * right.M21 + left.M13 * right.M31,
|
||||
left.M11 * right.M12 + left.M12 * right.M22 + left.M13 * right.M32,
|
||||
@@ -150,6 +214,19 @@ void Matrix3x3::Multiply(const Matrix3x3& left, const Matrix3x3& right, Matrix3x
|
||||
left.M31 * right.M12 + left.M32 * right.M22 + left.M33 * right.M32,
|
||||
left.M31 * right.M13 + left.M32 * right.M23 + left.M33 * right.M33
|
||||
);
|
||||
/*Matrix3x3 result2 = Matrix3x3(
|
||||
left.M11 * right.M11 + left.M12 * right.M21 + left.M13 * right.M31,
|
||||
left.M11 * right.M12 + left.M12 * right.M22 + left.M13 * right.M32,
|
||||
left.M11 * right.M13 + left.M12 * right.M23 + left.M13 * right.M33,
|
||||
left.M21 * right.M11 + left.M22 * right.M21 + left.M23 * right.M31,
|
||||
left.M21 * right.M12 + left.M22 * right.M22 + left.M23 * right.M32,
|
||||
left.M21 * right.M13 + left.M22 * right.M23 + left.M23 * right.M33,
|
||||
left.M31 * right.M11 + left.M32 * right.M21 + left.M33 * right.M31,
|
||||
left.M31 * right.M12 + left.M32 * right.M22 + left.M33 * right.M32,
|
||||
left.M31 * right.M13 + left.M32 * right.M23 + left.M33 * right.M33
|
||||
);
|
||||
ASSERT(result2 == result);*/
|
||||
#endif
|
||||
}
|
||||
|
||||
void Matrix3x3::Divide(const Matrix3x3& left, float right, Matrix3x3& result)
|
||||
|
||||
Reference in New Issue
Block a user