Update meshoptimizer to v0.20

Source/ThirdParty/meshoptimizer/LICENSE.md (vendored, 2 changes)
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2016-2020 Arseny Kapoulkine
+Copyright (c) 2016-2023 Arseny Kapoulkine
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
Source/ThirdParty/meshoptimizer/allocator.cpp (vendored)

@@ -1,7 +1,7 @@
 // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
 #include "meshoptimizer.h"
 
-void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*))
+void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*))
 {
     meshopt_Allocator::Storage::allocate = allocate;
     meshopt_Allocator::Storage::deallocate = deallocate;
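Note: the new MESHOPTIMIZER_ALLOC_CALLCONV macro (defined in the meshoptimizer.h hunk later in this commit) pins the calling convention of the allocation callbacks — `__cdecl` on MSVC, empty elsewhere. A minimal sketch of hooking a custom allocator under the new signature; the function names here are illustrative, not part of the library:

```cpp
#include "meshoptimizer.h"
#include <cstdlib>

// Callbacks must carry the same calling convention the header declares.
static void* MESHOPTIMIZER_ALLOC_CALLCONV myAlloc(size_t size)
{
    return malloc(size);
}

static void MESHOPTIMIZER_ALLOC_CALLCONV myFree(void* ptr)
{
    free(ptr);
}

int main()
{
    meshopt_setAllocator(myAlloc, myFree);
}
```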
Source/ThirdParty/meshoptimizer/clusterizer.cpp (vendored, 671 changes)
@@ -2,6 +2,7 @@
 #include "meshoptimizer.h"
 
 #include <assert.h>
+#include <float.h>
 #include <math.h>
 #include <string.h>
 
@@ -12,6 +13,68 @@
 namespace meshopt
 {
 
+// This must be <= 255 since index 0xff is used internally to indicate a vertex that doesn't belong to a meshlet
+const size_t kMeshletMaxVertices = 255;
+
+// A reasonable limit is around 2*max_vertices or less
+const size_t kMeshletMaxTriangles = 512;
+
+struct TriangleAdjacency2
+{
+    unsigned int* counts;
+    unsigned int* offsets;
+    unsigned int* data;
+};
+
+static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
+{
+    size_t face_count = index_count / 3;
+
+    // allocate arrays
+    adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
+    adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+    adjacency.data = allocator.allocate<unsigned int>(index_count);
+
+    // fill triangle counts
+    memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+
+    for (size_t i = 0; i < index_count; ++i)
+    {
+        assert(indices[i] < vertex_count);
+
+        adjacency.counts[indices[i]]++;
+    }
+
+    // fill offset table
+    unsigned int offset = 0;
+
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        adjacency.offsets[i] = offset;
+        offset += adjacency.counts[i];
+    }
+
+    assert(offset == index_count);
+
+    // fill triangle data
+    for (size_t i = 0; i < face_count; ++i)
+    {
+        unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+
+        adjacency.data[adjacency.offsets[a]++] = unsigned(i);
+        adjacency.data[adjacency.offsets[b]++] = unsigned(i);
+        adjacency.data[adjacency.offsets[c]++] = unsigned(i);
+    }
+
+    // fix offsets that have been disturbed by the previous pass
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        assert(adjacency.offsets[i] >= adjacency.counts[i]);
+
+        adjacency.offsets[i] -= adjacency.counts[i];
+    }
+}
+
 static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
 {
     assert(count > 0);
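For reference, the four passes above are a counting sort into a CSR (offsets + packed data) layout: count, prefix-sum, scatter, then undo the scatter's offset bumping. A self-contained sketch of the same pattern with illustrative names:

```cpp
#include <cstdio>
#include <vector>

int main()
{
    // two triangles sharing the edge (1, 2)
    unsigned int indices[] = {0, 1, 2, 2, 1, 3};
    size_t vertex_count = 4, index_count = 6;

    std::vector<unsigned int> counts(vertex_count, 0), offsets(vertex_count), data(index_count);

    for (size_t i = 0; i < index_count; ++i)
        counts[indices[i]]++; // pass 1: how many triangles touch each vertex

    unsigned int offset = 0;
    for (size_t i = 0; i < vertex_count; ++i)
    {
        offsets[i] = offset; // pass 2: prefix sum gives each vertex its span
        offset += counts[i];
    }

    for (size_t i = 0; i < index_count / 3; ++i)
        for (int k = 0; k < 3; ++k)
            data[offsets[indices[i * 3 + k]]++] = unsigned(i); // pass 3: scatter

    for (size_t i = 0; i < vertex_count; ++i)
        offsets[i] -= counts[i]; // pass 4: undo the bumping from pass 3

    // vertex 1 is touched by triangles 0 and 1
    for (unsigned int j = 0; j < counts[1]; ++j)
        printf("triangle %u\n", data[offsets[1] + j]);
}
```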
@@ -82,13 +145,382 @@ static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
     result[3] = radius;
 }
 
+struct Cone
+{
+    float px, py, pz;
+    float nx, ny, nz;
+};
+
+static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius)
+{
+    float cone = 1.f - spread * cone_weight;
+    float cone_clamped = cone < 1e-3f ? 1e-3f : cone;
+
+    return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped;
+}
+
+static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count)
+{
+    Cone result = acc;
+
+    float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count);
+
+    result.px *= center_scale;
+    result.py *= center_scale;
+    result.pz *= center_scale;
+
+    float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz;
+    float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length);
+
+    result.nx *= axis_scale;
+    result.ny *= axis_scale;
+    result.nz *= axis_scale;
+
+    return result;
+}
+
+static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+    (void)vertex_count;
+
+    size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+    size_t face_count = index_count / 3;
+
+    float mesh_area = 0;
+
+    for (size_t i = 0; i < face_count; ++i)
+    {
+        unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+        assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+        const float* p0 = vertex_positions + vertex_stride_float * a;
+        const float* p1 = vertex_positions + vertex_stride_float * b;
+        const float* p2 = vertex_positions + vertex_stride_float * c;
+
+        float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
+        float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
+
+        float normalx = p10[1] * p20[2] - p10[2] * p20[1];
+        float normaly = p10[2] * p20[0] - p10[0] * p20[2];
+        float normalz = p10[0] * p20[1] - p10[1] * p20[0];
+
+        float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
+        float invarea = (area == 0.f) ? 0.f : 1.f / area;
+
+        triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f;
+        triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f;
+        triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f;
+
+        triangles[i].nx = normalx * invarea;
+        triangles[i].ny = normaly * invarea;
+        triangles[i].nz = normalz * invarea;
+
+        mesh_area += area;
+    }
+
+    return mesh_area;
+}
+
+static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles)
+{
+    size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3;
+
+    // fill 4b padding with 0
+    while (offset & 3)
+        meshlet_triangles[offset++] = 0;
+}
+
+static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles)
+{
+    unsigned char& av = used[a];
+    unsigned char& bv = used[b];
+    unsigned char& cv = used[c];
+
+    bool result = false;
+
+    unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
+
+    if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
+    {
+        meshlets[meshlet_offset] = meshlet;
+
+        for (size_t j = 0; j < meshlet.vertex_count; ++j)
+            used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff;
+
+        finishMeshlet(meshlet, meshlet_triangles);
+
+        meshlet.vertex_offset += meshlet.vertex_count;
+        meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding
+        meshlet.vertex_count = 0;
+        meshlet.triangle_count = 0;
+
+        result = true;
+    }
+
+    if (av == 0xff)
+    {
+        av = (unsigned char)meshlet.vertex_count;
+        meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a;
+    }
+
+    if (bv == 0xff)
+    {
+        bv = (unsigned char)meshlet.vertex_count;
+        meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b;
+    }
+
+    if (cv == 0xff)
+    {
+        cv = (unsigned char)meshlet.vertex_count;
+        meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c;
+    }
+
+    meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av;
+    meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv;
+    meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv;
+    meshlet.triangle_count++;
+
+    return result;
+}
+
+static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Cone* meshlet_cone, unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, const unsigned char* used, float meshlet_expected_radius, float cone_weight, unsigned int* out_extra)
+{
+    unsigned int best_triangle = ~0u;
+    unsigned int best_extra = 5;
+    float best_score = FLT_MAX;
+
+    for (size_t i = 0; i < meshlet.vertex_count; ++i)
+    {
+        unsigned int index = meshlet_vertices[meshlet.vertex_offset + i];
+
+        unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
+        size_t neighbors_size = adjacency.counts[index];
+
+        for (size_t j = 0; j < neighbors_size; ++j)
+        {
+            unsigned int triangle = neighbors[j];
+            unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
+
+            unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff);
+
+            // triangles that don't add new vertices to meshlets are max. priority
+            if (extra != 0)
+            {
+                // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
+                if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1)
+                    extra = 0;
+
+                extra++;
+            }
+
+            // since topology-based priority is always more important than the score, we can skip scoring in some cases
+            if (extra > best_extra)
+                continue;
+
+            float score = 0;
+
+            // caller selects one of two scoring functions: geometrical (based on meshlet cone) or topological (based on remaining triangles)
+            if (meshlet_cone)
+            {
+                const Cone& tri_cone = triangles[triangle];
+
+                float distance2 =
+                    (tri_cone.px - meshlet_cone->px) * (tri_cone.px - meshlet_cone->px) +
+                    (tri_cone.py - meshlet_cone->py) * (tri_cone.py - meshlet_cone->py) +
+                    (tri_cone.pz - meshlet_cone->pz) * (tri_cone.pz - meshlet_cone->pz);
+
+                float spread = tri_cone.nx * meshlet_cone->nx + tri_cone.ny * meshlet_cone->ny + tri_cone.nz * meshlet_cone->nz;
+
+                score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius);
+            }
+            else
+            {
+                // each live_triangles entry is >= 1 since it includes the current triangle we're processing
+                score = float(live_triangles[a] + live_triangles[b] + live_triangles[c] - 3);
+            }
+
+            // note that topology-based priority is always more important than the score
+            // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost
+            if (extra < best_extra || score < best_score)
+            {
+                best_triangle = triangle;
+                best_extra = extra;
+                best_score = score;
+            }
+        }
+    }
+
+    if (out_extra)
+        *out_extra = best_extra;
+
+    return best_triangle;
+}
+
+struct KDNode
+{
+    union
+    {
+        float split;
+        unsigned int index;
+    };
+
+    // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point)
+    // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children
+    unsigned int axis : 2;
+    unsigned int children : 30;
+};
+
+static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot)
+{
+    size_t m = 0;
+
+    // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot
+    for (size_t i = 0; i < count; ++i)
+    {
+        float v = points[indices[i] * stride + axis];
+
+        // swap(m, i) unconditionally
+        unsigned int t = indices[m];
+        indices[m] = indices[i];
+        indices[i] = t;
+
+        // when v >= pivot, we swap i with m without advancing it, preserving invariants
+        m += v < pivot;
+    }
+
+    return m;
+}
+
+static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count)
+{
+    assert(offset + count <= node_count);
+    (void)node_count;
+
+    KDNode& result = nodes[offset];
+
+    result.index = indices[0];
+    result.axis = 3;
+    result.children = unsigned(count - 1);
+
+    // all remaining points are stored in nodes immediately following the leaf
+    for (size_t i = 1; i < count; ++i)
+    {
+        KDNode& tail = nodes[offset + i];
+
+        tail.index = indices[i];
+        tail.axis = 3;
+        tail.children = ~0u >> 2; // bogus value to prevent misuse
+    }
+
+    return offset + count;
+}
+
+static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size)
+{
+    assert(count > 0);
+    assert(offset < node_count);
+
+    if (count <= leaf_size)
+        return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
+
+    float mean[3] = {};
+    float vars[3] = {};
+    float runc = 1, runs = 1;
+
+    // gather statistics on the points in the subtree using Welford's algorithm
+    for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
+    {
+        const float* point = points + indices[i] * stride;
+
+        for (int k = 0; k < 3; ++k)
+        {
+            float delta = point[k] - mean[k];
+            mean[k] += delta * runs;
+            vars[k] += delta * (point[k] - mean[k]);
+        }
+    }
+
+    // split axis is one where the variance is largest
+    unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 : 2;
+
+    float split = mean[axis];
+    size_t middle = kdtreePartition(indices, count, points, stride, axis, split);
+
+    // when the partition is degenerate simply consolidate the points into a single node
+    if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2)
+        return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
+
+    KDNode& result = nodes[offset];
+
+    result.split = split;
+    result.axis = axis;
+
+    // left subtree is right after our node
+    size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size);
+
+    // distance to the right subtree is represented explicitly
+    result.children = unsigned(next_offset - offset - 1);
+
+    return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size);
+}
+
+static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit)
+{
+    const KDNode& node = nodes[root];
+
+    if (node.axis == 3)
+    {
+        // leaf
+        for (unsigned int i = 0; i <= node.children; ++i)
+        {
+            unsigned int index = nodes[root + i].index;
+
+            if (emitted_flags[index])
+                continue;
+
+            const float* point = points + index * stride;
+
+            float distance2 =
+                (point[0] - position[0]) * (point[0] - position[0]) +
+                (point[1] - position[1]) * (point[1] - position[1]) +
+                (point[2] - position[2]) * (point[2] - position[2]);
+            float distance = sqrtf(distance2);
+
+            if (distance < limit)
+            {
+                result = index;
+                limit = distance;
+            }
+        }
+    }
+    else
+    {
+        // branch; we order recursion to process the node that search position is in first
+        float delta = position[node.axis] - node.split;
+        unsigned int first = (delta <= 0) ? 0 : node.children;
+        unsigned int second = first ^ node.children;
+
+        kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit);
+
+        // only process the other node if it can have a match based on closest distance so far
+        if (fabsf(delta) <= limit)
+            kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit);
+    }
+}
+
 } // namespace meshopt
 
 size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
 {
     using namespace meshopt;
 
     assert(index_count % 3 == 0);
-    assert(max_vertices >= 3);
-    assert(max_triangles >= 1);
+    assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+    assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+    assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+
+    (void)kMeshletMaxVertices;
+    (void)kMeshletMaxTriangles;
 
     // meshlet construction is limited by max vertices and max triangles per meshlet
     // the worst case is that the input is an unindexed stream since this equally stresses both limits
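Note: appendMeshlet advances triangle_offset with `(triangle_count * 3 + 3) & ~3`, and buildMeshletsBound asserts `max_triangles % 4 == 0`, so every meshlet's triangle bytes start 4-byte aligned. A quick numeric illustration of that rounding rule (illustrative code, not part of the library):

```cpp
#include <cstdio>

int main()
{
    unsigned int counts[] = {1, 2, 4, 5, 124};

    for (unsigned int triangle_count : counts)
    {
        unsigned int raw = triangle_count * 3;  // bytes of triangle index data
        unsigned int padded = (raw + 3) & ~3u;  // rounded up to a multiple of 4
        printf("%3u triangles: %3u bytes -> %3u padded\n", triangle_count, raw, padded);
    }
}
```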
@@ -100,77 +532,181 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
     return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
 }
 
-size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
 {
     using namespace meshopt;
 
     assert(index_count % 3 == 0);
-    assert(max_vertices >= 3);
-    assert(max_triangles >= 1);
+    assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+    assert(vertex_positions_stride % sizeof(float) == 0);
+
+    assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+    assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+    assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+
+    assert(cone_weight >= 0 && cone_weight <= 1);
 
     meshopt_Allocator allocator;
 
-    meshopt_Meshlet meshlet;
-    memset(&meshlet, 0, sizeof(meshlet));
+    TriangleAdjacency2 adjacency = {};
+    buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
 
-    assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0]));
-    assert(max_triangles <= sizeof(meshlet.indices) / 3);
+    unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+    memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+
+    size_t face_count = index_count / 3;
+
+    unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+    memset(emitted_flags, 0, face_count);
+
+    // for each triangle, precompute centroid & normal to use for scoring
+    Cone* triangles = allocator.allocate<Cone>(face_count);
+    float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+
+    // assuming each meshlet is a square patch, expected radius is sqrt(expected area)
+    float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f;
+    float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f;
+
+    // build a kd-tree for nearest neighbor lookup
+    unsigned int* kdindices = allocator.allocate<unsigned int>(face_count);
+    for (size_t i = 0; i < face_count; ++i)
+        kdindices[i] = unsigned(i);
+
+    KDNode* nodes = allocator.allocate<KDNode>(face_count * 2);
+    kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8);
 
     // index of the vertex in the meshlet, 0xff if the vertex isn't used
     unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
     memset(used, -1, vertex_count);
 
-    size_t offset = 0;
+    meshopt_Meshlet meshlet = {};
+    size_t meshlet_offset = 0;
+
+    Cone meshlet_cone_acc = {};
+
+    for (;;)
+    {
+        Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count);
+
+        unsigned int best_extra = 0;
+        unsigned int best_triangle = getNeighborTriangle(meshlet, &meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight, &best_extra);
+
+        // if the best triangle doesn't fit into current meshlet, the spatial scoring we've used is not very meaningful, so we re-select using topological scoring
+        if (best_triangle != ~0u && (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles))
+        {
+            best_triangle = getNeighborTriangle(meshlet, NULL, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, 0.f, NULL);
+        }
+
+        // when we run out of neighboring triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity
+        if (best_triangle == ~0u)
+        {
+            float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz};
+            unsigned int index = ~0u;
+            float limit = FLT_MAX;
+
+            kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit);
+
+            best_triangle = index;
+        }
+
+        if (best_triangle == ~0u)
+            break;
+
+        unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2];
+        assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+        // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds
+        if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles))
+        {
+            meshlet_offset++;
+            memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc));
+        }
+
+        live_triangles[a]--;
+        live_triangles[b]--;
+        live_triangles[c]--;
+
+        // remove emitted triangle from adjacency data
+        // this makes sure that we spend less time traversing these lists on subsequent iterations
+        for (size_t k = 0; k < 3; ++k)
+        {
+            unsigned int index = indices[best_triangle * 3 + k];
+
+            unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
+            size_t neighbors_size = adjacency.counts[index];
+
+            for (size_t i = 0; i < neighbors_size; ++i)
+            {
+                unsigned int tri = neighbors[i];
+
+                if (tri == best_triangle)
+                {
+                    neighbors[i] = neighbors[neighbors_size - 1];
+                    adjacency.counts[index]--;
+                    break;
+                }
+            }
+        }
+
+        // update aggregated meshlet cone data for scoring subsequent triangles
+        meshlet_cone_acc.px += triangles[best_triangle].px;
+        meshlet_cone_acc.py += triangles[best_triangle].py;
+        meshlet_cone_acc.pz += triangles[best_triangle].pz;
+        meshlet_cone_acc.nx += triangles[best_triangle].nx;
+        meshlet_cone_acc.ny += triangles[best_triangle].ny;
+        meshlet_cone_acc.nz += triangles[best_triangle].nz;
+
+        emitted_flags[best_triangle] = 1;
+    }
+
+    if (meshlet.triangle_count)
+    {
+        finishMeshlet(meshlet, meshlet_triangles);
+
+        meshlets[meshlet_offset++] = meshlet;
+    }
+
+    assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+    return meshlet_offset;
+}
+
+size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+{
+    using namespace meshopt;
+
+    assert(index_count % 3 == 0);
+
+    assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+    assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+    assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+
+    meshopt_Allocator allocator;
+
+    // index of the vertex in the meshlet, 0xff if the vertex isn't used
+    unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
+    memset(used, -1, vertex_count);
+
+    meshopt_Meshlet meshlet = {};
+    size_t meshlet_offset = 0;
 
     for (size_t i = 0; i < index_count; i += 3)
     {
         unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
         assert(a < vertex_count && b < vertex_count && c < vertex_count);
 
-        unsigned char& av = used[a];
-        unsigned char& bv = used[b];
-        unsigned char& cv = used[c];
-
-        unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
-
-        if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
-        {
-            destination[offset++] = meshlet;
-
-            for (size_t j = 0; j < meshlet.vertex_count; ++j)
-                used[meshlet.vertices[j]] = 0xff;
-
-            memset(&meshlet, 0, sizeof(meshlet));
-        }
-
-        if (av == 0xff)
-        {
-            av = meshlet.vertex_count;
-            meshlet.vertices[meshlet.vertex_count++] = a;
-        }
-
-        if (bv == 0xff)
-        {
-            bv = meshlet.vertex_count;
-            meshlet.vertices[meshlet.vertex_count++] = b;
-        }
-
-        if (cv == 0xff)
-        {
-            cv = meshlet.vertex_count;
-            meshlet.vertices[meshlet.vertex_count++] = c;
-        }
-
-        meshlet.indices[meshlet.triangle_count][0] = av;
-        meshlet.indices[meshlet.triangle_count][1] = bv;
-        meshlet.indices[meshlet.triangle_count][2] = cv;
-        meshlet.triangle_count++;
+        // appends triangle to the meshlet and writes previous meshlet to the output if full
+        meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles);
     }
 
     if (meshlet.triangle_count)
-        destination[offset++] = meshlet;
+    {
+        finishMeshlet(meshlet, meshlet_triangles);
 
-    assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+        meshlets[meshlet_offset++] = meshlet;
+    }
 
-    return offset;
+    assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+    return meshlet_offset;
 }
 
 meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
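Note: meshopt_buildMeshlets now writes into three caller-allocated arrays instead of the old fixed-size meshopt_Meshlet storage. A sketch of driving the new API, with sizing per the library's documented worst-case bounds (the wrapper function and parameter choices here are illustrative):

```cpp
#include "meshoptimizer.h"
#include <vector>

void buildMeshlets(const std::vector<unsigned int>& indices,
    const std::vector<float>& positions /* tightly packed xyz per vertex */)
{
    const size_t max_vertices = 64;
    const size_t max_triangles = 124; // multiple of 4, as the new assert requires
    const float cone_weight = 0.0f;   // 0 = pure spatial fill; raise to favor cone culling

    size_t max_meshlets = meshopt_buildMeshletsBound(indices.size(), max_vertices, max_triangles);

    std::vector<meshopt_Meshlet> meshlets(max_meshlets);
    std::vector<unsigned int> meshlet_vertices(max_meshlets * max_vertices);
    std::vector<unsigned char> meshlet_triangles(max_meshlets * max_triangles * 3);

    size_t count = meshopt_buildMeshlets(meshlets.data(), meshlet_vertices.data(), meshlet_triangles.data(),
        indices.data(), indices.size(), positions.data(), positions.size() / 3, sizeof(float) * 3,
        max_vertices, max_triangles, cone_weight);

    meshlets.resize(count);
}
```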
@@ -178,18 +714,17 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
     using namespace meshopt;
 
     assert(index_count % 3 == 0);
-    assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+    assert(index_count / 3 <= kMeshletMaxTriangles);
+    assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
     assert(vertex_positions_stride % sizeof(float) == 0);
 
-    assert(index_count / 3 <= 256);
-
     (void)vertex_count;
 
     size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
 
     // compute triangle normals and gather triangle corners
-    float normals[256][3];
-    float corners[256][3][3];
+    float normals[kMeshletMaxTriangles][3];
+    float corners[kMeshletMaxTriangles][3][3];
     size_t triangles = 0;
 
     for (size_t i = 0; i < index_count; i += 3)
@@ -327,25 +862,23 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
     return bounds;
 }
 
-meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
 {
-    assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
-    assert(vertex_positions_stride % sizeof(float) == 0);
+    using namespace meshopt;
 
-    unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])];
+    assert(triangle_count <= kMeshletMaxTriangles);
+    assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+    assert(vertex_positions_stride % sizeof(float) == 0);
 
-    for (size_t i = 0; i < meshlet->triangle_count; ++i)
-    {
-        unsigned int a = meshlet->vertices[meshlet->indices[i][0]];
-        unsigned int b = meshlet->vertices[meshlet->indices[i][1]];
-        unsigned int c = meshlet->vertices[meshlet->indices[i][2]];
-
-        assert(a < vertex_count && b < vertex_count && c < vertex_count);
-
-        indices[i * 3 + 0] = a;
-        indices[i * 3 + 1] = b;
-        indices[i * 3 + 2] = c;
-    }
-
-    return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
+    unsigned int indices[kMeshletMaxTriangles * 3];
+
+    for (size_t i = 0; i < triangle_count * 3; ++i)
+    {
+        unsigned int index = meshlet_vertices[meshlet_triangles[i]];
+        assert(index < vertex_count);
+
+        indices[i] = index;
+    }
+
+    return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
 }
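Note: with the new signature, each meshlet addresses its own slices of the shared vertex/triangle arrays. A sketch of pairing the builder output with the bounds API (the wrapper is illustrative):

```cpp
#include "meshoptimizer.h"

meshopt_Bounds computeBounds(const meshopt_Meshlet& m,
    const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles,
    const float* positions /* tightly packed xyz */, size_t vertex_count)
{
    // each meshlet's data starts at its vertex_offset/triangle_offset
    return meshopt_computeMeshletBounds(
        &meshlet_vertices[m.vertex_offset],
        &meshlet_triangles[m.triangle_offset],
        m.triangle_count,
        positions, vertex_count, sizeof(float) * 3);
}
```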
Source/ThirdParty/meshoptimizer/indexcodec.cpp (vendored, 82 changes)
@@ -4,14 +4,6 @@
 #include <assert.h>
 #include <string.h>
 
-#ifndef TRACE
-#define TRACE 0
-#endif
-
-#if TRACE
-#include <stdio.h>
-#endif
-
 // This work is based on:
 // Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
 // Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
@@ -21,7 +13,7 @@ namespace meshopt
 const unsigned char kIndexHeader = 0xe0;
 const unsigned char kSequenceHeader = 0xd0;
 
-static int gEncodeIndexVersion = 0;
+static int gEncodeIndexVersion = 1;
 
 typedef unsigned int VertexFifo[16];
 typedef unsigned int EdgeFifo[16][2];
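Note: this bumps the default index codec version from 0 to 1, so freshly encoded index buffers now require a 0.14+ decoder (per the version documentation in the meshoptimizer.h hunk below). A minimal sketch of pinning the old format when older decoders must consume the output:

```cpp
#include "meshoptimizer.h"

void useLegacyIndexFormat()
{
    // version 0 is decodable by all library versions; version 1 needs 0.14+
    meshopt_encodeIndexVersion(0);
}
```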
@@ -116,7 +108,7 @@ static unsigned int decodeVByte(const unsigned char*& data)
     for (int i = 0; i < 4; ++i)
     {
         unsigned char group = *data++;
-        result |= (group & 127) << shift;
+        result |= unsigned(group & 127) << shift;
         shift += 7;
 
         if (group < 128)
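The cast matters for correctness, not just style: `group & 127` promotes to signed int, and once `shift` reaches the high twenties a set bit can be shifted into the sign bit, which is undefined behavior for signed left shifts. Shifting an `unsigned` value is fully defined. A minimal illustration (assuming 32-bit int):

```cpp
unsigned int decodeLastGroup(unsigned char group, int shift /* can be as large as 28 */)
{
    // (group & 127) << shift would shift a signed int; the cast keeps it unsigned
    return unsigned(group & 127) << shift; // well defined for shift <= 31
}
```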
@@ -167,38 +159,6 @@ static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
     }
 }
 
-#if TRACE
-static size_t sortTop16(unsigned char dest[16], size_t stats[256])
-{
-    size_t destsize = 0;
-
-    for (size_t i = 0; i < 256; ++i)
-    {
-        size_t j = 0;
-        for (; j < destsize; ++j)
-        {
-            if (stats[i] >= stats[dest[j]])
-            {
-                if (destsize < 16)
-                    destsize++;
-
-                memmove(&dest[j + 1], &dest[j], destsize - 1 - j);
-                dest[j] = (unsigned char)i;
-                break;
-            }
-        }
-
-        if (j == destsize && destsize < 16)
-        {
-            dest[destsize] = (unsigned char)i;
-            destsize++;
-        }
-    }
-
-    return destsize;
-}
-#endif
-
 } // namespace meshopt
 
 size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
@@ -207,11 +167,6 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
 
     assert(index_count % 3 == 0);
 
-#if TRACE
-    size_t codestats[256] = {};
-    size_t codeauxstats[256] = {};
-#endif
-
     // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
     if (buffer_size < 1 + index_count / 3 + 16)
         return 0;
@@ -275,10 +230,6 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
 
             *code++ = (unsigned char)((fe << 4) | fec);
 
-#if TRACE
-            codestats[code[-1]]++;
-#endif
-
             // note that we need to update the last index since free indices are delta-encoded
             if (fec == 15)
                 encodeIndex(data, c, last), last = c;
@@ -334,11 +285,6 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
                 *data++ = codeaux;
             }
 
-#if TRACE
-            codestats[code[-1]]++;
-            codeauxstats[codeaux]++;
-#endif
-
             // note that we need to update the last index since free indices are delta-encoded
             if (fea == 15)
                 encodeIndex(data, a, last), last = a;
@@ -387,30 +333,6 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
     assert(data >= buffer + index_count / 3 + 16);
     assert(data <= buffer + buffer_size);
 
-#if TRACE
-    unsigned char codetop[16], codeauxtop[16];
-    size_t codetopsize = sortTop16(codetop, codestats);
-    size_t codeauxtopsize = sortTop16(codeauxtop, codeauxstats);
-
-    size_t sumcode = 0, sumcodeaux = 0;
-    for (size_t i = 0; i < 256; ++i)
-        sumcode += codestats[i], sumcodeaux += codeauxstats[i];
-
-    size_t acccode = 0, acccodeaux = 0;
-
-    printf("code\t\t\t\t\tcodeaux\n");
-
-    for (size_t i = 0; i < codetopsize && i < codeauxtopsize; ++i)
-    {
-        acccode += codestats[codetop[i]];
-        acccodeaux += codeauxstats[codeauxtop[i]];
-
-        printf("%2d: %02x = %d (%.1f%% ..%.1f%%)\t\t%2d: %02x = %d (%.1f%% ..%.1f%%)\n",
-            int(i), codetop[i], int(codestats[codetop[i]]), double(codestats[codetop[i]]) / double(sumcode) * 100, double(acccode) / double(sumcode) * 100,
-            int(i), codeauxtop[i], int(codeauxstats[codeauxtop[i]]), double(codeauxstats[codeauxtop[i]]) / double(sumcodeaux) * 100, double(acccodeaux) / double(sumcodeaux) * 100);
-    }
-#endif
-
     return data - buffer;
 }
Source/ThirdParty/meshoptimizer/indexgenerator.cpp (vendored, 249 changes)
@@ -4,6 +4,8 @@
 #include <assert.h>
 #include <string.h>
 
+// This work is based on:
+// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
 namespace meshopt
 {
 
@@ -83,10 +85,49 @@ struct VertexStreamHasher
     }
 };
 
+struct EdgeHasher
+{
+    const unsigned int* remap;
+
+    size_t hash(unsigned long long edge) const
+    {
+        unsigned int e0 = unsigned(edge >> 32);
+        unsigned int e1 = unsigned(edge);
+
+        unsigned int h1 = remap[e0];
+        unsigned int h2 = remap[e1];
+
+        const unsigned int m = 0x5bd1e995;
+
+        // MurmurHash64B finalizer
+        h1 ^= h2 >> 18;
+        h1 *= m;
+        h2 ^= h1 >> 22;
+        h2 *= m;
+        h1 ^= h2 >> 17;
+        h1 *= m;
+        h2 ^= h1 >> 19;
+        h2 *= m;
+
+        return h2;
+    }
+
+    bool equal(unsigned long long lhs, unsigned long long rhs) const
+    {
+        unsigned int l0 = unsigned(lhs >> 32);
+        unsigned int l1 = unsigned(lhs);
+
+        unsigned int r0 = unsigned(rhs >> 32);
+        unsigned int r1 = unsigned(rhs);
+
+        return remap[l0] == remap[r0] && remap[l1] == remap[r1];
+    }
+};
+
 static size_t hashBuckets(size_t count)
 {
     size_t buckets = 1;
-    while (buckets < count)
+    while (buckets < count + count / 4)
         buckets *= 2;
 
     return buckets;
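The hashBuckets change is a load-factor fix: sizing the open-addressing table to the first power of two at or above `count + count / 4` keeps it at most ~80% full, where the old sizing could approach 100% and degrade probing. A worked example:

```cpp
#include <cstdio>

size_t hashBuckets(size_t count)
{
    size_t buckets = 1;
    while (buckets < count + count / 4) // new: reserve ~25% headroom
        buckets *= 2;
    return buckets;
}

int main()
{
    // 1000 items: new sizing picks 2048 (load 0.49); the old `buckets < count`
    // condition picked 1024 (load 0.98), nearly full
    printf("%zu\n", hashBuckets(1000));
}
```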
@@ -116,7 +157,43 @@ static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
     }
 
     assert(false && "Hash table is full"); // unreachable
-    return 0;
+    return NULL;
 }
 
+static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
+{
+    VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride};
+
+    size_t vertex_table_size = hashBuckets(vertex_count);
+    unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size);
+    memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int));
+
+    for (size_t i = 0; i < vertex_count; ++i)
+    {
+        unsigned int index = unsigned(i);
+        unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u);
+
+        if (*entry == ~0u)
+            *entry = index;
+
+        remap[index] = *entry;
+    }
+
+    allocator.deallocate(vertex_table);
+}
+
+template <size_t BlockSize>
+static void remapVertices(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
+{
+    size_t block_size = BlockSize == 0 ? vertex_size : BlockSize;
+    assert(block_size == vertex_size);
+
+    for (size_t i = 0; i < vertex_count; ++i)
+        if (remap[i] != ~0u)
+        {
+            assert(remap[i] < vertex_count);
+
+            memcpy(static_cast<unsigned char*>(destination) + remap[i] * block_size, static_cast<const unsigned char*>(vertices) + i * block_size, block_size);
+        }
+}
+
 } // namespace meshopt
@@ -126,7 +203,7 @@ size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
     using namespace meshopt;
 
     assert(indices || index_count == vertex_count);
-    assert(index_count % 3 == 0);
+    assert(!indices || index_count % 3 == 0);
     assert(vertex_size > 0 && vertex_size <= 256);
 
     meshopt_Allocator allocator;
@@ -227,6 +304,8 @@ size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
 
 void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
 {
+    using namespace meshopt;
+
     assert(vertex_size > 0 && vertex_size <= 256);
 
     meshopt_Allocator allocator;
@@ -239,14 +318,23 @@ void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
         vertices = vertices_copy;
     }
 
-    for (size_t i = 0; i < vertex_count; ++i)
-    {
-        if (remap[i] != ~0u)
-        {
-            assert(remap[i] < vertex_count);
-
-            memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size);
-        }
-    }
+    // specialize the loop for common vertex sizes to ensure memcpy is compiled as an inlined intrinsic
+    switch (vertex_size)
+    {
+    case 4:
+        return remapVertices<4>(destination, vertices, vertex_count, vertex_size, remap);
+
+    case 8:
+        return remapVertices<8>(destination, vertices, vertex_count, vertex_size, remap);
+
+    case 12:
+        return remapVertices<12>(destination, vertices, vertex_count, vertex_size, remap);
+
+    case 16:
+        return remapVertices<16>(destination, vertices, vertex_count, vertex_size, remap);
+
+    default:
+        return remapVertices<0>(destination, vertices, vertex_count, vertex_size, remap);
+    }
 }
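The point of the BlockSize template parameter is that a compile-time-constant memcpy size lets compilers lower the copy to plain loads/stores instead of a library call; `BlockSize == 0` selects the generic runtime-sized path. A minimal illustration of the same trick, independent of meshoptimizer:

```cpp
#include <cstring>

template <size_t Size>
void copyBlocks(unsigned char* dst, const unsigned char* src, size_t count, size_t size)
{
    size_t block = Size == 0 ? size : Size; // Size == 0 -> generic path
    for (size_t i = 0; i < count; ++i)
        memcpy(dst + i * block, src + i * block, block); // constant-size memcpy inlines
}

void copy16(unsigned char* dst, const unsigned char* src, size_t count)
{
    copyBlocks<16>(dst, src, count, 16); // specialized: block size known at compile time
}
```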
@@ -345,3 +433,146 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
         destination[i] = remap[index];
     }
 }
+
+void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+    using namespace meshopt;
+
+    assert(index_count % 3 == 0);
+    assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+    assert(vertex_positions_stride % sizeof(float) == 0);
+
+    meshopt_Allocator allocator;
+
+    static const int next[4] = {1, 2, 0, 1};
+
+    // build position remap: for each vertex, which other (canonical) vertex does it map to?
+    unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+    buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
+
+    // build edge set; this stores all triangle edges but we can look these up by any other wedge
+    EdgeHasher edge_hasher = {remap};
+
+    size_t edge_table_size = hashBuckets(index_count);
+    unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
+    unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size);
+
+    memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
+    memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int));
+
+    for (size_t i = 0; i < index_count; i += 3)
+    {
+        for (int e = 0; e < 3; ++e)
+        {
+            unsigned int i0 = indices[i + e];
+            unsigned int i1 = indices[i + next[e]];
+            unsigned int i2 = indices[i + next[e + 1]];
+            assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count);
+
+            unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
+            unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+            if (*entry == ~0ull)
+            {
+                *entry = edge;
+
+                // store vertex opposite to the edge
+                edge_vertex_table[entry - edge_table] = i2;
+            }
+        }
+    }
+
+    // build resulting index buffer: 6 indices for each input triangle
+    for (size_t i = 0; i < index_count; i += 3)
+    {
+        unsigned int patch[6];
+
+        for (int e = 0; e < 3; ++e)
+        {
+            unsigned int i0 = indices[i + e];
+            unsigned int i1 = indices[i + next[e]];
+            assert(i0 < vertex_count && i1 < vertex_count);
+
+            // note: this refers to the opposite edge!
+            unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
+            unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+            patch[e * 2 + 0] = i0;
+            patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table];
+        }
+
+        memcpy(destination + i * 2, patch, sizeof(patch));
+    }
+}
+
+void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+    using namespace meshopt;
+
+    assert(index_count % 3 == 0);
+    assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+    assert(vertex_positions_stride % sizeof(float) == 0);
+
+    meshopt_Allocator allocator;
+
+    static const int next[3] = {1, 2, 0};
+
+    // build position remap: for each vertex, which other (canonical) vertex does it map to?
+    unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+    buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
+
+    // build edge set; this stores all triangle edges but we can look these up by any other wedge
+    EdgeHasher edge_hasher = {remap};
+
+    size_t edge_table_size = hashBuckets(index_count);
+    unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
+    memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
+
+    for (size_t i = 0; i < index_count; i += 3)
+    {
+        for (int e = 0; e < 3; ++e)
+        {
+            unsigned int i0 = indices[i + e];
+            unsigned int i1 = indices[i + next[e]];
+            assert(i0 < vertex_count && i1 < vertex_count);
+
+            unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
+            unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+            if (*entry == ~0ull)
+                *entry = edge;
+        }
+    }
+
+    // build resulting index buffer: 12 indices for each input triangle
+    for (size_t i = 0; i < index_count; i += 3)
+    {
+        unsigned int patch[12];
+
+        for (int e = 0; e < 3; ++e)
+        {
+            unsigned int i0 = indices[i + e];
+            unsigned int i1 = indices[i + next[e]];
+            assert(i0 < vertex_count && i1 < vertex_count);
+
+            // note: this refers to the opposite edge!
+            unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
+            unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+            // use the same edge if opposite edge doesn't exist (border)
+            oppe = (oppe == ~0ull) ? edge : oppe;
+
+            // triangle index (0, 1, 2)
+            patch[e] = i0;
+
+            // opposite edge (3, 4; 5, 6; 7, 8)
+            patch[3 + e * 2 + 0] = unsigned(oppe);
+            patch[3 + e * 2 + 1] = unsigned(oppe >> 32);
+
+            // dominant vertex (9, 10, 11)
+            patch[9 + e] = remap[i0];
+        }
+
+        memcpy(destination + i * 4, patch, sizeof(patch));
+    }
+}
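Note: per the header documentation added in this commit, the adjacency generator emits 6 indices per input triangle, meant for a *_TRIANGLE_LIST_WITH_ADJACENCY topology. A sketch of calling it (the wrapper is illustrative):

```cpp
#include "meshoptimizer.h"
#include <vector>

std::vector<unsigned int> makeAdjacencyIndices(const std::vector<unsigned int>& indices,
    const std::vector<float>& positions /* tightly packed xyz */)
{
    // output is index_count*2 elements: 6 indices per input triangle
    std::vector<unsigned int> adj(indices.size() * 2);
    meshopt_generateAdjacencyIndexBuffer(adj.data(), indices.data(), indices.size(),
        positions.data(), positions.size() / 3, sizeof(float) * 3);
    return adj;
}
```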
Source/ThirdParty/meshoptimizer/meshoptimizer.h (vendored, 436 changes)
@@ -1,7 +1,7 @@
 /**
- * meshoptimizer - version 0.14
+ * meshoptimizer - version 0.20
  *
- * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Copyright (C) 2016-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
  * Report bugs and download new versions at https://github.com/zeux/meshoptimizer
  *
  * This library is distributed under the MIT License. See notice at the end of this file.
@@ -12,13 +12,22 @@
 #include <stddef.h>
 
 /* Version macro; major * 1000 + minor * 10 + patch */
-#define MESHOPTIMIZER_VERSION 140
+#define MESHOPTIMIZER_VERSION 200 /* 0.20 */
 
 /* If no API is defined, assume default */
 #ifndef MESHOPTIMIZER_API
 #define MESHOPTIMIZER_API
 #endif
 
+/* Set the calling-convention for alloc/dealloc function pointers */
+#ifndef MESHOPTIMIZER_ALLOC_CALLCONV
+#ifdef _MSC_VER
+#define MESHOPTIMIZER_ALLOC_CALLCONV __cdecl
+#else
+#define MESHOPTIMIZER_ALLOC_CALLCONV
+#endif
+#endif
+
 /* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */
 #define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API
 
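Since the version macro encodes major * 1000 + minor * 10 + patch, engine code that depends on the v0.20 API surface can guard itself at compile time. A minimal sketch:

```cpp
#include "meshoptimizer.h"

#if MESHOPTIMIZER_VERSION < 200
#error "this code relies on the v0.20 meshlet API (meshopt_buildMeshlets with explicit vertex/triangle arrays)"
#endif
```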
@@ -28,8 +37,8 @@ extern "C" {
 #endif
 
 /**
- * Vertex attribute stream, similar to glVertexPointer
- * Each element takes size bytes, with stride controlling the spacing between successive elements.
+ * Vertex attribute stream
+ * Each element takes size bytes, beginning at data, with stride controlling the spacing between successive elements (stride >= size).
  */
 struct meshopt_Stream
 {
@@ -42,6 +51,7 @@ struct meshopt_Stream
  * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices
  * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence.
  * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
+ * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized.
  *
  * destination must contain enough space for the resulting remap table (vertex_count elements)
  * indices can be NULL if the input is unindexed
@@ -53,9 +63,11 @@ MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
  * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence.
  * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
  * To remap vertex buffers, you will need to call meshopt_remapVertexBuffer for each vertex stream.
+ * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized.
  *
  * destination must contain enough space for the resulting remap table (vertex_count elements)
+ * indices can be NULL if the input is unindexed
  * stream_count must be <= 16
  */
 MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
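A typical indexing workflow built on the remap API, for context (the vertex layout and wrapper are illustrative; this mirrors the usage the doc comments above describe):

```cpp
#include "meshoptimizer.h"
#include <vector>

struct Vertex { float px, py, pz, nx, ny, nz, tu, tv; };

void reindex(const std::vector<Vertex>& vertices,
    std::vector<Vertex>& out_vertices, std::vector<unsigned int>& out_indices)
{
    // unindexed input: every 3 consecutive vertices form a triangle
    size_t index_count = vertices.size();

    std::vector<unsigned int> remap(index_count);
    size_t unique = meshopt_generateVertexRemap(remap.data(), NULL, index_count,
        vertices.data(), index_count, sizeof(Vertex));

    out_vertices.resize(unique);
    out_indices.resize(index_count);

    meshopt_remapVertexBuffer(out_vertices.data(), vertices.data(), index_count, sizeof(Vertex), remap.data());
    meshopt_remapIndexBuffer(out_indices.data(), NULL, index_count, remap.data());
}
```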
@@ -79,6 +91,7 @@ MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap);
  * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary
  * All vertices that are binary equivalent (wrt first vertex_size bytes) map to the first vertex in the original vertex buffer.
  * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering.
+ * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized.
  *
  * destination must contain enough space for the resulting index buffer (index_count elements)
  */
@@ -88,11 +101,42 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride);
  * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary
  * All vertices that are binary equivalent (wrt specified streams) map to the first vertex in the original vertex buffer.
  * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering.
+ * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized.
  *
  * destination must contain enough space for the resulting index buffer (index_count elements)
  * stream_count must be <= 16
  */
 MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
 
+/**
+ * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology
+ * Each triangle is converted into a 6-vertex patch with the following layout:
+ * - 0, 2, 4: original triangle vertices
+ * - 1, 3, 5: vertices adjacent to edges 02, 24 and 40
+ * The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY.
+ * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count*2 elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex
+ */
+MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
+ * Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement
+ * Each triangle is converted into a 12-vertex patch with the following layout:
+ * - 0, 1, 2: original triangle vertices
+ * - 3, 4: opposing edge for edge 0, 1
+ * - 5, 6: opposing edge for edge 1, 2
+ * - 7, 8: opposing edge for edge 2, 0
+ * - 9, 10, 11: dominant vertices for corners 0, 1, 2
+ * The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping.
+ * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count*4 elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex
+ */
+MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
 /**
  * Vertex transform cache optimizer
  * Reorders indices to reduce the number of GPU vertex shader invocations
@@ -129,7 +173,7 @@ MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size);
  *
  * destination must contain enough space for the resulting index buffer (index_count elements)
  * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!)
- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex
  * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently
  */
 MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold);
@@ -168,10 +212,10 @@ MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
 MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count);
 
 /**
- * Experimental: Set index encoder format version
+ * Set index encoder format version
  * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+)
  */
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeIndexVersion(int version);
+MESHOPTIMIZER_API void meshopt_encodeIndexVersion(int version);
 
 /**
  * Index buffer decoder
@@ -184,15 +228,15 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeIndexVersion(int version);
|
||||
MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);
/**
 * Experimental: Index sequence encoder
 * Index sequence encoder
 * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original.
 * Input index sequence can represent arbitrary topology; for triangle lists meshopt_encodeIndexBuffer is likely to be better.
 * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
 *
 * buffer must contain enough space for the encoded index sequence (use meshopt_encodeIndexSequenceBound to compute worst case size)
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count);
MESHOPTIMIZER_API size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
MESHOPTIMIZER_API size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count);

/**
 * Index sequence decoder
@@ -202,13 +246,14 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequenceBound(size_t index_
 *
 * destination must contain enough space for the resulting index sequence (index_count elements)
 */
MESHOPTIMIZER_EXPERIMENTAL int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);
MESHOPTIMIZER_API int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);

/**
 * Vertex buffer encoder
 * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original.
 * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
 * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream.
 * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized.
 *
 * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size)
 */
@@ -216,10 +261,10 @@ MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_
MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size);
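A hedged sketch of encoding one vertex stream; `Vertex` is a hypothetical packed struct whose padding is assumed to be zero-initialized:

#include "meshoptimizer.h"
#include <vector>

template <typename Vertex>
std::vector<unsigned char> encodeVertices(const std::vector<Vertex>& vertices)
{
	std::vector<unsigned char> buffer(meshopt_encodeVertexBufferBound(vertices.size(), sizeof(Vertex)));
	buffer.resize(meshopt_encodeVertexBuffer(buffer.data(), buffer.size(), vertices.data(), vertices.size(), sizeof(Vertex)));
	return buffer;
}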
/**
 * Experimental: Set vertex encoder format version
 * Set vertex encoder format version
 * version must specify the data format version to encode; valid values are 0 (decodable by all library versions)
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeVertexVersion(int version);
MESHOPTIMIZER_API void meshopt_encodeVertexVersion(int version);

/**
 * Vertex buffer decoder
@@ -234,7 +279,6 @@ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t verte

/**
 * Vertex buffer filters
 * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place.
 * count must be aligned by 4 and stride is fixed for each function to facilitate SIMD implementation.
 *
 * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f.
 * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
@@ -245,12 +289,51 @@ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t verte
 * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M.
 * Each 32-bit component is decoded in isolation; stride must be divisible by 4.
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride);

/**
 * Experimental: Mesh simplifier
 * Vertex buffer filter encoders
 * These functions can be used to encode data in a format that meshopt_decodeFilter can decode
 *
 * meshopt_encodeFilterOct encodes unit vectors with K-bit (K <= 16) signed X/Y as an output.
 * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
 * Input data must contain 4 floats for every vector (count*4 total).
 *
 * meshopt_encodeFilterQuat encodes unit quaternions with K-bit (4 <= K <= 16) component encoding.
 * Each component is stored as a 16-bit integer; stride must be equal to 8.
 * Input data must contain 4 floats for every quaternion (count*4 total).
 *
 * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24).
 * Exponent can be shared between all components of a given vector as defined by stride or all values of a given component; stride must be divisible by 4.
 * Input data must contain stride/4 floats for every vector (count*stride/4 total).
 */
enum meshopt_EncodeExpMode
{
	/* When encoding exponents, use separate values for each component (maximum quality) */
	meshopt_EncodeExpSeparate,
	/* When encoding exponents, use shared value for all components of each vector (better compression) */
	meshopt_EncodeExpSharedVector,
	/* When encoding exponents, use shared value for each component of all vectors (best compression) */
	meshopt_EncodeExpSharedComponent,
};

MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data);
MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode);
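An illustrative sketch for the octahedral path, assuming `normals` holds count*4 floats of unit vectors (w passed through); stride 4 with 8 bits packs each normal into 4 bytes:

#include "meshoptimizer.h"
#include <vector>

std::vector<unsigned char> packNormals(const std::vector<float>& normals /* count*4 floats */)
{
	size_t count = normals.size() / 4;
	std::vector<unsigned char> packed(count * 4); // stride 4 => 8-bit components

	meshopt_encodeFilterOct(packed.data(), count, 4, 8, normals.data());
	return packed; // meshopt_decodeFilterOct(packed.data(), count, 4) reverses this; count must be aligned by 4
}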
/**
 * Simplification options
 */
enum
{
	/* Do not move vertices that are located on the topological border (vertices on triangle edges that don't have a paired triangle). Useful for simplifying portions of the larger mesh. */
	meshopt_SimplifyLockBorder = 1 << 0,
};

/**
 * Mesh simplifier
 * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible
 * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error.
 * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification.
@@ -258,23 +341,40 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver
 * The resulting index buffer references vertices from the original vertex buffer.
 * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
 *
 * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!)
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)!
 * vertex_positions should have float3 position in the first 12 bytes of each vertex
 * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1]
 * options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default
 * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error);
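A typical invocation, sketched under the assumption that `indices`/`positions` describe the source mesh; the destination is sized for the worst case:

#include "meshoptimizer.h"
#include <vector>

std::vector<unsigned int> simplifyToQuarter(const std::vector<unsigned int>& indices, const std::vector<float>& positions)
{
	size_t target_index_count = (indices.size() / 3 / 4) * 3; // aim for ~25% of the triangles
	float result_error = 0.f;

	std::vector<unsigned int> lod(indices.size()); // worst case: index_count elements
	lod.resize(meshopt_simplify(lod.data(), indices.data(), indices.size(), positions.data(), positions.size() / 3, sizeof(float) * 3, target_index_count, /* target_error= */ 0.01f, /* options= */ 0, &result_error));
	return lod;
}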
/**
 * Experimental: Mesh simplifier with attribute metric
 * The algorithm enhances meshopt_simplify by incorporating attribute values into the error metric used to prioritize simplification order; see meshopt_simplify documentation for details.
 * Note that the number of attributes affects memory requirements and running time; this algorithm requires ~1.5x more memory and time compared to meshopt_simplify when using 4 scalar attributes.
 *
 * vertex_attributes should have attribute_count floats for each vertex
 * attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position. The recommended weight range is [1e-3..1e-1], assuming attribute data is in [0..1] range.
 * attribute_count must be <= 16
 * TODO target_error/result_error currently use combined distance+attribute error; this may change in the future
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* result_error);
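A sketch that feeds per-vertex normals in as three scalar attributes; the 1e-2 weights are an assumption to tune, within the recommended [1e-3..1e-1] range:

#include "meshoptimizer.h"
#include <vector>

std::vector<unsigned int> simplifyWithNormals(const std::vector<unsigned int>& indices, const std::vector<float>& positions, const std::vector<float>& normals, size_t target_index_count)
{
	// weights chosen for illustration; tune per asset
	const float attribute_weights[3] = {1e-2f, 1e-2f, 1e-2f};
	float result_error = 0.f;

	std::vector<unsigned int> lod(indices.size());
	lod.resize(meshopt_simplifyWithAttributes(lod.data(), indices.data(), indices.size(), positions.data(), positions.size() / 3, sizeof(float) * 3, normals.data(), sizeof(float) * 3, attribute_weights, 3, target_index_count, 0.01f, 0, &result_error));
	return lod;
}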
/**
 * Experimental: Mesh simplifier (sloppy)
 * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance
 * The algorithm doesn't preserve mesh topology but is always able to reach target triangle count.
 * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance
 * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error.
 * Returns the number of indices after simplification, with destination containing new index data
 * The resulting index buffer references vertices from the original vertex buffer.
 * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
 *
 * destination must contain enough space for the target index buffer
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)!
 * vertex_positions should have float3 position in the first 12 bytes of each vertex
 * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1]
 * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error);

/**
 * Experimental: Point cloud simplifier
@@ -283,10 +383,19 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destinati
 * The resulting index buffer references vertices from the original vertex buffer.
 * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
 *
 * destination must contain enough space for the target index buffer
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 * destination must contain enough space for the target index buffer (target_vertex_count elements)
 * vertex_positions should have float3 position in the first 12 bytes of each vertex
 * vertex_colors can be NULL; when it's not NULL, it should have float3 color in the first 12 bytes of each vertex
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count);
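A sketch for a colored point cloud; the 0.1 color weight is a placeholder to tune:

#include "meshoptimizer.h"
#include <vector>

std::vector<unsigned int> thinPointCloud(const std::vector<float>& positions, const std::vector<float>& colors, size_t target_vertex_count)
{
	size_t point_count = positions.size() / 3;

	std::vector<unsigned int> kept(target_vertex_count);
	kept.resize(meshopt_simplifyPoints(kept.data(), positions.data(), point_count, sizeof(float) * 3, colors.data(), sizeof(float) * 3, /* color_weight= */ 0.1f, target_vertex_count));
	return kept; // indices of surviving points in the original buffer
}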
/**
 * Returns the error scaling factor used by the simplifier to convert between absolute and relative extents
 *
 * Absolute error must be *divided* by the scaling factor before passing it to meshopt_simplify as target_error
 * Relative error returned by meshopt_simplify via result_error must be *multiplied* by the scaling factor to get absolute error.
 */
MESHOPTIMIZER_API float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
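In practice this pairs with meshopt_simplify as follows; `positions` is an assumed input and the 0.05-unit budget is illustrative:

#include "meshoptimizer.h"
#include <vector>

// Convert an absolute world-space error budget into the relative units the simplifier expects.
float relativeErrorBudget(const std::vector<float>& positions, float absolute_error /* e.g. 0.05f */)
{
	float scale = meshopt_simplifyScale(positions.data(), positions.size() / 3, sizeof(float) * 3);
	return absolute_error / scale; // multiply result_error by 'scale' to go the other way
}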
/**
 * Mesh stripifier
@@ -338,7 +447,7 @@ struct meshopt_OverdrawStatistics
 * Returns overdraw statistics using a software rasterizer
 * Results may not match actual GPU performance
 *
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 * vertex_positions should have float3 position in the first 12 bytes of each vertex
 */
MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);

@@ -357,23 +466,32 @@ MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetc

struct meshopt_Meshlet
{
	unsigned int vertices[64];
	unsigned char indices[126][3];
	unsigned char triangle_count;
	unsigned char vertex_count;
	/* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */
	unsigned int vertex_offset;
	unsigned int triangle_offset;

	/* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */
	unsigned int vertex_count;
	unsigned int triangle_count;
};

/**
 * Experimental: Meshlet builder
 * Meshlet builder
 * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer
 * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers.
 * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
 * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters.
 * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
 *
 * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
 * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126)
 * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
 * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices
 * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3
 * vertex_positions should have float3 position in the first 12 bytes of each vertex
 * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512)
 * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency
 */
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);
MESHOPTIMIZER_API size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
MESHOPTIMIZER_API size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);
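A build sketch using the worst-case bound to size the output arrays; `indices`/`positions` are assumed inputs, and the 64/124 limits are a common mesh-shader-friendly choice rather than a library requirement:

#include "meshoptimizer.h"
#include <vector>

size_t buildMeshletsForMesh(const std::vector<unsigned int>& indices, const std::vector<float>& positions, std::vector<meshopt_Meshlet>& meshlets, std::vector<unsigned int>& meshlet_vertices, std::vector<unsigned char>& meshlet_triangles)
{
	const size_t max_vertices = 64, max_triangles = 124;

	size_t max_meshlets = meshopt_buildMeshletsBound(indices.size(), max_vertices, max_triangles);
	meshlets.resize(max_meshlets);
	meshlet_vertices.resize(max_meshlets * max_vertices);
	meshlet_triangles.resize(max_meshlets * max_triangles * 3);

	size_t count = meshopt_buildMeshlets(meshlets.data(), meshlet_vertices.data(), meshlet_triangles.data(), indices.data(), indices.size(), positions.data(), positions.size() / 3, sizeof(float) * 3, max_vertices, max_triangles, /* cone_weight= */ 0.25f);
	meshlets.resize(count);
	return count;
}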
struct meshopt_Bounds
{
@@ -392,13 +510,13 @@ struct meshopt_Bounds
};

/**
 * Experimental: Cluster bounds generator
 * Cluster bounds generator
 * Creates bounding volumes that can be used for frustum, backface and occlusion culling.
 *
 * For backface culling with orthographic projection, use the following formula to reject backfacing clusters:
 *   dot(view, cone_axis) >= cone_cutoff
 *
 * For perspective projection, you can the formula that needs cone apex in addition to axis & cutoff:
 * For perspective projection, you can use the formula that needs cone apex in addition to axis & cutoff:
 *   dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff
 *
 * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead:
@@ -407,29 +525,31 @@ struct meshopt_Bounds
 *   dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius
 *
 * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere
 * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable.
 * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable (for derivation see
 * Real-Time Rendering 4th Edition, section 19.3).
 *
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size)
 * vertex_positions should have float3 position in the first 12 bytes of each vertex
 * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size)
 */
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
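The perspective apex formula above translates directly into a rejection test; a sketch with the division by length rewritten as a multiply to avoid a normalize:

#include "meshoptimizer.h"
#include <math.h>

// true when the whole cluster faces away from the camera and can be skipped
bool coneBackfaceCull(const meshopt_Bounds& bounds, const float camera_position[3])
{
	float dx = bounds.cone_apex[0] - camera_position[0];
	float dy = bounds.cone_apex[1] - camera_position[1];
	float dz = bounds.cone_apex[2] - camera_position[2];

	float d = dx * bounds.cone_axis[0] + dy * bounds.cone_axis[1] + dz * bounds.cone_axis[2];

	// dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff
	return d >= bounds.cone_cutoff * sqrtf(dx * dx + dy * dy + dz * dz);
}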
/**
 * Experimental: Spatial sorter
 * Spatial sorter
 * Generates a remap table that can be used to reorder points for spatial locality.
 * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer.
 *
 * destination must contain enough space for the resulting remap table (vertex_count elements)
 * vertex_positions should have float3 position in the first 12 bytes of each vertex
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
MESHOPTIMIZER_API void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);

/**
 * Experimental: Spatial sorter
 * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache.
 *
 * destination must contain enough space for the resulting index buffer (index_count elements)
 * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
 * vertex_positions should have float3 position in the first 12 bytes of each vertex
 */
MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);

@@ -439,7 +559,7 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* desti
 * Note that all algorithms only allocate memory for temporary use.
 * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first.
 */
MESHOPTIMIZER_API void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*));
MESHOPTIMIZER_API void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*));
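A hookup sketch; the hypothetical trackedAlloc/trackedFree just forward to the global operators, and MESHOPTIMIZER_ALLOC_CALLCONV expands to nothing on most platforms:

#include "meshoptimizer.h"

static void* MESHOPTIMIZER_ALLOC_CALLCONV trackedAlloc(size_t size)
{
	return ::operator new(size);
}

static void MESHOPTIMIZER_ALLOC_CALLCONV trackedFree(void* ptr)
{
	::operator delete(ptr);
}

void installAllocator()
{
	// call once at startup, before any meshopt_* function that allocates
	meshopt_setAllocator(trackedAlloc, trackedFree);
}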
#ifdef __cplusplus
} /* extern "C" */
@@ -462,19 +582,25 @@ inline int meshopt_quantizeUnorm(float v, int N);
inline int meshopt_quantizeSnorm(float v, int N);

/**
 * Quantize a float into half-precision floating point value
 * Quantize a float into half-precision (as defined by IEEE-754 fp16) floating point value
 * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
 * Representable magnitude range: [6e-5; 65504]
 * Maximum relative reconstruction error: 5e-4
 */
inline unsigned short meshopt_quantizeHalf(float v);
MESHOPTIMIZER_API unsigned short meshopt_quantizeHalf(float v);

/**
 * Quantize a float into a floating point value with a limited number of significant mantissa bits
 * Quantize a float into a floating point value with a limited number of significant mantissa bits, preserving the IEEE-754 fp32 binary representation
 * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
 * Assumes N is in a valid mantissa precision range, which is 1..23
 */
inline float meshopt_quantizeFloat(float v, int N);
MESHOPTIMIZER_API float meshopt_quantizeFloat(float v, int N);

/**
 * Reverse quantization of a half-precision (as defined by IEEE-754 fp16) floating point value
 * Preserves Inf/NaN, flushes denormals to zero
 */
MESHOPTIMIZER_API float meshopt_dequantizeHalf(unsigned short h);
#endif
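Example round trips; 1.5 is chosen because it is exactly representable in fp16:

#include "meshoptimizer.h"

void quantizeExamples()
{
	unsigned short h = meshopt_quantizeHalf(1.5f); // 0x3E00
	float v = meshopt_dequantizeHalf(h);           // 1.5f, exact round trip

	float q = meshopt_quantizeFloat(0.123456789f, 10); // keeps 10 significant mantissa bits, stays a float
	(void)v;
	(void)q;
}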
/**
@@ -497,6 +623,10 @@ inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices,
template <typename T>
inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
template <typename T>
inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count);
template <typename T>
inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count);
@@ -517,9 +647,11 @@ inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_s
template <typename T>
inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size);
template <typename T>
inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error);
inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL);
template <typename T>
inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count);
inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL);
template <typename T>
inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = NULL);
template <typename T>
inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index);
template <typename T>
@@ -531,7 +663,9 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size
template <typename T>
inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
template <typename T>
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
template <typename T>
inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
template <typename T>
inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
@@ -561,50 +695,6 @@ inline int meshopt_quantizeSnorm(float v, int N)

	return int(v * scale + round);
}

inline unsigned short meshopt_quantizeHalf(float v)
{
	union { float f; unsigned int ui; } u = {v};
	unsigned int ui = u.ui;

	int s = (ui >> 16) & 0x8000;
	int em = ui & 0x7fffffff;

	/* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */
	int h = (em - (112 << 23) + (1 << 12)) >> 13;

	/* underflow: flush to zero; 113 encodes exponent -14 */
	h = (em < (113 << 23)) ? 0 : h;

	/* overflow: infinity; 143 encodes exponent 16 */
	h = (em >= (143 << 23)) ? 0x7c00 : h;

	/* NaN; note that we convert all types of NaN to qNaN */
	h = (em > (255 << 23)) ? 0x7e00 : h;

	return (unsigned short)(s | h);
}

inline float meshopt_quantizeFloat(float v, int N)
{
	union { float f; unsigned int ui; } u = {v};
	unsigned int ui = u.ui;

	const int mask = (1 << (23 - N)) - 1;
	const int round = (1 << (23 - N)) >> 1;

	int e = ui & 0x7f800000;
	unsigned int rui = (ui + round) & ~mask;

	/* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */
	ui = e == 0x7f800000 ? ui : rui;

	/* flush denormals to zero */
	ui = e == 0 ? 0 : ui;

	u.ui = ui;
	return u.f;
}
#endif
/* Internal implementation helpers */
@@ -615,8 +705,8 @@ public:
template <typename T>
struct StorageT
{
	static void* (*allocate)(size_t);
	static void (*deallocate)(void*);
	static void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t);
	static void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*);
};

typedef StorageT<void> Storage;
@@ -641,14 +731,21 @@ public:
		return result;
	}

	void deallocate(void* ptr)
	{
		assert(count > 0 && blocks[count - 1] == ptr);
		Storage::deallocate(ptr);
		count--;
	}

private:
	void* blocks[16];
	void* blocks[24];
	size_t count;
};

// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker
template <typename T> void* (*meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new;
template <typename T> void (*meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete;
template <typename T> void* (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new;
template <typename T> void (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete;
#endif
/* Inline implementation for C++ templated wrappers */
@@ -665,7 +762,7 @@ struct meshopt_IndexAdapter<T, false>

	meshopt_IndexAdapter(T* result_, const T* input, size_t count_)
	    : result(result_)
	    , data(0)
	    , data(NULL)
	    , count(count_)
	{
		size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int);
@@ -705,33 +802,33 @@ struct meshopt_IndexAdapter<T, true>
template <typename T>
inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
{
	meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
	meshopt_IndexAdapter<T> in(NULL, indices, indices ? index_count : 0);

	return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size);
	return meshopt_generateVertexRemap(destination, indices ? in.data : NULL, index_count, vertices, vertex_count, vertex_size);
}

template <typename T>
inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count)
{
	meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
	meshopt_IndexAdapter<T> in(NULL, indices, indices ? index_count : 0);

	return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count);
	return meshopt_generateVertexRemapMulti(destination, indices ? in.data : NULL, index_count, vertex_count, streams, stream_count);
}
template <typename T>
inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap)
{
	meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
	meshopt_IndexAdapter<T> in(NULL, indices, indices ? index_count : 0);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	meshopt_remapIndexBuffer(out.data, indices ? in.data : 0, index_count, remap);
	meshopt_remapIndexBuffer(out.data, indices ? in.data : NULL, index_count, remap);
}
template <typename T>
inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride);
}
@@ -739,17 +836,35 @@ inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices,
template <typename T>
inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count);
}

template <typename T>
inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count * 2);

	meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}

template <typename T>
inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count * 4);

	meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}

template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count);
}
@@ -757,8 +872,8 @@ inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t
template <typename T>
inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count);
}
@@ -766,8 +881,8 @@ inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, s
template <typename T>
inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size);
}
@@ -775,8 +890,8 @@ inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, si
template <typename T>
inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold);
}
@@ -784,7 +899,7 @@ inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t in
template <typename T>
inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count);
}
@@ -800,7 +915,7 @@ inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t
template <typename T>
inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count);
}
@@ -817,7 +932,7 @@ inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const u
template <typename T>
inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count);
}
@@ -832,28 +947,37 @@ inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const
}
template <typename T>
inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error)
inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error);
	return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, options, result_error);
}

template <typename T>
inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count)
inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* result_error)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, target_index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count);
	return meshopt_simplifyWithAttributes(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, target_index_count, target_error, options, result_error);
}
template <typename T>
inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error)
{
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error);
}

template <typename T>
inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, (index_count / 3) * 5);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, (index_count / 3) * 5);

	return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index));
}
@@ -861,8 +985,8 @@ inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_co
template <typename T>
inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, (index_count - 2) * 3);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, (index_count - 2) * 3);

	return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index));
}
@@ -870,7 +994,7 @@ inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_
template <typename T>
inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size);
}
@@ -878,7 +1002,7 @@ inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices
template <typename T>
inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}
@@ -886,23 +1010,31 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size
template <typename T>
inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size);
}

template <typename T>
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles);
	return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight);
}

template <typename T>
inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
{
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles);
}

template <typename T>
inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);

	return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}
@@ -910,15 +1042,15 @@ inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t inde
template <typename T>
inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	meshopt_IndexAdapter<T> in(0, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, 0, index_count);
	meshopt_IndexAdapter<T> in(NULL, indices, index_count);
	meshopt_IndexAdapter<T> out(destination, NULL, index_count);

	meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}
#endif

/**
 * Copyright (c) 2016-2020 Arseny Kapoulkine
 * Copyright (c) 2016-2023 Arseny Kapoulkine
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
@@ -147,7 +147,7 @@ meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices,
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

@@ -272,7 +272,7 @@ void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* ind
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;
70
Source/ThirdParty/meshoptimizer/quantization.cpp
vendored
Normal file
@@ -0,0 +1,70 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>

unsigned short meshopt_quantizeHalf(float v)
{
	union { float f; unsigned int ui; } u = {v};
	unsigned int ui = u.ui;

	int s = (ui >> 16) & 0x8000;
	int em = ui & 0x7fffffff;

	// bias exponent and round to nearest; 112 is relative exponent bias (127-15)
	int h = (em - (112 << 23) + (1 << 12)) >> 13;

	// underflow: flush to zero; 113 encodes exponent -14
	h = (em < (113 << 23)) ? 0 : h;

	// overflow: infinity; 143 encodes exponent 16
	h = (em >= (143 << 23)) ? 0x7c00 : h;

	// NaN; note that we convert all types of NaN to qNaN
	h = (em > (255 << 23)) ? 0x7e00 : h;

	return (unsigned short)(s | h);
}

float meshopt_quantizeFloat(float v, int N)
{
	assert(N >= 0 && N <= 23);

	union { float f; unsigned int ui; } u = {v};
	unsigned int ui = u.ui;

	const int mask = (1 << (23 - N)) - 1;
	const int round = (1 << (23 - N)) >> 1;

	int e = ui & 0x7f800000;
	unsigned int rui = (ui + round) & ~mask;

	// round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0
	ui = e == 0x7f800000 ? ui : rui;

	// flush denormals to zero
	ui = e == 0 ? 0 : ui;

	u.ui = ui;
	return u.f;
}

float meshopt_dequantizeHalf(unsigned short h)
{
	unsigned int s = unsigned(h & 0x8000) << 16;
	int em = h & 0x7fff;

	// bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15)
	int r = (em + (112 << 10)) << 13;

	// denormal: flush to zero
	r = (em < (1 << 10)) ? 0 : r;

	// infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases
	// 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255
	r += (em >= (31 << 10)) ? (112 << 23) : 0;

	union { float f; unsigned int ui; } u;
	u.ui = s | r;
	return u.f;
}
934
Source/ThirdParty/meshoptimizer/simplifier.cpp
vendored
File diff suppressed because it is too large
@@ -113,7 +113,7 @@ void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_pos
{
	using namespace meshopt;

	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;
@@ -144,7 +144,7 @@ void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int*
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	(void)vertex_count;
@@ -110,7 +110,7 @@ static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned
	return ~0u;
}

static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
static unsigned int getNextVertexNeighbor(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
{
	unsigned int best_candidate = ~0u;
	int best_priority = -1;
@@ -221,9 +221,9 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
		triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
	}

	unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
	unsigned int cache_holder[2 * (kCacheSizeMax + 4)];
	unsigned int* cache = cache_holder;
	unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
	unsigned int* cache_new = cache_holder + kCacheSizeMax + 4;
	size_t cache_count = 0;

	unsigned int current_triangle = 0;
@@ -260,10 +260,8 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
		{
			unsigned int index = cache[i];

			if (index != a && index != b && index != c)
			{
				cache_new[cache_write++] = index;
			}
			cache_new[cache_write] = index;
			cache_write += (index != a && index != b && index != c);
		}

		unsigned int* cache_temp = cache;
@@ -281,16 +279,16 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
		{
			unsigned int index = indices[current_triangle * 3 + k];

			unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
			size_t neighbours_size = adjacency.counts[index];
			unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
			size_t neighbors_size = adjacency.counts[index];

			for (size_t i = 0; i < neighbours_size; ++i)
			for (size_t i = 0; i < neighbors_size; ++i)
			{
				unsigned int tri = neighbours[i];
				unsigned int tri = neighbors[i];

				if (tri == current_triangle)
				{
					neighbours[i] = neighbours[neighbours_size - 1];
					neighbors[i] = neighbors[neighbors_size - 1];
					adjacency.counts[index]--;
					break;
				}
@@ -305,6 +303,10 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
		{
			unsigned int index = cache[i];

			// no need to update scores if we are never going to use this vertex
			if (adjacency.counts[index] == 0)
				continue;

			int cache_position = i >= cache_size ? -1 : int(i);

			// update vertex score
@@ -314,10 +316,10 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
			vertex_scores[index] = score;

			// update scores of vertex triangles
			const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index];
			const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index];
			const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[index];
			const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[index];

			for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
			for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
			{
				unsigned int tri = *it;
				assert(!emitted_flags[tri]);
@@ -325,11 +327,8 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned
				float tri_score = triangle_scores[tri] + score_diff;
				assert(tri_score > 0);

				if (best_score < tri_score)
				{
					best_triangle = tri;
					best_score = tri_score;
				}
				best_triangle = best_score < tri_score ? tri : best_triangle;
				best_score = best_score < tri_score ? tri_score : best_score;

				triangle_scores[tri] = tri_score;
			}
@@ -412,11 +411,11 @@ void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned i
	{
		const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;

		// emit all vertex neighbours
		const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
		const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex];
|
||||
// emit all vertex neighbors
|
||||
const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
|
||||
const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[current_vertex];
|
||||
|
||||
for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
|
||||
for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
|
||||
{
|
||||
unsigned int triangle = *it;
|
||||
|
||||
@@ -461,7 +460,7 @@ void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned i
|
||||
const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
|
||||
|
||||
// get next vertex
|
||||
current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
|
||||
current_vertex = getNextVertexNeighbor(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
|
||||
|
||||
if (current_vertex == ~0u)
|
||||
{
|
||||
|
||||
243
Source/ThirdParty/meshoptimizer/vertexcodec.cpp
vendored
243
Source/ThirdParty/meshoptimizer/vertexcodec.cpp
vendored
@@ -42,16 +42,24 @@
|
||||
#endif
|
||||
|
||||
// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
|
||||
// Note that we need unimplemented-simd128 subset for a few functions that are implemented de-facto
|
||||
#if defined(__wasm_simd128__)
|
||||
#define SIMD_WASM
|
||||
#define SIMD_TARGET __attribute__((target("unimplemented-simd128")))
|
||||
// Prevent compiling other variant when wasm simd compilation is active
|
||||
#undef SIMD_NEON
|
||||
#undef SIMD_SSE
|
||||
#undef SIMD_AVX
|
||||
#endif
|
||||
|
||||
#ifndef SIMD_TARGET
|
||||
#define SIMD_TARGET
|
||||
#endif
|
||||
|
||||
// When targeting AArch64/x64, optimize for latency to allow decoding of individual 16-byte groups to overlap
|
||||
// We don't do this for 32-bit systems because we need 64-bit math for this and this will hurt in-order CPUs
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64)
|
||||
#define SIMD_LATENCYOPT
|
||||
#endif
|
||||
|
||||
#endif // !MESHOPTIMIZER_NO_SIMD
|
||||
|
||||
#ifdef SIMD_SSE
|
||||
@@ -82,31 +90,14 @@
|
||||
#include <wasm_simd128.h>
|
||||
#endif
|
||||
|
||||
#ifndef TRACE
|
||||
#define TRACE 0
|
||||
#endif
|
||||
|
||||
#if TRACE
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#ifdef SIMD_WASM
|
||||
#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i)
|
||||
#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
|
||||
#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
|
||||
#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
|
||||
#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
|
||||
#define wasmx_unpacklo_v64x2(a, b) wasm_v64x2_shuffle(a, b, 0, 2)
|
||||
#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3)
|
||||
#endif
|
||||
|
||||
#if defined(SIMD_WASM)
|
||||
// v128_t wasm_v8x16_swizzle(v128_t a, v128_t b)
|
||||
SIMD_TARGET
|
||||
static __inline__ v128_t wasm_v8x16_swizzle(v128_t a, v128_t b)
|
||||
{
|
||||
return (v128_t)__builtin_wasm_swizzle_v8x16((__i8x16)a, (__i8x16)b);
|
||||
}
|
||||
#define wasmx_splat_v32x4(v, i) wasm_i32x4_shuffle(v, v, i, i, i, i)
|
||||
#define wasmx_unpacklo_v8x16(a, b) wasm_i8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
|
||||
#define wasmx_unpackhi_v8x16(a, b) wasm_i8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
|
||||
#define wasmx_unpacklo_v16x8(a, b) wasm_i16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
|
||||
#define wasmx_unpackhi_v16x8(a, b) wasm_i16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
|
||||
#define wasmx_unpacklo_v64x2(a, b) wasm_i64x2_shuffle(a, b, 0, 2)
|
||||
#define wasmx_unpackhi_v64x2(a, b) wasm_i64x2_shuffle(a, b, 1, 3)
|
||||
#endif
|
||||
|
||||
namespace meshopt
|
||||
@@ -144,19 +135,6 @@ inline unsigned char unzigzag8(unsigned char v)
|
||||
return -(v & 1) ^ (v >> 1);
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
struct Stats
|
||||
{
|
||||
size_t size;
|
||||
size_t header;
|
||||
size_t bitg[4];
|
||||
size_t bitb[4];
|
||||
};
|
||||
|
||||
Stats* bytestats;
|
||||
Stats vertexstats[256];
|
||||
#endif
|
||||
|
||||
static bool encodeBytesGroupZero(const unsigned char* buffer)
|
||||
{
|
||||
for (size_t i = 0; i < kByteGroupSize; ++i)
|
||||
@@ -242,7 +220,7 @@ static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end,
|
||||
size_t header_size = (buffer_size / kByteGroupSize + 3) / 4;
|
||||
|
||||
if (size_t(data_end - data) < header_size)
|
||||
return 0;
|
||||
return NULL;
|
||||
|
||||
data += header_size;
|
||||
|
||||
@@ -251,7 +229,7 @@ static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end,
|
||||
for (size_t i = 0; i < buffer_size; i += kByteGroupSize)
|
||||
{
|
||||
if (size_t(data_end - data) < kByteGroupDecodeLimit)
|
||||
return 0;
|
||||
return NULL;
|
||||
|
||||
int best_bits = 8;
|
||||
size_t best_size = encodeBytesGroupMeasure(buffer + i, 8);
|
||||
@@ -278,17 +256,8 @@ static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end,
|
||||
|
||||
assert(data + best_size == next);
|
||||
data = next;
|
||||
|
||||
#if TRACE > 1
|
||||
bytestats->bitg[bitslog2]++;
|
||||
bytestats->bitb[bitslog2] += best_size;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if TRACE > 1
|
||||
bytestats->header += header_size;
|
||||
#endif
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -317,19 +286,9 @@ static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data
|
||||
vertex_offset += vertex_size;
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
const unsigned char* olddata = data;
|
||||
bytestats = &vertexstats[k];
|
||||
#endif
|
||||
|
||||
data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1));
|
||||
if (!data)
|
||||
return 0;
|
||||
|
||||
#if TRACE
|
||||
bytestats = 0;
|
||||
vertexstats[k].size += data - olddata;
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size);
|
||||
@@ -337,7 +296,7 @@ static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data
|
||||
return data;
|
||||
}
|
||||
|
||||
#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX))
|
||||
#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX) && !defined(SIMD_WASM))
|
||||
static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2)
|
||||
{
|
||||
#define READ() byte = *data++
|
||||
@@ -397,14 +356,14 @@ static const unsigned char* decodeBytes(const unsigned char* data, const unsigne
|
||||
size_t header_size = (buffer_size / kByteGroupSize + 3) / 4;
|
||||
|
||||
if (size_t(data_end - data) < header_size)
|
||||
return 0;
|
||||
return NULL;
|
||||
|
||||
data += header_size;
|
||||
|
||||
for (size_t i = 0; i < buffer_size; i += kByteGroupSize)
|
||||
{
|
||||
if (size_t(data_end - data) < kByteGroupDecodeLimit)
|
||||
return 0;
|
||||
return NULL;
|
||||
|
||||
size_t header_offset = i / kByteGroupSize;
|
||||
|
||||
@@ -429,7 +388,7 @@ static const unsigned char* decodeVertexBlock(const unsigned char* data, const u
|
||||
{
|
||||
data = decodeBytes(data, data_end, buffer, vertex_count_aligned);
|
||||
if (!data)
|
||||
return 0;
|
||||
return NULL;
|
||||
|
||||
size_t vertex_offset = k;
|
||||
|
||||
@@ -458,7 +417,7 @@ static const unsigned char* decodeVertexBlock(const unsigned char* data, const u
|
||||
static unsigned char kDecodeBytesGroupShuffle[256][8];
|
||||
static unsigned char kDecodeBytesGroupCount[256];
|
||||
|
||||
#ifdef EMSCRIPTEN
|
||||
#ifdef __wasm__
|
||||
__attribute__((cold)) // this saves 500 bytes in the output binary - we don't need to vectorize this loop!
|
||||
#endif
|
||||
static bool
|
||||
@@ -521,6 +480,18 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi
|
||||
typedef int unaligned_int;
|
||||
#endif
|
||||
|
||||
#ifdef SIMD_LATENCYOPT
|
||||
unsigned int data32;
|
||||
memcpy(&data32, data, 4);
|
||||
data32 &= data32 >> 1;
|
||||
|
||||
// arrange bits such that low bits of nibbles of data64 contain all 2-bit elements of data32
|
||||
unsigned long long data64 = ((unsigned long long)data32 << 30) | (data32 & 0x3fffffff);
|
||||
|
||||
// adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3
|
||||
int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60);
|
||||
#endif
|
||||
|
||||
__m128i sel2 = _mm_cvtsi32_si128(*reinterpret_cast<const unaligned_int*>(data));
|
||||
__m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 4));
|
||||
|
||||
@@ -539,11 +510,25 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi
|
||||
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
|
||||
|
||||
#ifdef SIMD_LATENCYOPT
|
||||
return data + 4 + datacnt;
|
||||
#else
|
||||
return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
|
||||
#endif
|
||||
}
|
||||
|
||||
case 2:
|
||||
{
|
||||
#ifdef SIMD_LATENCYOPT
|
||||
unsigned long long data64;
|
||||
memcpy(&data64, data, 8);
|
||||
data64 &= data64 >> 1;
|
||||
data64 &= data64 >> 2;
|
||||
|
||||
// adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3
|
||||
int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60);
|
||||
#endif
|
||||
|
||||
__m128i sel4 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(data));
|
||||
__m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 8));
|
||||
|
||||
@@ -561,7 +546,11 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi
|
||||
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);
|
||||
|
||||
#ifdef SIMD_LATENCYOPT
|
||||
return data + 8 + datacnt;
|
||||
#else
|
||||
return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
|
||||
#endif
|
||||
}
|
||||
|
||||
case 3:
|
||||
@@ -653,24 +642,13 @@ static uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8
|
||||
|
||||
static void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& mask1)
|
||||
{
|
||||
static const unsigned char byte_mask_data[16] = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128};
|
||||
// magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00
|
||||
const uint64_t magic = 0x000103070f1f3f80ull;
|
||||
|
||||
uint8x16_t byte_mask = vld1q_u8(byte_mask_data);
|
||||
uint8x16_t masked = vandq_u8(mask, byte_mask);
|
||||
uint64x2_t mask2 = vreinterpretq_u64_u8(mask);
|
||||
|
||||
#ifdef __aarch64__
|
||||
// aarch64 has horizontal sums; MSVC doesn't expose this via arm64_neon.h so this path is exclusive to clang/gcc
|
||||
mask0 = vaddv_u8(vget_low_u8(masked));
|
||||
mask1 = vaddv_u8(vget_high_u8(masked));
|
||||
#else
|
||||
// we need horizontal sums of each half of masked, which can be done in 3 steps (yielding sums of sizes 2, 4, 8)
|
||||
uint8x8_t sum1 = vpadd_u8(vget_low_u8(masked), vget_high_u8(masked));
|
||||
uint8x8_t sum2 = vpadd_u8(sum1, sum1);
|
||||
uint8x8_t sum3 = vpadd_u8(sum2, sum2);
|
||||
|
||||
mask0 = vget_lane_u8(sum3, 0);
|
||||
mask1 = vget_lane_u8(sum3, 1);
|
||||
#endif
|
||||
mask0 = uint8_t((vgetq_lane_u64(mask2, 0) * magic) >> 56);
|
||||
mask1 = uint8_t((vgetq_lane_u64(mask2, 1) * magic) >> 56);
|
||||
}
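Note on the new magic constant (not part of the upstream diff): the standalone scalar sketch below exhaustively checks that the multiply-and-shift reproduces a naive 8-lane movemask for every pattern of 0x00/0xff lanes, which is the same property the z3 query verified. Per-lane contributions to the top byte never generate a carry, so the top byte is exactly the movemask.

#include <assert.h>
#include <stdint.h>

int main()
{
	const uint64_t magic = 0x000103070f1f3f80ull;

	for (int bits = 0; bits < 256; ++bits)
	{
		uint64_t lanes = 0;
		for (int i = 0; i < 8; ++i)
			if (bits & (1 << i))
				lanes |= 0xffull << (i * 8); // lane i is all-ones, matching the SIMD precondition

		int mask = int((lanes * magic) >> 56); // the trick used by neonMoveMask/wasmMoveMask
		assert(mask == bits);                  // agrees with the naive per-lane movemask
	}
	return 0;
}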
static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
@@ -688,6 +666,18 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi

	case 1:
	{
#ifdef SIMD_LATENCYOPT
		unsigned int data32;
		memcpy(&data32, data, 4);
		data32 &= data32 >> 1;

		// arrange bits such that low bits of nibbles of data64 contain all 2-bit elements of data32
		unsigned long long data64 = ((unsigned long long)data32 << 30) | (data32 & 0x3fffffff);

		// adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3
		int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60);
#endif

		uint8x8_t sel2 = vld1_u8(data);
		uint8x8_t sel22 = vzip_u8(vshr_n_u8(sel2, 4), sel2).val[0];
		uint8x8x2_t sel2222 = vzip_u8(vshr_n_u8(sel22, 2), sel22);
@@ -704,11 +694,25 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi

		vst1q_u8(buffer, result);

#ifdef SIMD_LATENCYOPT
		return data + 4 + datacnt;
#else
		return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
#endif
	}

	case 2:
	{
#ifdef SIMD_LATENCYOPT
		unsigned long long data64;
		memcpy(&data64, data, 8);
		data64 &= data64 >> 1;
		data64 &= data64 >> 2;

		// adds all 1-bit nibbles together; the sum fits in 4 bits because datacnt=16 would have used mode 3
		int datacnt = int(((data64 & 0x1111111111111111ull) * 0x1111111111111111ull) >> 60);
#endif

		uint8x8_t sel4 = vld1_u8(data);
		uint8x8x2_t sel44 = vzip_u8(vshr_n_u8(sel4, 4), vand_u8(sel4, vdup_n_u8(15)));
		uint8x16_t sel = vcombine_u8(sel44.val[0], sel44.val[1]);
@@ -724,7 +728,11 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi

		vst1q_u8(buffer, result);

#ifdef SIMD_LATENCYOPT
		return data + 8 + datacnt;
#else
		return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1];
#endif
	}

	case 3:
@@ -747,13 +755,11 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi
SIMD_TARGET
static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1)
{
	// TODO: 8b buffer overrun - should we use splat or extend buffers?
	v128_t sm0 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask0]);
	v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]);

	// TODO: we should use v8x16_load_splat
	v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]);
	sm1off = wasm_v8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
	sm1off = wasm_i8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);

	v128_t sm1r = wasm_i8x16_add(sm1, sm1off);

@@ -763,26 +769,16 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1)
SIMD_TARGET
static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1)
{
	v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3);
	// magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00
	const uint64_t magic = 0x000103070f1f3f80ull;

	// TODO: when Chrome supports v128.const we can try doing vectorized and?
	uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull;
	uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull;

	uint64_t mask_2 = mask_1a | mask_1b;
	uint64_t mask_4 = mask_2 | (mask_2 >> 16);
	uint64_t mask_8 = mask_4 | (mask_4 >> 8);

	mask0 = uint8_t(mask_8);
	mask1 = uint8_t(mask_8 >> 32);
	mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56);
	mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56);
}

SIMD_TARGET
static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
{
	unsigned char byte, enc, encv;
	const unsigned char* data_var;

	switch (bitslog2)
	{
	case 0:
@@ -796,7 +792,6 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi

	case 1:
	{
		// TODO: test 4b load splat
		v128_t sel2 = wasm_v128_load(data);
		v128_t rest = wasm_v128_load(data + 4);

@@ -811,8 +806,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi

		v128_t shuf = decodeShuffleMask(mask0, mask1);

		// TODO: test or/andnot
		v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask);
		v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask);

		wasm_v128_store(buffer, result);

@@ -821,7 +815,6 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi

	case 2:
	{
		// TODO: test 8b load splat
		v128_t sel4 = wasm_v128_load(data);
		v128_t rest = wasm_v128_load(data + 8);

@@ -835,8 +828,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi

		v128_t shuf = decodeShuffleMask(mask0, mask1);

		// TODO: test or/andnot
		v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask);
		v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask);

		wasm_v128_store(buffer, result);

@@ -927,8 +919,7 @@ SIMD_TARGET
static v128_t unzigzag8(v128_t v)
{
	v128_t xl = wasm_i8x16_neg(wasm_v128_and(v, wasm_i8x16_splat(1)));
	// TODO: use wasm_u8x16_shr when v8 fixes codegen for constant shifts
	v128_t xr = wasm_v128_and(wasm_u16x8_shr(v, 1), wasm_i8x16_splat(127));
	v128_t xr = wasm_u8x16_shr(v, 1);

	return wasm_v128_xor(xl, xr);
}
@@ -947,7 +938,7 @@ static const unsigned char* decodeBytesSimd(const unsigned char* data, const uns
	size_t header_size = (buffer_size / kByteGroupSize + 3) / 4;

	if (size_t(data_end - data) < header_size)
		return 0;
		return NULL;

	data += header_size;

@@ -969,7 +960,7 @@ static const unsigned char* decodeBytesSimd(const unsigned char* data, const uns
	for (; i < buffer_size; i += kByteGroupSize)
	{
		if (size_t(data_end - data) < kByteGroupDecodeLimit)
			return 0;
			return NULL;

		size_t header_offset = i / kByteGroupSize;

@@ -997,7 +988,7 @@ static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, con
	{
		data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned);
		if (!data)
			return 0;
			return NULL;
	}

#if defined(SIMD_SSE) || defined(SIMD_AVX)
@@ -1020,7 +1011,7 @@ static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, con

#ifdef SIMD_WASM
#define TEMP v128_t
#define PREP() v128_t pi = wasm_v128_load(last_vertex + k) // TODO: use wasm_v32x4_load_splat to avoid buffer overrun
#define PREP() v128_t pi = wasm_v128_load(last_vertex + k)
#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned)
#define GRP4(i) t0 = wasmx_splat_v32x4(r##i, 0), t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3)
#define FIXD(i) t##i = pi = wasm_i8x16_add(pi, t##i)
@@ -1092,7 +1083,7 @@ static unsigned int getCpuFeatures()
	return cpuinfo[2];
}

unsigned int cpuid = getCpuFeatures();
static unsigned int cpuid = getCpuFeatures();
#endif

} // namespace meshopt
@@ -1104,10 +1095,6 @@ size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, con
	assert(vertex_size > 0 && vertex_size <= 256);
	assert(vertex_size % 4 == 0);

#if TRACE
	memset(vertexstats, 0, sizeof(vertexstats));
#endif

	const unsigned char* vertex_data = static_cast<const unsigned char*>(vertices);

	unsigned char* data = buffer;
@@ -1160,28 +1147,6 @@ size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, con
	assert(data >= buffer + tail_size);
	assert(data <= buffer + buffer_size);

#if TRACE
	size_t total_size = data - buffer;

	for (size_t k = 0; k < vertex_size; ++k)
	{
		const Stats& vsk = vertexstats[k];

		printf("%2d: %d bytes\t%.1f%%\t%.1f bpv", int(k), int(vsk.size), double(vsk.size) / double(total_size) * 100, double(vsk.size) / double(vertex_count) * 8);

#if TRACE > 1
		printf("\t\thdr %d bytes\tbit0 %d (%d bytes)\tbit1 %d (%d bytes)\tbit2 %d (%d bytes)\tbit3 %d (%d bytes)",
		    int(vsk.header),
		    int(vsk.bitg[0]), int(vsk.bitb[0]),
		    int(vsk.bitg[1]), int(vsk.bitb[1]),
		    int(vsk.bitg[2]), int(vsk.bitb[2]),
		    int(vsk.bitg[3]), int(vsk.bitb[3]));
#endif

		printf("\n");
	}
#endif

	return data - buffer;
}

@@ -1217,7 +1182,7 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve
	assert(vertex_size > 0 && vertex_size <= 256);
	assert(vertex_size % 4 == 0);

	const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = 0;
	const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = NULL;

#if defined(SIMD_SSE) && defined(SIMD_FALLBACK)
	decode = (cpuid & (1 << 9)) ? decodeVertexBlockSimd : decodeVertexBlock;

260
Source/ThirdParty/meshoptimizer/vertexfilter.cpp
vendored
@@ -2,6 +2,7 @@
#include "meshoptimizer.h"

#include <math.h>
#include <string.h>

// The block below auto-detects SIMD ISA that can be used on the target platform
#ifndef MESHOPTIMIZER_NO_SIMD
@@ -29,6 +30,9 @@
// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
#if defined(__wasm_simd128__)
#define SIMD_WASM
// Prevent compiling other variant when wasm simd compilation is active
#undef SIMD_NEON
#undef SIMD_SSE
#endif

#endif // !MESHOPTIMIZER_NO_SIMD

@@ -51,6 +55,7 @@
#endif

#ifdef SIMD_WASM
#undef __DEPRECATED
#include <wasm_simd128.h>
#endif

@@ -61,6 +66,10 @@
#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7)
#endif

#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

namespace meshopt
{

@@ -143,7 +152,8 @@ static void decodeFilterExp(unsigned int* data, size_t count)
		int m = int(v << 8) >> 8;
		int e = int(v) >> 24;

		union {
		union
		{
			float f;
			unsigned int ui;
		} u;
@@ -158,11 +168,31 @@ static void decodeFilterExp(unsigned int* data, size_t count)
#endif

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
template <typename T>
static void dispatchSimd(void (*process)(T*, size_t), T* data, size_t count, size_t stride)
{
	assert(stride <= 4);

	size_t count4 = count & ~size_t(3);
	process(data, count4);

	if (count4 < count)
	{
		T tail[4 * 4] = {}; // max stride 4, max count 4
		size_t tail_size = (count - count4) * stride * sizeof(T);
		assert(tail_size <= sizeof(tail));

		memcpy(tail, data + count4 * stride, tail_size);
		process(tail, count - count4);
		memcpy(data + count4 * stride, tail, tail_size);
	}
}
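The helper above is what lets the public filter entry points below drop their old count % 4 == 0 requirement: the bulk runs in place and the ragged tail goes through a zero-padded stack buffer. A scalar sketch of the same pattern (not library code; scale4/scale are hypothetical names standing in for the SIMD kernels):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

// Hypothetical kernel: consumes whole groups of 4, like the SIMD filters.
static void scale4(float* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
		for (size_t k = 0; k < 4; ++k)
			data[i + k] *= 2.f;
}

static void scale(float* data, size_t count)
{
	size_t count4 = count & ~size_t(3);
	scale4(data, count4); // bulk, in place

	if (count4 < count)
	{
		float tail[4] = {}; // zero padding keeps the group-of-4 kernel inside valid memory
		memcpy(tail, data + count4, (count - count4) * sizeof(float));
		scale4(tail, count - count4);
		memcpy(data + count4, tail, (count - count4) * sizeof(float));
	}
}

int main()
{
	float v[6] = {1, 2, 3, 4, 5, 6};
	scale(v, 6);
	printf("%g %g\n", v[4], v[5]); // 10 12
	return 0;
}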
inline uint64_t rotateleft64(uint64_t v, int x)
{
#if defined(_MSC_VER) && !defined(__clang__)
	return _rotl64(v, x);
#elif defined(__clang__) && __clang_major__ >= 8
#elif defined(__clang__) && __has_builtin(__builtin_rotateleft64)
	return __builtin_rotateleft64(v, x);
#else
	return (v << (x & 63)) | (v >> ((64 - x) & 63));
@@ -620,7 +650,7 @@ static void decodeFilterOctSimd(signed char* data, size_t count)
static void decodeFilterOctSimd(short* data, size_t count)
{
	const v128_t sign = wasm_f32x4_splat(-0.f);
	volatile v128_t zmask = wasm_i32x4_splat(0x7fff); // TODO: volatile works around LLVM shuffle "optimizations"
	const v128_t zmask = wasm_i32x4_splat(0x7fff);

	for (size_t i = 0; i < count; i += 4)
	{
@@ -732,7 +762,8 @@ static void decodeFilterQuatSimd(short* data, size_t count)
		v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr);

		// compute component index shifted left by 4 (and moved into i32x4 slot)
		v128_t cm = wasm_i32x4_shl(cf, 4);
		// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449
		volatile v128_t cm = wasm_i32x4_shl(cf, 4);

		// rotate and store
		uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);
@@ -765,57 +796,238 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count)
}
#endif

// optimized variant of frexp
inline int optlog2(float v)
{
	union
	{
		float f;
		unsigned int ui;
	} u;

	u.f = v;
	// +1 accounts for implicit 1. in mantissa; denormalized numbers will end up clamped to min_exp by calling code
	return u.ui == 0 ? 0 : int((u.ui >> 23) & 0xff) - 127 + 1;
}

// optimized variant of ldexp
inline float optexp2(int e)
{
	union
	{
		float f;
		unsigned int ui;
	} u;

	u.ui = unsigned(e + 127) << 23;
	return u.f;
}
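Sanity check for the two helpers (restated standalone below, since the originals are internal to this file): for normal inputs, optlog2 matches the exponent frexp reports (v = m * 2^e with 0.5 <= |m| < 1), and optexp2 matches ldexp(1, e) for in-range exponents.

#include <assert.h>
#include <math.h>
#include <string.h>

static int optlog2_ref(float v) // same bit manipulation as above
{
	unsigned int ui;
	memcpy(&ui, &v, 4);
	return ui == 0 ? 0 : int((ui >> 23) & 0xff) - 127 + 1;
}

static float optexp2_ref(int e) // same construction as above
{
	unsigned int ui = unsigned(e + 127) << 23;
	float f;
	memcpy(&f, &ui, 4);
	return f;
}

int main()
{
	float values[5] = {1.f, 0.75f, 3.14159f, 1e-3f, 12345.f};
	for (int i = 0; i < 5; ++i)
	{
		int e;
		frexpf(values[i], &e);
		assert(optlog2_ref(values[i]) == e);
		assert(optexp2_ref(e) == ldexpf(1.f, e));
	}
	return 0;
}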
} // namespace meshopt

void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size)
void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size == 4 || vertex_size == 8);
	assert(stride == 4 || stride == 8);

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	if (vertex_size == 4)
		decodeFilterOctSimd(static_cast<signed char*>(buffer), vertex_count);
	if (stride == 4)
		dispatchSimd(decodeFilterOctSimd, static_cast<signed char*>(buffer), count, 4);
	else
		decodeFilterOctSimd(static_cast<short*>(buffer), vertex_count);
		dispatchSimd(decodeFilterOctSimd, static_cast<short*>(buffer), count, 4);
#else
	if (vertex_size == 4)
		decodeFilterOct(static_cast<signed char*>(buffer), vertex_count);
	if (stride == 4)
		decodeFilterOct(static_cast<signed char*>(buffer), count);
	else
		decodeFilterOct(static_cast<short*>(buffer), vertex_count);
		decodeFilterOct(static_cast<short*>(buffer), count);
#endif
}
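Worth noting for callers: with the dispatchSimd path in place, the old multiple-of-4 count restriction is gone, so odd counts are now valid. A hedged usage sketch of the public entry points (values illustrative):

#include <stdio.h>
#include "meshoptimizer.h"

int main()
{
	// One roughly-normalized normal, filter-encoded into 4 bytes and decoded in place.
	float n[4] = {0.267f, 0.535f, 0.802f, 0.f};
	signed char packed[4];

	meshopt_encodeFilterOct(packed, /* count= */ 1, /* stride= */ 4, /* bits= */ 8, n);
	meshopt_decodeFilterOct(packed, /* count= */ 1, /* stride= */ 4);

	printf("%d %d %d\n", packed[0], packed[1], packed[2]); // reconstructed snorm8 x/y/z
	return 0;
}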
void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size)
void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size == 8);
	(void)vertex_size;
	assert(stride == 8);
	(void)stride;

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	decodeFilterQuatSimd(static_cast<short*>(buffer), vertex_count);
	dispatchSimd(decodeFilterQuatSimd, static_cast<short*>(buffer), count, 4);
#else
	decodeFilterQuat(static_cast<short*>(buffer), vertex_count);
	decodeFilterQuat(static_cast<short*>(buffer), count);
#endif
}

void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size)
void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride)
{
	using namespace meshopt;

	assert(vertex_count % 4 == 0);
	assert(vertex_size % 4 == 0);
	assert(stride > 0 && stride % 4 == 0);

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	decodeFilterExpSimd(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4));
	dispatchSimd(decodeFilterExpSimd, static_cast<unsigned int*>(buffer), count * (stride / 4), 1);
#else
	decodeFilterExp(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4));
	decodeFilterExp(static_cast<unsigned int*>(buffer), count * (stride / 4));
#endif
}

void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data)
{
	assert(stride == 4 || stride == 8);
	assert(bits >= 1 && bits <= 16);

	signed char* d8 = static_cast<signed char*>(destination);
	short* d16 = static_cast<short*>(destination);

	int bytebits = int(stride * 2);

	for (size_t i = 0; i < count; ++i)
	{
		const float* n = &data[i * 4];

		// octahedral encoding of a unit vector
		float nx = n[0], ny = n[1], nz = n[2], nw = n[3];
		float nl = fabsf(nx) + fabsf(ny) + fabsf(nz);
		float ns = nl == 0.f ? 0.f : 1.f / nl;

		nx *= ns;
		ny *= ns;

		float u = (nz >= 0.f) ? nx : (1 - fabsf(ny)) * (nx >= 0.f ? 1.f : -1.f);
		float v = (nz >= 0.f) ? ny : (1 - fabsf(nx)) * (ny >= 0.f ? 1.f : -1.f);

		int fu = meshopt_quantizeSnorm(u, bits);
		int fv = meshopt_quantizeSnorm(v, bits);
		int fo = meshopt_quantizeSnorm(1.f, bits);
		int fw = meshopt_quantizeSnorm(nw, bytebits);

		if (stride == 4)
		{
			d8[i * 4 + 0] = (signed char)(fu);
			d8[i * 4 + 1] = (signed char)(fv);
			d8[i * 4 + 2] = (signed char)(fo);
			d8[i * 4 + 3] = (signed char)(fw);
		}
		else
		{
			d16[i * 4 + 0] = short(fu);
			d16[i * 4 + 1] = short(fv);
			d16[i * 4 + 2] = short(fo);
			d16[i * 4 + 3] = short(fw);
		}
	}
}
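For the inverse direction (which decodeFilterOct performs in fixed point), the float math looks like this; a standalone sketch under the usual octahedral-mapping conventions, not the library's code:

#include <math.h>
#include <stdio.h>

// Maps (u, v) in [-1, 1]^2 back to a unit vector; the lower hemisphere is
// unfolded with the same (1 - |.|) reflection the encoder above applies.
static void octDecode(float u, float v, float out[3])
{
	float x = u, y = v, z = 1.f - fabsf(u) - fabsf(v);
	float t = z < 0.f ? -z : 0.f; // distance outside the upper-hemisphere diamond

	x += x >= 0.f ? -t : t;
	y += y >= 0.f ? -t : t;

	float len = sqrtf(x * x + y * y + z * z);
	out[0] = x / len, out[1] = y / len, out[2] = z / len;
}

int main()
{
	float n[3];
	octDecode(0.25f, -0.5f, n);
	printf("%f %f %f\n", n[0], n[1], n[2]);
	return 0;
}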
void meshopt_encodeFilterQuat(void* destination_, size_t count, size_t stride, int bits, const float* data)
{
	assert(stride == 8);
	assert(bits >= 4 && bits <= 16);
	(void)stride;

	short* destination = static_cast<short*>(destination_);

	const float scaler = sqrtf(2.f);

	for (size_t i = 0; i < count; ++i)
	{
		const float* q = &data[i * 4];
		short* d = &destination[i * 4];

		// establish maximum quaternion component
		int qc = 0;
		qc = fabsf(q[1]) > fabsf(q[qc]) ? 1 : qc;
		qc = fabsf(q[2]) > fabsf(q[qc]) ? 2 : qc;
		qc = fabsf(q[3]) > fabsf(q[qc]) ? 3 : qc;

		// we use double-cover properties to discard the sign
		float sign = q[qc] < 0.f ? -1.f : 1.f;

		// note: we always encode a cyclical swizzle to be able to recover the order via rotation
		d[0] = short(meshopt_quantizeSnorm(q[(qc + 1) & 3] * scaler * sign, bits));
		d[1] = short(meshopt_quantizeSnorm(q[(qc + 2) & 3] * scaler * sign, bits));
		d[2] = short(meshopt_quantizeSnorm(q[(qc + 3) & 3] * scaler * sign, bits));
		d[3] = short((meshopt_quantizeSnorm(1.f, bits) & ~3) | qc);
	}
}
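The decoding side of this "smallest three" scheme, sketched in plain float math under the same conventions (three components scaled by sqrt(2), the largest one reconstructed from unit length, 2-bit index of the dropped component); quatDecode is a hypothetical standalone helper, not the library's decoder:

#include <math.h>
#include <stdio.h>

static void quatDecode(const float s[3], int qc, float q[4])
{
	float x = s[0] / sqrtf(2.f), y = s[1] / sqrtf(2.f), z = s[2] / sqrtf(2.f);
	float ww = 1.f - x * x - y * y - z * z;
	float w = sqrtf(ww < 0.f ? 0.f : ww); // largest component, recovered from |q| = 1

	// undo the cyclical swizzle; the discarded sign is harmless since -q and q encode the same rotation
	q[(qc + 1) & 3] = x;
	q[(qc + 2) & 3] = y;
	q[(qc + 3) & 3] = z;
	q[qc] = w;
}

int main()
{
	float s[3] = {0.f, 0.f, 0.f}, q[4];
	quatDecode(s, 3, q); // an all-zero payload with qc=3 decodes to identity (0, 0, 0, 1)
	printf("%f %f %f %f\n", q[0], q[1], q[2], q[3]);
	return 0;
}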
void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode)
{
	using namespace meshopt;

	assert(stride > 0 && stride % 4 == 0 && stride <= 256);
	assert(bits >= 1 && bits <= 24);

	unsigned int* destination = static_cast<unsigned int*>(destination_);
	size_t stride_float = stride / sizeof(float);

	int component_exp[64];
	assert(stride_float <= sizeof(component_exp) / sizeof(int));

	const int min_exp = -100;

	if (mode == meshopt_EncodeExpSharedComponent)
	{
		for (size_t j = 0; j < stride_float; ++j)
			component_exp[j] = min_exp;

		for (size_t i = 0; i < count; ++i)
		{
			const float* v = &data[i * stride_float];

			// use maximum exponent to encode values; this guarantees that mantissa is [-1, 1]
			for (size_t j = 0; j < stride_float; ++j)
			{
				int e = optlog2(v[j]);

				component_exp[j] = (component_exp[j] < e) ? e : component_exp[j];
			}
		}
	}

	for (size_t i = 0; i < count; ++i)
	{
		const float* v = &data[i * stride_float];
		unsigned int* d = &destination[i * stride_float];

		int vector_exp = min_exp;

		if (mode == meshopt_EncodeExpSharedVector)
		{
			// use maximum exponent to encode values; this guarantees that mantissa is [-1, 1]
			for (size_t j = 0; j < stride_float; ++j)
			{
				int e = optlog2(v[j]);

				vector_exp = (vector_exp < e) ? e : vector_exp;
			}
		}
		else if (mode == meshopt_EncodeExpSeparate)
		{
			for (size_t j = 0; j < stride_float; ++j)
			{
				int e = optlog2(v[j]);

				component_exp[j] = (min_exp < e) ? e : min_exp;
			}
		}

		for (size_t j = 0; j < stride_float; ++j)
		{
			int exp = (mode == meshopt_EncodeExpSharedVector) ? vector_exp : component_exp[j];

			// note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude)
			exp -= (bits - 1);

			// compute renormalized rounded mantissa for each component
			int mmask = (1 << 24) - 1;

			int m = int(v[j] * optexp2(-exp) + (v[j] >= 0 ? 0.5f : -0.5f));

			d[j] = (m & mmask) | (unsigned(exp) << 24);
		}
	}
}
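A hedged end-to-end sketch of the common calling pattern for the exponent filter (values illustrative; the decode runs in place and yields floats quantized to the requested mantissa width):

#include <stdio.h>
#include "meshoptimizer.h"

int main()
{
	// Three positions with a 12-byte stride; shared-vector mode stores one
	// exponent per position so x/y/z share a scale.
	float positions[3][3] = {{0.1f, 2.f, -30.f}, {0.2f, 1.5f, -29.f}, {0.f, 2.5f, -31.f}};
	unsigned int packed[3][3];

	meshopt_encodeFilterExp(packed, 3, 12, 15, &positions[0][0], meshopt_EncodeExpSharedVector);
	meshopt_decodeFilterExp(packed, 3, 12);

	const float* decoded = reinterpret_cast<const float*>(packed);
	printf("%f %f %f\n", decoded[0], decoded[1], decoded[2]);
	return 0;
}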
#undef SIMD_SSE
#undef SIMD_NEON
#undef SIMD_WASM