diff --git a/Content/Editor/Camera/M_Camera.flax b/Content/Editor/Camera/M_Camera.flax index 7d7213a8d..7d4c71666 100644 --- a/Content/Editor/Camera/M_Camera.flax +++ b/Content/Editor/Camera/M_Camera.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7edc1b9d2c7fbd32fcf778814deb719c71781f657da050ac0c7c78984aeb360d +oid sha256:b73d774c71bd7b46c9c4198a4c957055e6447e31d8252813b272db92301475e7 size 29533 diff --git a/Content/Editor/CubeTexturePreviewMaterial.flax b/Content/Editor/CubeTexturePreviewMaterial.flax index 5969c90fa..2d732c086 100644 --- a/Content/Editor/CubeTexturePreviewMaterial.flax +++ b/Content/Editor/CubeTexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac6023e5d6525c3b7c385a380ed9d6fc71ec9d683c587391d14c9daf6653e31a +oid sha256:c4ec07a3b7e0a2dfd4332598a982c3192c0c357c6bcd128d7a7797fb483780e7 size 31445 diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax index fc45d33cc..d082bd8e7 100644 --- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax +++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ecd573c40f534f293d4827b1a8150d439d4f5e7729552474926208c5814f3d3e +oid sha256:2830919bea988e1f8bd8299ceac34b8a3695418e2f22ca670f2fec3b3d6d1a2f size 41149 diff --git a/Content/Editor/DebugMaterials/SingleColor/Decal.flax b/Content/Editor/DebugMaterials/SingleColor/Decal.flax index 05e99be76..b94f22bc8 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Decal.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Decal.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0b2ad25738c2bc55bb3e76fc94fc81992b1d65b8b3091b132c75b2ed064c517 -size 10398 +oid sha256:588c29a4b239c32d4b125052e4054a29cf5140562e90ca6fac4d2952e03f66c7 +size 10397 diff --git a/Content/Editor/DebugMaterials/SingleColor/Particle.flax b/Content/Editor/DebugMaterials/SingleColor/Particle.flax index 7a328e7a0..de2043874 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Particle.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Particle.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02ddea5bcb3fccb697081e47cc26a0b546b23b89ceca299e702a1d431775dfd6 +oid sha256:b39cd76254f341c93e83625475b6e7896ef34f1d6d650da52e649bc055d0d03e size 33503 diff --git a/Content/Editor/DebugMaterials/SingleColor/Surface.flax b/Content/Editor/DebugMaterials/SingleColor/Surface.flax index 84e05ee36..7ae8a69c3 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Surface.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Surface.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:741a7619b5aebc6c7c6a573a0407e8b7aa42d1b50d0ed5cf6a21026932807d0e +oid sha256:5861e912cf822c9478f824390f6258d84821b7289e3e993a7dee38b77c5a2f80 size 29398 diff --git a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax index ab4591176..fdcb880df 100644 --- a/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax +++ b/Content/Editor/DebugMaterials/SingleColor/SurfaceAdditive.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:358370943d21a97f8b45ff2181b7c6c2d7a6297e3f166ae7a77363aadf89b152 +oid sha256:b9ed2869a2a754423e0b8c456eed621bd06bdb50cacf7a972a7f024e40a1ea6a size 32954 diff --git a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax 
b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax index 54151179a..ad27a422c 100644 --- a/Content/Editor/DebugMaterials/SingleColor/Terrain.flax +++ b/Content/Editor/DebugMaterials/SingleColor/Terrain.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:486b4db3e1d825d026753d944a04defe4d72eb73eb03a438944c366f19de824e +oid sha256:05c27ac416ef922ee247adc12a115fd522eb3a1d8873e1056914cd96893a3097 size 21096 diff --git a/Content/Editor/DefaultFontMaterial.flax b/Content/Editor/DefaultFontMaterial.flax index 8d48c5827..d84425aab 100644 --- a/Content/Editor/DefaultFontMaterial.flax +++ b/Content/Editor/DefaultFontMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ebdfc478caabc84a3a75384a64d262d2d509bbac3540eea462e45911719c288f +oid sha256:8e3d4ca149e143fee31e2d038b8efec526ca995dbe13258fbb68c89cd43ecbf7 size 29627 diff --git a/Content/Editor/Gizmo/FoliageBrushMaterial.flax b/Content/Editor/Gizmo/FoliageBrushMaterial.flax index 79385ada6..eb7e784c9 100644 --- a/Content/Editor/Gizmo/FoliageBrushMaterial.flax +++ b/Content/Editor/Gizmo/FoliageBrushMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa4f1a733150c62064cac60c07980df7c84bb6163dc9507782aa98df07f48874 +oid sha256:7af1150d6e7cb6ecce5cd039f0edc92967c986a13903a201d6dc15ed0751dc57 size 39637 diff --git a/Content/Editor/Gizmo/Material.flax b/Content/Editor/Gizmo/Material.flax index ace3bde90..bbb114662 100644 --- a/Content/Editor/Gizmo/Material.flax +++ b/Content/Editor/Gizmo/Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26e1832496c01cb31bd4dc9000d3cd326ea4fd54de02910d3801d2641bff685c +oid sha256:d575ca1b202c84b8268687b391be5fc8d55497ffa23fb3cd4287fa667de654ab size 34240 diff --git a/Content/Editor/Gizmo/MaterialWire.flax b/Content/Editor/Gizmo/MaterialWire.flax index 7ea0a596f..fb4b8acca 100644 --- a/Content/Editor/Gizmo/MaterialWire.flax +++ b/Content/Editor/Gizmo/MaterialWire.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca8bc1ac9d45534d3efd3b4308d7492fa016726b4ec744be26619069ce911b73 +oid sha256:26f2d88aab9c0cad36ae527b038a36b69755ff3a5a920e8c4563dd5e1ed8ec65 size 32689 diff --git a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax index 0c1461b72..b5d224d58 100644 --- a/Content/Editor/Gizmo/SelectionOutlineMaterial.flax +++ b/Content/Editor/Gizmo/SelectionOutlineMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09f7dff17af9cd055352e0da534f3466c8efa235c40faf5e56da92c788342f6a -size 17394 +oid sha256:5bb75934622d9251a8a9e72cfe4905091770798ffed22de680a70f98434d0ed7 +size 16241 diff --git a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax index bd4935d96..5a5262e2b 100644 --- a/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax +++ b/Content/Editor/Gizmo/VertexColorsPreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1bc0005c64c561a430a17e4707abc000e06498af968890e2c4e223dc07f07c12 +oid sha256:a1afa76c3f9400da065c150a6a58adc904c3596f650e04dfd87b5e1c1b34695e size 30655 diff --git a/Content/Editor/Highlight Material.flax b/Content/Editor/Highlight Material.flax index ccecb98aa..9d09ea792 100644 --- a/Content/Editor/Highlight Material.flax +++ b/Content/Editor/Highlight Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:95d172cd12bb3c818fbccf737e78ab282bc8d0880aa8f45af0562850b0eabe4b -size 31616 +oid sha256:1290ae85e4fe41f9d8c1919b33e165287f79377aeddc68f9117c1795ca341003 +size 31267 diff --git a/Content/Editor/Icons/IconsMaterial.flax b/Content/Editor/Icons/IconsMaterial.flax index b24941463..2ccbce8c9 100644 --- a/Content/Editor/Icons/IconsMaterial.flax +++ b/Content/Editor/Icons/IconsMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ca4baa1419080395dcf2b5757676406288f112754bc3cd2f27610b58d199622 +oid sha256:340cc500a160344b43b21ed8c4c22b6d776f406581f606ced62a3e92c5bef18a size 31300 diff --git a/Content/Editor/IesProfilePreviewMaterial.flax b/Content/Editor/IesProfilePreviewMaterial.flax index 99bc2662c..b3a382132 100644 --- a/Content/Editor/IesProfilePreviewMaterial.flax +++ b/Content/Editor/IesProfilePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b3b4c61b04d372d2430a7c08dec612af6caa0e57b1cb47ea44d171d729d3f8f8 +oid sha256:d444cd33ec8d2e1e0e6651c3979260f05c06c8bac33ce2441d6974ae4fa178e4 size 20443 diff --git a/Content/Editor/Particles/Particle Material Color.flax b/Content/Editor/Particles/Particle Material Color.flax index 19eb7a3c2..91b06b2fb 100644 --- a/Content/Editor/Particles/Particle Material Color.flax +++ b/Content/Editor/Particles/Particle Material Color.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f3b8a7c48c55e33a41f9fe4dbf9b3109b0e734ff154d6cbd3e4101013b01649 +oid sha256:906443c7db821361b32780c17735bc9477ea96c8979dee371a4899635246af48 size 31708 diff --git a/Content/Editor/Particles/Smoke Material.flax b/Content/Editor/Particles/Smoke Material.flax index 527d19842..e6396c194 100644 --- a/Content/Editor/Particles/Smoke Material.flax +++ b/Content/Editor/Particles/Smoke Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2275282d4e3b5e012a0bbc93fca0d6ffdad89e5a5f0c289678f70748f2efab56 -size 40655 +oid sha256:16db9c1a18b64aea2dcdf3e74f9a44c652bf8bd9b33a5bfda39555d8c002a358 +size 39774 diff --git a/Content/Editor/SpriteMaterial.flax b/Content/Editor/SpriteMaterial.flax index d967a4ea4..2a05418b2 100644 --- a/Content/Editor/SpriteMaterial.flax +++ b/Content/Editor/SpriteMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f5e82be7efa6489cfdfd1babeb1fbb90507aaff7c04eb5f64a4971adf0a2164 +oid sha256:56254b02ffc937d61e8e8fa6492d4805e944ca639c7fcfc0f751b4ac2442365d size 30734 diff --git a/Content/Editor/Terrain/Circle Brush Material.flax b/Content/Editor/Terrain/Circle Brush Material.flax index 6ddc5f3e9..f481be389 100644 --- a/Content/Editor/Terrain/Circle Brush Material.flax +++ b/Content/Editor/Terrain/Circle Brush Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2c7fde7be7d6f9876f9c0db02632c098ab95ade7de57c583d2e495c8ae8665bd +oid sha256:16eefa75a2ae99bba658c4e9b8e8741187b90e577193f76394872764fff2ca0b size 28232 diff --git a/Content/Editor/Terrain/Highlight Terrain Material.flax b/Content/Editor/Terrain/Highlight Terrain Material.flax index c573eb3ee..579db477c 100644 --- a/Content/Editor/Terrain/Highlight Terrain Material.flax +++ b/Content/Editor/Terrain/Highlight Terrain Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9369a554ea1776154f5e39d4aaed044f928d98f1f5955b7590b0972015b07438 +oid sha256:e25a3c9e130e51b28dfe5ce43678f52c277c0def83142a2853c4c8ca84dbf417 size 21179 diff --git a/Content/Editor/TexturePreviewMaterial.flax 
b/Content/Editor/TexturePreviewMaterial.flax index 2c91f9d8f..d75e19d5e 100644 --- a/Content/Editor/TexturePreviewMaterial.flax +++ b/Content/Editor/TexturePreviewMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4d61f178e72e4d983a919b76368e03c66995ecf50935f6f55b660e34f58755a2 +oid sha256:79de09ba0616eb6066171c2b80cdb6c4235cb52be4836d23162bb9c2585760a0 size 11058 diff --git a/Content/Editor/Wires Debug Material.flax b/Content/Editor/Wires Debug Material.flax index 308a6230a..b1f87a7d0 100644 --- a/Content/Editor/Wires Debug Material.flax +++ b/Content/Editor/Wires Debug Material.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7a42b1bc5a34f9c47d1aeb773ef26ce470b2d88c2b092828f0fcb439583ef27 -size 31616 +oid sha256:02d4c767fb59c67fef16ccc081f6f371bad329a5333047f9f79fd3d50b911f93 +size 31753 diff --git a/Content/Engine/DefaultDeformableMaterial.flax b/Content/Engine/DefaultDeformableMaterial.flax index a397d1ad8..1244ae3ec 100644 --- a/Content/Engine/DefaultDeformableMaterial.flax +++ b/Content/Engine/DefaultDeformableMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be21bb7eecd9c774196dbaa89d1b049b108fc0929d648795056c977fe00364ab -size 19582 +oid sha256:d1f556b230cea8e83d00bd4357d34a77e5e468389a5f3bb615e30f6a3ce3ace4 +size 19734 diff --git a/Content/Engine/DefaultMaterial.flax b/Content/Engine/DefaultMaterial.flax index eddcbace8..bd57e7d44 100644 --- a/Content/Engine/DefaultMaterial.flax +++ b/Content/Engine/DefaultMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a8a4ad5e763704263b94a7a7e0cc30ab7b1cd1abcb5ccae2d4c6062a65920df -size 31928 +oid sha256:c4ec872b3433d58f8aed640c6efee3d911f226740b4844cb07ed0bf94c00ea18 +size 32080 diff --git a/Content/Engine/DefaultRadialMenu.flax b/Content/Engine/DefaultRadialMenu.flax index 60e2ba5f9..5fba9092e 100644 --- a/Content/Engine/DefaultRadialMenu.flax +++ b/Content/Engine/DefaultRadialMenu.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c4151a58e5314937efcd3bdcb9fe0bdd5047b8705931e45e0a4e71a4470e16a0 +oid sha256:0da99403c069966d05daea7fc11d32f20f88bac0463fbc08724840e249ee3bd2 size 21700 diff --git a/Content/Engine/DefaultTerrainMaterial.flax b/Content/Engine/DefaultTerrainMaterial.flax index b302ade35..4147fe0e4 100644 --- a/Content/Engine/DefaultTerrainMaterial.flax +++ b/Content/Engine/DefaultTerrainMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5cf6924809b9bd7ad3c09722a93f327a0d111676060d136df9c14ab34e8475b -size 23930 +oid sha256:bdfa3b4842a5734d2cd8110af03599b4a5280b33a72b2ba435cd19487cebcde6 +size 24082 diff --git a/Content/Engine/SingleColorMaterial.flax b/Content/Engine/SingleColorMaterial.flax index d6d179150..6d556af2b 100644 --- a/Content/Engine/SingleColorMaterial.flax +++ b/Content/Engine/SingleColorMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:750f69ce59ef020d2e2186ed6c4bf7aac67ecb1692287e358eaed969fc36381a +oid sha256:6ff8f127d46e68e3423339a352f623c079f2c5d93512c5e9b25841edc7cd0f05 size 29615 diff --git a/Content/Engine/SkyboxMaterial.flax b/Content/Engine/SkyboxMaterial.flax index cc369ceee..b51c5bce7 100644 --- a/Content/Engine/SkyboxMaterial.flax +++ b/Content/Engine/SkyboxMaterial.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3eecc9556af6c2a79d39a7e1c52e4019bdccfb43b074eaddd18600a5854dbffe +oid sha256:14c9833ed19302ea7c6e730fff63f1b72dbac71dc2b49c1d62edb61ccaa68b6f 
size 31974 diff --git a/Content/Shaders/GI/DDGI.flax b/Content/Shaders/GI/DDGI.flax index 6739b2436..257953bf9 100644 --- a/Content/Shaders/GI/DDGI.flax +++ b/Content/Shaders/GI/DDGI.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5577ef4ce821b08a38afe17b9e5d11cb0b409eb05dd89b2ca76ea95d88085dc0 -size 32893 +oid sha256:5b017cf857f443553020e4bc7c8c8c5da3a826a2514322664a023ffa6005f7a5 +size 38217 diff --git a/Content/Shaders/GI/GlobalSurfaceAtlas.flax b/Content/Shaders/GI/GlobalSurfaceAtlas.flax index 1b0173ba5..57990c249 100644 --- a/Content/Shaders/GI/GlobalSurfaceAtlas.flax +++ b/Content/Shaders/GI/GlobalSurfaceAtlas.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f34bf867df5f4296ca66ac691c2bca4efa168fb9e21ca4e613e8086669575cf -size 13296 +oid sha256:615dff65b01507be6c4de722e126324aba20fc197f8e12dafaa94a05e46cba6e +size 13222 diff --git a/Content/Shaders/GlobalSignDistanceField.flax b/Content/Shaders/GlobalSignDistanceField.flax index 590e8f3a9..5afcb4bf4 100644 --- a/Content/Shaders/GlobalSignDistanceField.flax +++ b/Content/Shaders/GlobalSignDistanceField.flax @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:064f54786958f109222c49cbc0358ff4f345b30010fcd5e8cc1fab7bdc68c4fe -size 13349 +oid sha256:1f07ebb16820897e8598ae7a0627cb75b3d28e9dceea3ad4bd9ff543d5cdd01c +size 13979 diff --git a/Flax.flaxproj b/Flax.flaxproj index 7fd591727..5d926a87b 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -4,7 +4,7 @@ "Major": 1, "Minor": 11, "Revision": 0, - "Build": 6805 + "Build": 6806 }, "Company": "Flax", "Copyright": "Copyright (c) 2012-2025 Wojciech Figat. All rights reserved.", diff --git a/Source/Editor/CustomEditors/Dedicated/NavMeshBoundsVolumeEditor.cs b/Source/Editor/CustomEditors/Dedicated/NavMeshBoundsVolumeEditor.cs new file mode 100644 index 000000000..2cbf01e41 --- /dev/null +++ b/Source/Editor/CustomEditors/Dedicated/NavMeshBoundsVolumeEditor.cs @@ -0,0 +1,38 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using FlaxEngine; + +namespace FlaxEditor.CustomEditors.Dedicated +{ + /// + /// Custom editor for . 
+ /// + /// + [CustomEditor(typeof(NavMeshBoundsVolume)), DefaultEditor] + internal class NavMeshBoundsVolumeEditor : ActorEditor + { + /// + public override void Initialize(LayoutElementsContainer layout) + { + base.Initialize(layout); + + if (Values.HasDifferentTypes == false) + { + var button = layout.Button("Build"); + button.Button.Clicked += OnBuildClicked; + } + } + + private void OnBuildClicked() + { + foreach (var value in Values) + { + if (value is NavMeshBoundsVolume volume) + { + Navigation.BuildNavMesh(volume.Box, volume.Scene); + Editor.Instance.Scene.MarkSceneEdited(volume.Scene); + } + } + } + } +} diff --git a/Source/Editor/CustomEditors/Dedicated/ScriptsEditor.cs b/Source/Editor/CustomEditors/Dedicated/ScriptsEditor.cs index 954599347..9844f3fda 100644 --- a/Source/Editor/CustomEditors/Dedicated/ScriptsEditor.cs +++ b/Source/Editor/CustomEditors/Dedicated/ScriptsEditor.cs @@ -909,7 +909,8 @@ namespace FlaxEditor.CustomEditors.Dedicated settingsButton.Tag = script; settingsButton.Clicked += OnSettingsButtonClicked; - group.Panel.HeaderTextMargin = new Margin(scriptDrag.Right - 12, 15, 2, 2); + // Adjust margin to not overlap with other ui elements in the header + group.Panel.HeaderTextMargin = group.Panel.HeaderTextMargin with { Left = scriptDrag.Right - 12, Right = settingsButton.Width + Utilities.Constants.UIMargin }; group.Object(values, editor); // Remove drop down arrows and containment lines if no objects in the group if (group.Children.Count == 0) diff --git a/Source/Editor/CustomEditors/Editors/CollectionEditor.cs b/Source/Editor/CustomEditors/Editors/CollectionEditor.cs index b3fff5644..28593a7f5 100644 --- a/Source/Editor/CustomEditors/Editors/CollectionEditor.cs +++ b/Source/Editor/CustomEditors/Editors/CollectionEditor.cs @@ -450,6 +450,7 @@ namespace FlaxEditor.CustomEditors.Editors protected bool NotNullItems; private IntValueBox _sizeBox; + private Label _label; private Color _background; private int _elementsCount, _minCount, _maxCount; private bool _readOnly; @@ -566,7 +567,7 @@ namespace FlaxEditor.CustomEditors.Editors Parent = dropPanel, }; - var label = new Label + _label = new Label { Text = "Size", AnchorPreset = AnchorPresets.TopRight, @@ -672,8 +673,10 @@ namespace FlaxEditor.CustomEditors.Editors Resize(Count + 1); }; } - } + Layout.ContainerControl.SizeChanged += OnLayoutSizeChanged; + } + private void OnSetupContextMenu(ContextMenu menu, DropPanel panel) { if (menu.Items.Any(x => x is ContextMenuButton b && b.Text.Equals("Open All", StringComparison.Ordinal))) @@ -696,10 +699,24 @@ namespace FlaxEditor.CustomEditors.Editors }); } + private void OnLayoutSizeChanged(Control control) + { + if (Layout.ContainerControl is DropPanel dropPanel) + { + // Hide "Size" text when array editor title overlaps + var headerTextSize = dropPanel.HeaderTextFont.GetFont().MeasureText(dropPanel.HeaderText); + if (headerTextSize.X + DropPanel.DropDownIconSize >= _label.Left) + _label.TextColor = _label.TextColorHighlighted = Color.Transparent; + else + _label.TextColor = _label.TextColorHighlighted = FlaxEngine.GUI.Style.Current.Foreground; + } + } + /// protected override void Deinitialize() { _sizeBox = null; + Layout.ContainerControl.SizeChanged -= OnLayoutSizeChanged; base.Deinitialize(); } diff --git a/Source/Editor/CustomEditors/Elements/Container/GroupElement.cs b/Source/Editor/CustomEditors/Elements/Container/GroupElement.cs index 64bc9080b..055c6a29d 100644 --- a/Source/Editor/CustomEditors/Elements/Container/GroupElement.cs +++ 
b/Source/Editor/CustomEditors/Elements/Container/GroupElement.cs @@ -44,7 +44,8 @@ namespace FlaxEditor.CustomEditors.Elements { var style = Style.Current; var settingsButtonSize = Panel.HeaderHeight; - return new Image + Panel.HeaderTextMargin = Panel.HeaderTextMargin with { Right = settingsButtonSize + Utilities.Constants.UIMargin }; +; return new Image { TooltipText = "Settings", AutoFocus = true, diff --git a/Source/Editor/Editor.cs b/Source/Editor/Editor.cs index 58466e35d..c881d9dcd 100644 --- a/Source/Editor/Editor.cs +++ b/Source/Editor/Editor.cs @@ -23,6 +23,7 @@ using FlaxEngine.Assertions; using FlaxEngine.GUI; using FlaxEngine.Interop; using FlaxEngine.Json; +using FlaxEngine.Utilities; #pragma warning disable CS1591 @@ -1370,7 +1371,7 @@ namespace FlaxEditor public void BuildCSG() { var scenes = Level.Scenes; - scenes.ToList().ForEach(x => x.BuildCSG(0)); + scenes.ForEach(x => x.BuildCSG(0)); Scene.MarkSceneEdited(scenes); } @@ -1380,7 +1381,7 @@ namespace FlaxEditor public void BuildNavMesh() { var scenes = Level.Scenes; - scenes.ToList().ForEach(x => Navigation.BuildNavMesh(x, 0)); + Navigation.BuildNavMesh(); Scene.MarkSceneEdited(scenes); } diff --git a/Source/Editor/GUI/ContextMenu/ContextMenu.cs b/Source/Editor/GUI/ContextMenu/ContextMenu.cs index 896bd5bc2..f5705c6f5 100644 --- a/Source/Editor/GUI/ContextMenu/ContextMenu.cs +++ b/Source/Editor/GUI/ContextMenu/ContextMenu.cs @@ -502,6 +502,7 @@ namespace FlaxEditor.GUI.ContextMenu if (base.OnKeyDown(key)) return true; + // Keyboard navigation around the menu switch (key) { case KeyboardKeys.ArrowDown: @@ -526,6 +527,20 @@ namespace FlaxEditor.GUI.ContextMenu } } break; + case KeyboardKeys.ArrowRight: + for (int i = 0; i < _panel.Children.Count; i++) + { + if (_panel.Children[i] is ContextMenuChildMenu item && item.Visible && item.IsFocused && !item.ContextMenu.IsOpened) + { + item.ShowChild(this); + item.ContextMenu._panel.Children.FirstOrDefault(x => x is ContextMenuButton && x.Visible)?.Focus(); + break; + } + } + break; + case KeyboardKeys.ArrowLeft: + ParentCM?.RootWindow.Focus(); + break; } return false; diff --git a/Source/Editor/GUI/ContextMenu/ContextMenuBase.cs b/Source/Editor/GUI/ContextMenu/ContextMenuBase.cs index 826baa482..041d4f053 100644 --- a/Source/Editor/GUI/ContextMenu/ContextMenuBase.cs +++ b/Source/Editor/GUI/ContextMenu/ContextMenuBase.cs @@ -75,6 +75,11 @@ namespace FlaxEditor.GUI.ContextMenu /// public bool HasChildCMOpened => _childCM != null; + /// + /// Gets the parent context menu (if exists). + /// + public ContextMenuBase ParentCM => _parentCM; + /// /// Gets the topmost context menu. /// @@ -84,9 +89,7 @@ namespace FlaxEditor.GUI.ContextMenu { var cm = this; while (cm._parentCM != null && cm._isSubMenu) - { cm = cm._parentCM; - } return cm; } } @@ -111,6 +114,11 @@ namespace FlaxEditor.GUI.ContextMenu /// public bool UseInput = true; + /// + /// Optional flag that can disable UI navigation (tab/enter). + /// + public bool UseNavigation = true; + /// /// Initializes a new instance of the class. /// @@ -622,6 +630,21 @@ namespace FlaxEditor.GUI.ContextMenu case KeyboardKeys.Escape: Hide(); return true; + case KeyboardKeys.Return: + if (UseNavigation && Root?.FocusedControl != null) + { + Root.SubmitFocused(); + return true; + } + break; + case KeyboardKeys.Tab: + if (UseNavigation && Root != null) + { + bool shiftDown = Root.GetKey(KeyboardKeys.Shift); + Root.Navigate(shiftDown ? 
NavDirection.Previous : NavDirection.Next); + return true; + } + break; } return false; } diff --git a/Source/Editor/GUI/ContextMenu/ContextMenuChildMenu.cs b/Source/Editor/GUI/ContextMenu/ContextMenuChildMenu.cs index 74ab560fb..78337d011 100644 --- a/Source/Editor/GUI/ContextMenu/ContextMenuChildMenu.cs +++ b/Source/Editor/GUI/ContextMenu/ContextMenuChildMenu.cs @@ -29,7 +29,7 @@ namespace FlaxEditor.GUI.ContextMenu CloseMenuOnClick = false; } - private void ShowChild(ContextMenu parentContextMenu) + internal void ShowChild(ContextMenu parentContextMenu) { // Hide parent CM popups and set itself as child var vAlign = parentContextMenu.ItemsAreaMargin.Top; diff --git a/Source/Editor/GUI/CurveEditor.Contents.cs b/Source/Editor/GUI/CurveEditor.Contents.cs index c2831046b..75f37d457 100644 --- a/Source/Editor/GUI/CurveEditor.Contents.cs +++ b/Source/Editor/GUI/CurveEditor.Contents.cs @@ -522,6 +522,16 @@ namespace FlaxEditor.GUI cm.AddButton("Show whole curve", _editor.ShowWholeCurve); cm.AddButton("Reset view", _editor.ResetView); } + cm.AddSeparator(); + var presetCm = cm.AddChildMenu("Apply preset"); + foreach (var value in Enum.GetValues(typeof(CurvePreset))) + { + CurvePreset preset = (CurvePreset)value; + string name = Utilities.Utils.GetPropertyNameUI(preset.ToString()); + var b = presetCm.ContextMenu.AddButton(name, () => _editor.ApplyPreset(preset)); + b.Enabled = !(_editor is LinearCurveEditor && (preset != CurvePreset.Constant && preset != CurvePreset.Linear)); + } + _editor.OnShowContextMenu(cm, selectionCount); cm.Show(this, location); } @@ -619,6 +629,33 @@ namespace FlaxEditor.GUI } } + /// + /// A list of avaliable curve presets for the . + /// + public enum CurvePreset + { + /// + /// A curve where every point has the same value. + /// + Constant, + /// + /// A curve linear curve. + /// + Linear, + /// + /// A curve that starts a slowly and then accelerates until the end. + /// + EaseIn, + /// + /// A curve that starts a steep and then flattens until the end. + /// + EaseOut, + /// + /// A combination of the and preset. + /// + Smoothstep + } + /// public override void OnKeyframesDeselect(IKeyframesEditor editor) { diff --git a/Source/Editor/GUI/CurveEditor.cs b/Source/Editor/GUI/CurveEditor.cs index 706d07b32..4fb727ea1 100644 --- a/Source/Editor/GUI/CurveEditor.cs +++ b/Source/Editor/GUI/CurveEditor.cs @@ -19,6 +19,48 @@ namespace FlaxEditor.GUI /// public abstract partial class CurveEditor : CurveEditorBase where T : new() { + /// + /// Represents a single point in a . + /// + protected struct CurvePresetPoint + { + /// + /// The time. + /// + public float Time; + + /// + /// The value. + /// + public float Value; + + /// + /// The in tangent. Will be ignored in + /// + public float TangentIn; + + /// + /// The out tangent. Will be ignored in + /// + public float TangentOut; + } + + /// + /// A curve preset. + /// + protected struct CurveEditorPreset() + { + /// + /// If the tangents will be linear or smooth. + /// + public bool LinearTangents; + + /// + /// The points of the preset. 
+ /// + public List Points; + } + private class Popup : ContextMenuBase { private CustomEditorPresenter _presenter; @@ -26,11 +68,12 @@ namespace FlaxEditor.GUI private List _keyframeIndices; private bool _isDirty; - public Popup(CurveEditor editor, object[] selection, List keyframeIndices = null, float height = 140.0f) - : this(editor, height) + public Popup(CurveEditor editor, object[] selection, List keyframeIndices = null, float maxHeight = 140.0f) + : this(editor, maxHeight) { _presenter.Select(selection); _presenter.OpenAllGroups(); + Size = new Float2(Size.X, Mathf.Min(_presenter.ContainerControl.Size.Y, maxHeight)); _keyframeIndices = keyframeIndices; if (keyframeIndices != null && selection.Length != keyframeIndices.Count) throw new Exception(); @@ -169,7 +212,7 @@ namespace FlaxEditor.GUI if (IsSelected) color = Editor.ContainsFocus ? style.SelectionBorder : Color.Lerp(style.ForegroundDisabled, style.SelectionBorder, 0.4f); if (IsMouseOver) - color *= 1.1f; + color *= 1.5f; Render2D.FillRectangle(rect, color); } @@ -285,7 +328,7 @@ namespace FlaxEditor.GUI /// /// The keyframes size. /// - protected static readonly Float2 KeyframesSize = new Float2(7.0f); + protected static readonly Float2 KeyframesSize = new Float2(8.0f); /// /// The colors for the keyframe points. @@ -326,6 +369,63 @@ namespace FlaxEditor.GUI private Color _labelsColor; private Font _labelsFont; + /// + /// Preset values for to be applied to a . + /// + protected Dictionary Presets = new Dictionary + { + { CurvePreset.Constant, new CurveEditorPreset + { + LinearTangents = true, + Points = new List + { + new CurvePresetPoint { Time = 0f, Value = 0.5f, TangentIn = 0f, TangentOut = 0f }, + new CurvePresetPoint { Time = 1f, Value = 0.5f, TangentIn = 0f, TangentOut = 0f }, + } + } + }, + { CurvePreset.EaseIn, new CurveEditorPreset + { + LinearTangents = false, + Points = new List + { + new CurvePresetPoint { Time = 0f, Value = 0f, TangentIn = 0f, TangentOut = 0f }, + new CurvePresetPoint { Time = 1f, Value = 1f, TangentIn = -1.4f, TangentOut = 0f }, + } + } + }, + { CurvePreset.EaseOut, new CurveEditorPreset + { + LinearTangents = false, + Points = new List + { + new CurvePresetPoint { Time = 1f, Value = 1f, TangentIn = 0f, TangentOut = 0f }, + new CurvePresetPoint { Time = 0f, Value = 0f, TangentIn = 0f, TangentOut = 1.4f }, + } + } + }, + { CurvePreset.Linear, new CurveEditorPreset + { + LinearTangents = true, + Points = new List + { + new CurvePresetPoint { Time = 0f, Value = 0f, TangentIn = 0f, TangentOut = 0f }, + new CurvePresetPoint { Time = 1f, Value = 1f, TangentIn = 0f, TangentOut = 0f }, + } + } + }, + { CurvePreset.Smoothstep, new CurveEditorPreset + { + LinearTangents = false, + Points = new List + { + new CurvePresetPoint { Time = 0f, Value = 0f, TangentIn = 0f, TangentOut = 0f }, + new CurvePresetPoint { Time = 1f, Value = 1f, TangentIn = 0f, TangentOut = 0f }, + } + } + }, + }; + /// /// The keyframe UI points. /// @@ -568,6 +668,28 @@ namespace FlaxEditor.GUI /// The list of indices of the keyframes to remove. protected abstract void RemoveKeyframesInternal(HashSet indicesToRemove); + /// + /// Tries to convert a float to the type of the type wildcard of the curve editor. + /// + /// The float. + /// The converted value. 
+ public static object ConvertCurvePresetValueToCurveEditorType(float value) + { + if (typeof(T) == typeof(Float2)) + return new Float2(value); + if (typeof(T) == typeof(Float3)) + return new Float3(value); + if (typeof(T) == typeof(Float4)) + return new Float4(value); + if (typeof(T) == typeof(Vector2)) + return new Vector2(value); + if (typeof(T) == typeof(Vector3)) + return new Vector3(value); + if (typeof(T) == typeof(Vector4)) + return new Vector4(value); + return value; + } + /// /// Called when showing a context menu. Can be used to add custom buttons with actions. /// @@ -752,6 +874,17 @@ namespace FlaxEditor.GUI ShowCurve(false); } + /// + /// Applies a to the curve editor. + /// + /// The preset. + public virtual void ApplyPreset(CurvePreset preset) + { + // Remove existing keyframes + SelectAll(); + RemoveKeyframes(); + } + /// public override void Evaluate(out object result, float time, bool loop = false) { @@ -1028,6 +1161,31 @@ namespace FlaxEditor.GUI return true; } + bool left = key == KeyboardKeys.ArrowLeft; + bool right = key == KeyboardKeys.ArrowRight; + bool up = key == KeyboardKeys.ArrowUp; + bool down = key == KeyboardKeys.ArrowDown; + + if (left || right || up || down) + { + bool shift = Root.GetKey(KeyboardKeys.Shift); + bool alt = Root.GetKey(KeyboardKeys.Alt); + float deltaValue = 10f; + if (shift || alt) + deltaValue = shift ? 2.5f : 5f; + + Float2 moveDelta = Float2.Zero; + if (left || right) + moveDelta.X = left ? -deltaValue : deltaValue; + if (up || down) + moveDelta.Y = up ? -deltaValue : deltaValue; + + _contents.OnMoveStart(Float2.Zero); + _contents.OnMove(moveDelta); + _contents.OnMoveEnd(Float2.Zero); + return true; + } + return false; } @@ -1526,6 +1684,22 @@ namespace FlaxEditor.GUI _tangents[i].Visible = false; } + /// + public override void ApplyPreset(CurvePreset preset) + { + base.ApplyPreset(preset); + + CurveEditorPreset data = Presets[preset]; + foreach (var point in data.Points) + { + float time = point.Time; + object value = ConvertCurvePresetValueToCurveEditorType((float)point.Value); + AddKeyframe(time, value); + } + + ShowWholeCurve(); + } + /// protected override void DrawCurve(ref Rectangle viewRect) { @@ -2312,6 +2486,30 @@ namespace FlaxEditor.GUI } } + /// + public override void ApplyPreset(CurvePreset preset) + { + base.ApplyPreset(preset); + + CurveEditorPreset data = Presets[preset]; + + foreach (var point in data.Points) + { + float time = point.Time; + object value = ConvertCurvePresetValueToCurveEditorType((float)point.Value); + object tangentIn = ConvertCurvePresetValueToCurveEditorType((float)point.TangentIn); + object tangentOut = ConvertCurvePresetValueToCurveEditorType((float)point.TangentOut); + + AddKeyframe(time, value, tangentIn, tangentOut); + } + + SelectAll(); + if (data.LinearTangents) + SetTangentsLinear(); + + ShowWholeCurve(); + } + /// protected override void SetScaleInternal(ref Float2 scale) { diff --git a/Source/Editor/GUI/Docking/DockPanel.cs b/Source/Editor/GUI/Docking/DockPanel.cs index bfac161f0..c8900dcba 100644 --- a/Source/Editor/GUI/Docking/DockPanel.cs +++ b/Source/Editor/GUI/Docking/DockPanel.cs @@ -469,7 +469,7 @@ namespace FlaxEditor.GUI.Docking var childPanels = _childPanels.ToArray(); if (childPanels.Length != 0) { - // Move tabs from child panels into this one + // Fallback: move tabs from child panels into this one. 
DockWindow selectedTab = null; foreach (var childPanel in childPanels) { @@ -490,7 +490,8 @@ namespace FlaxEditor.GUI.Docking { // Unlink splitter var splitterParent = splitter.Parent; - Assert.IsNotNull(splitterParent); + if (splitterParent == null) + return; splitter.Parent = null; // Move controls from second split panel to the split panel parent @@ -507,17 +508,63 @@ namespace FlaxEditor.GUI.Docking splitter.Dispose(); } } + else if (IsMaster && _childPanels.Count != 0) + { + if (TryCollapseSplitter(_tabsProxy?.Parent as Panel)) + return; + } else if (!IsMaster) { throw new InvalidOperationException(); } } + else if (_childPanels.Count != 0) + { + if (TryCollapseSplitter(_tabsProxy?.Parent as Panel)) + return; + } else if (!IsMaster) { throw new InvalidOperationException(); } } + internal bool CollapseEmptyTabsProxy() + { + if (TabsCount == 0 && ChildPanelsCount > 0) + { + return TryCollapseSplitter(_tabsProxy?.Parent as Panel); + } + return false; + } + + private bool TryCollapseSplitter(Panel removedPanelParent) + { + if (removedPanelParent == null) + return false; + if (!(removedPanelParent.Parent is SplitPanel tabsSplitter)) + return false; + + var splitterParent = tabsSplitter.Parent; + if (splitterParent == null) + return false; + tabsSplitter.Parent = null; + + var scrPanel = removedPanelParent == tabsSplitter.Panel2 ? tabsSplitter.Panel1 : tabsSplitter.Panel2; + var srcPanelChildrenCount = scrPanel.ChildrenCount; + for (int i = srcPanelChildrenCount - 1; i >= 0 && scrPanel.ChildrenCount > 0; i--) + { + scrPanel.GetChild(i).Parent = splitterParent; + } + Assert.IsTrue(scrPanel.ChildrenCount == 0); + Assert.IsTrue(splitterParent.ChildrenCount == srcPanelChildrenCount); + + tabsSplitter.Dispose(); + if (_tabsProxy != null && _tabsProxy.Parent == removedPanelParent) + _tabsProxy = null; + return true; + } + internal virtual void DockWindowInternal(DockState state, DockWindow window, bool autoSelect = true, float? splitterValue = null) { DockWindow(state, window, autoSelect, splitterValue); diff --git a/Source/Editor/GUI/Input/ValueBox.cs b/Source/Editor/GUI/Input/ValueBox.cs index 2252a7f22..5e1e4aaf4 100644 --- a/Source/Editor/GUI/Input/ValueBox.cs +++ b/Source/Editor/GUI/Input/ValueBox.cs @@ -99,6 +99,11 @@ namespace FlaxEditor.GUI.Input /// public event Action SlidingEnd; + /// + /// If enabled, pressing the arrow up or down key increments/ decrements the value. + /// + public bool ArrowKeysIncrement = true; + /// /// Gets or sets the slider speed. Use value 0 to disable and hide slider UI. /// @@ -239,6 +244,27 @@ namespace FlaxEditor.GUI.Input ResetViewOffset(); } + /// + public override bool OnKeyDown(KeyboardKeys key) + { + if (ArrowKeysIncrement && (key == KeyboardKeys.ArrowUp || key == KeyboardKeys.ArrowDown)) + { + bool altDown = Root.GetKey(KeyboardKeys.Alt); + bool shiftDown = Root.GetKey(KeyboardKeys.Shift); + bool controlDown = Root.GetKey(KeyboardKeys.Control); + float deltaValue = altDown ? 0.1f : (shiftDown ? 10f : (controlDown ? 100f : 1f)); + float slideDelta = key == KeyboardKeys.ArrowUp ? 
deltaValue : -deltaValue; + + _startSlideValue = Value; + ApplySliding(slideDelta); + EndSliding(); + Focus(); + return true; + } + + return base.OnKeyDown(key); + } + /// public override bool OnMouseDown(Float2 location, MouseButton button) { diff --git a/Source/Editor/GUI/Tree/TreeNode.cs b/Source/Editor/GUI/Tree/TreeNode.cs index ed1257819..b0ce8c251 100644 --- a/Source/Editor/GUI/Tree/TreeNode.cs +++ b/Source/Editor/GUI/Tree/TreeNode.cs @@ -1140,8 +1140,11 @@ namespace FlaxEditor.GUI.Tree ParentTree.DraggedOverNode = this; // Expand node if mouse goes over arrow - if (ArrowRect.Contains(location) && HasAnyVisibleChild) + if (ArrowRect.Contains(location) && HasAnyVisibleChild && IsCollapsed) + { Expand(true); + ParentTree?.FlushPendingPerformLayout(); + } result = OnDragEnterHeader(data); } @@ -1172,8 +1175,11 @@ namespace FlaxEditor.GUI.Tree ParentTree.DraggedOverNode = this; // Expand node if mouse goes over arrow - if (ArrowRect.Contains(location) && HasAnyVisibleChild) + if (ArrowRect.Contains(location) && HasAnyVisibleChild && IsCollapsed) + { Expand(true); + ParentTree?.FlushPendingPerformLayout(); + } if (!_isDragOverHeader) result = OnDragEnterHeader(data); diff --git a/Source/Editor/Gizmo/ViewportRubberBandSelector.cs b/Source/Editor/Gizmo/ViewportRubberBandSelector.cs index 542e8b388..66e835fac 100644 --- a/Source/Editor/Gizmo/ViewportRubberBandSelector.cs +++ b/Source/Editor/Gizmo/ViewportRubberBandSelector.cs @@ -36,11 +36,12 @@ public sealed class ViewportRubberBandSelector /// Triggers the start of a rubber band selection. /// /// True if selection started, otherwise false. - public bool TryStartingRubberBandSelection() + public bool TryStartingRubberBandSelection(Float2 mousePosition) { if (!_isRubberBandSpanning && _owner.Gizmos.Active != null && !_owner.Gizmos.Active.IsControllingMouse && !_owner.IsRightMouseButtonDown) { _tryStartRubberBand = true; + _cachedStartingMousePosition = mousePosition; return true; } return false; @@ -82,12 +83,15 @@ public sealed class ViewportRubberBandSelector return; } - if (_tryStartRubberBand && (Mathf.Abs(_owner.MouseDelta.X) > 0.1f || Mathf.Abs(_owner.MouseDelta.Y) > 0.1f) && canStart) + if (_tryStartRubberBand && canStart) { - _isRubberBandSpanning = true; - _cachedStartingMousePosition = mousePosition; - _rubberBandRect = new Rectangle(_cachedStartingMousePosition, Float2.Zero); - _tryStartRubberBand = false; + var delta = mousePosition - _cachedStartingMousePosition; + if (Mathf.Abs(delta.X) > 0.1f || Mathf.Abs(delta.Y) > 0.1f) + { + _isRubberBandSpanning = true; + _rubberBandRect = new Rectangle(_cachedStartingMousePosition, Float2.Zero); + _tryStartRubberBand = false; + } } else if (_isRubberBandSpanning && _owner.Gizmos.Active != null && !_owner.Gizmos.Active.IsControllingMouse && !_owner.IsRightMouseButtonDown) { diff --git a/Source/Editor/Modules/SceneEditingModule.cs b/Source/Editor/Modules/SceneEditingModule.cs index c36866bc3..b1a5be6f1 100644 --- a/Source/Editor/Modules/SceneEditingModule.cs +++ b/Source/Editor/Modules/SceneEditingModule.cs @@ -229,7 +229,7 @@ namespace FlaxEditor.Modules if (!isPlayMode && options.General.AutoRebuildNavMesh && actor.Scene && node.AffectsNavigationWithChildren) { var bounds = actor.BoxWithChildren; - Navigation.BuildNavMesh(actor.Scene, bounds, options.General.AutoRebuildNavMeshTimeoutMs); + Navigation.BuildNavMesh(bounds, options.General.AutoRebuildNavMeshTimeoutMs); } } diff --git a/Source/Editor/Modules/UIModule.cs b/Source/Editor/Modules/UIModule.cs index 663c81909..7449d4f37 100644 
--- a/Source/Editor/Modules/UIModule.cs +++ b/Source/Editor/Modules/UIModule.cs @@ -158,6 +158,7 @@ namespace FlaxEditor.Modules private ContextMenuButton _menuToolsProfilerWindow; private ContextMenuButton _menuToolsSetTheCurrentSceneViewAsDefault; private ContextMenuButton _menuToolsTakeScreenshot; + private ContextMenuButton _menuToolsOpenLocalFolder; private ContextMenuChildMenu _menuWindowApplyWindowLayout; private ToolStripButton _toolStripSaveAll; @@ -733,6 +734,16 @@ namespace FlaxEditor.Modules _menuToolsTakeScreenshot = cm.AddButton("Take screenshot", inputOptions.TakeScreenshot, Editor.Windows.TakeScreenshot); cm.AddSeparator(); cm.AddButton("Plugins", () => Editor.Windows.PluginsWin.Show()); + cm.AddSeparator(); + var childMenu = cm.AddChildMenu("Open Product Local folder"); + childMenu.ContextMenu.AddButton("Editor", () => FileSystem.ShowFileExplorer(Globals.ProductLocalFolder)); + _menuToolsOpenLocalFolder = childMenu.ContextMenu.AddButton("Game", () => + { + string localAppData = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData); + GameSettings settings = GameSettings.Load(); + string path = Path.Combine(localAppData, settings.CompanyName, settings.ProductName); + FileSystem.ShowFileExplorer(path); + }); // Window MenuWindow = MainMenu.AddButton("Window"); @@ -1084,6 +1095,10 @@ namespace FlaxEditor.Modules _menuToolsBuildNavMesh.Enabled = canEdit; _menuToolsCancelBuilding.Enabled = GameCooker.IsRunning; _menuToolsSetTheCurrentSceneViewAsDefault.Enabled = Level.ScenesCount > 0; + string localAppData = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData); + GameSettings settings = GameSettings.Load(); + string path = Path.Combine(localAppData, settings.CompanyName, settings.ProductName); + _menuToolsOpenLocalFolder.Enabled = Directory.Exists(path); c.PerformLayout(); } diff --git a/Source/Editor/Modules/WindowsModule.cs b/Source/Editor/Modules/WindowsModule.cs index 51c88e082..0d1f0e2d7 100644 --- a/Source/Editor/Modules/WindowsModule.cs +++ b/Source/Editor/Modules/WindowsModule.cs @@ -490,10 +490,15 @@ namespace FlaxEditor.Modules Editor.LogWarning("Empty panel inside layout."); p.RemoveIt(); } + else + { + p.CollapseEmptyTabsProxy(); + } } } panel.SelectTab(selectedTab); + panel.CollapseEmptyTabsProxy(); } private static void SaveBounds(XmlWriter writer, Window win) diff --git a/Source/Editor/Options/InputOptions.cs b/Source/Editor/Options/InputOptions.cs index ab473ebed..a759b7247 100644 --- a/Source/Editor/Options/InputOptions.cs +++ b/Source/Editor/Options/InputOptions.cs @@ -571,6 +571,10 @@ namespace FlaxEditor.Options [EditorDisplay("View Flags"), EditorOrder(3260)] public InputBinding DebugDraw = new InputBinding(KeyboardKeys.Alpha4, KeyboardKeys.Control, KeyboardKeys.Shift); + [DefaultValue(typeof(InputBinding), "None")] + [EditorDisplay("View Flags"), EditorOrder(3270)] + public InputBinding Particles = new InputBinding(KeyboardKeys.None); + #endregion #region Interface diff --git a/Source/Editor/SceneGraph/Actors/BoxColliderNode.cs b/Source/Editor/SceneGraph/Actors/BoxColliderNode.cs index c4fd47f71..4a7150972 100644 --- a/Source/Editor/SceneGraph/Actors/BoxColliderNode.cs +++ b/Source/Editor/SceneGraph/Actors/BoxColliderNode.cs @@ -42,6 +42,7 @@ namespace FlaxEditor.SceneGraph.Actors if (value is BoxCollider collider) collider.AutoResize(!_keepLocalOrientation); } + Presenter.OnModified(); } } diff --git a/Source/Editor/SceneGraph/Actors/SplineNode.cs b/Source/Editor/SceneGraph/Actors/SplineNode.cs index 
de319ca1d..515939526 100644 --- a/Source/Editor/SceneGraph/Actors/SplineNode.cs +++ b/Source/Editor/SceneGraph/Actors/SplineNode.cs @@ -555,7 +555,7 @@ namespace FlaxEditor.SceneGraph.Actors var options = Editor.Instance.Options.Options.General; if (options.AutoRebuildNavMesh) { - Navigation.BuildNavMesh(collider.Scene, collider.Box, options.AutoRebuildNavMeshTimeoutMs); + Navigation.BuildNavMesh(collider.Box, options.AutoRebuildNavMeshTimeoutMs); } } } diff --git a/Source/Editor/SceneGraph/Actors/StaticModelNode.cs b/Source/Editor/SceneGraph/Actors/StaticModelNode.cs index e95364c2d..4cd63f05d 100644 --- a/Source/Editor/SceneGraph/Actors/StaticModelNode.cs +++ b/Source/Editor/SceneGraph/Actors/StaticModelNode.cs @@ -47,6 +47,11 @@ namespace FlaxEditor.SceneGraph.Actors } } + /// + /// Gets the model used by this actor. + /// + public Model Model => ((StaticModel)Actor).Model; + /// public StaticModelNode(Actor actor) : base(actor) @@ -120,12 +125,12 @@ namespace FlaxEditor.SceneGraph.Actors { base.OnContextMenu(contextMenu, window); - // Check if every selected node is a primitive + // Check if every selected node is a primitive or has collision asset var selection = GetSelection(window); bool autoOptionEnabled = true; foreach (var node in selection) { - if (node is StaticModelNode staticModelNode && !staticModelNode.IsPrimitive) + if (node is StaticModelNode staticModelNode && (!staticModelNode.IsPrimitive && GetCollisionData(staticModelNode.Model) == null)) { autoOptionEnabled = false; break; @@ -201,6 +206,54 @@ namespace FlaxEditor.SceneGraph.Actors return Array.Empty(); } + private static bool TryCollisionData(Model model, BinaryAssetItem assetItem, out CollisionData collisionData) + { + collisionData = FlaxEngine.Content.Load(assetItem.ID); + if (collisionData) + { + var options = collisionData.Options; + if (options.Model == model.ID || options.Model == Guid.Empty) + return true; + } + return false; + } + + private CollisionData GetCollisionData(Model model) + { + if (model == null) + return null; + + // Check if there already is collision data for that model to reuse + var modelItem = (AssetItem)Editor.Instance.ContentDatabase.Find(model.ID); + if (modelItem?.ParentFolder != null) + { + foreach (var child in modelItem.ParentFolder.Children) + { + // Check if there is collision that was made with this model + if (child is BinaryAssetItem b && b.IsOfType()) + { + if (TryCollisionData(model, b, out var collisionData)) + return collisionData; + } + + // Check if there is an auto-imported collision + if (child is ContentFolder childFolder && childFolder.ShortName == modelItem.ShortName) + { + foreach (var childFolderChild in childFolder.Children) + { + if (childFolderChild is BinaryAssetItem c && c.IsOfType()) + { + if (TryCollisionData(model, c, out var collisionData)) + return collisionData; + } + } + } + } + } + + return null; + } + private void CreateAuto(StaticModel actor, Spawner spawner, bool singleNode) { // Special case for in-built Editor models that can use analytical collision @@ -243,6 +296,15 @@ namespace FlaxEditor.SceneGraph.Actors collider.LocalPosition = new Vector3(0, 50.0f, 0); collider.LocalOrientation = Quaternion.Euler(0, 0, 90.0f); } + else + { + var collider = new MeshCollider + { + Transform = actor.Transform, + CollisionData = GetCollisionData(model), + }; + spawner(collider); + } } private void CreateBox(StaticModel actor, Spawner spawner, bool singleNode) diff --git a/Source/Editor/Surface/Archetypes/Material.cs 
b/Source/Editor/Surface/Archetypes/Material.cs index e46038639..9ac43082e 100644 --- a/Source/Editor/Surface/Archetypes/Material.cs +++ b/Source/Editor/Surface/Archetypes/Material.cs @@ -304,25 +304,14 @@ namespace FlaxEditor.Surface.Archetypes } } - internal sealed class CustomCodeNode : SurfaceNode + internal sealed class CustomCodeNode : ResizableSurfaceNode { - private Rectangle _resizeButtonRect; - private Float2 _startResizingSize; - private Float2 _startResizingCornerOffset; - private bool _isResizing; private CustomCodeTextBox _textBox; - private int SizeValueIndex => Archetype.TypeID == 8 ? 1 : 3; // Index of the Size stored in Values array - - private Float2 SizeValue - { - get => (Float2)Values[SizeValueIndex]; - set => SetValue(SizeValueIndex, value, false); - } - public CustomCodeNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) : base(id, context, nodeArch, groupArch) { + _sizeValueIndex = Archetype.TypeID == 8 ? 1 : 3; // Index of the Size stored in Values array Float2 pos = new Float2(FlaxEditor.Surface.Constants.NodeMarginX, FlaxEditor.Surface.Constants.NodeMarginY + FlaxEditor.Surface.Constants.NodeHeaderSize), size; if (nodeArch.TypeID == 8) { @@ -345,126 +334,19 @@ namespace FlaxEditor.Surface.Archetypes _textBox.EditEnd += () => SetValue(0, _textBox.Text); } - public override bool CanSelect(ref Float2 location) - { - return base.CanSelect(ref location) && !_resizeButtonRect.MakeOffsetted(Location).Contains(ref location); - } - public override void OnSurfaceLoaded(SurfaceNodeActions action) { base.OnSurfaceLoaded(action); _textBox.Text = (string)Values[0]; - - var size = SizeValue; - if (Surface != null && Surface.GridSnappingEnabled) - size = Surface.SnapToGrid(size, true); - Resize(size.X, size.Y); } public override void OnValuesChanged() { base.OnValuesChanged(); - var size = SizeValue; - Resize(size.X, size.Y); _textBox.Text = (string)Values[0]; } - - protected override void UpdateRectangles() - { - base.UpdateRectangles(); - - const float buttonMargin = FlaxEditor.Surface.Constants.NodeCloseButtonMargin; - const float buttonSize = FlaxEditor.Surface.Constants.NodeCloseButtonSize; - _resizeButtonRect = new Rectangle(_closeButtonRect.Left, Height - buttonSize - buttonMargin - 4, buttonSize, buttonSize); - } - - public override void Draw() - { - base.Draw(); - - var style = Style.Current; - if (_isResizing) - { - Render2D.FillRectangle(_resizeButtonRect, style.Selection); - Render2D.DrawRectangle(_resizeButtonRect, style.SelectionBorder); - } - Render2D.DrawSprite(style.Scale, _resizeButtonRect, _resizeButtonRect.Contains(_mousePosition) && Surface.CanEdit ? 
style.Foreground : style.ForegroundGrey); - } - - public override void OnLostFocus() - { - if (_isResizing) - EndResizing(); - - base.OnLostFocus(); - } - - public override void OnEndMouseCapture() - { - if (_isResizing) - EndResizing(); - - base.OnEndMouseCapture(); - } - - public override bool OnMouseDown(Float2 location, MouseButton button) - { - if (base.OnMouseDown(location, button)) - return true; - - if (button == MouseButton.Left && _resizeButtonRect.Contains(ref location) && Surface.CanEdit) - { - // Start sliding - _isResizing = true; - _startResizingSize = Size; - _startResizingCornerOffset = Size - location; - StartMouseCapture(); - Cursor = CursorType.SizeNWSE; - return true; - } - - return false; - } - - public override void OnMouseMove(Float2 location) - { - if (_isResizing) - { - var emptySize = CalculateNodeSize(0, 0); - var size = Float2.Max(location - emptySize + _startResizingCornerOffset, new Float2(240, 160)); - Resize(size.X, size.Y); - } - else - { - base.OnMouseMove(location); - } - } - - public override bool OnMouseUp(Float2 location, MouseButton button) - { - if (button == MouseButton.Left && _isResizing) - { - EndResizing(); - return true; - } - - return base.OnMouseUp(location, button); - } - - private void EndResizing() - { - Cursor = CursorType.Default; - EndMouseCapture(); - _isResizing = false; - if (_startResizingSize != Size) - { - var emptySize = CalculateNodeSize(0, 0); - SizeValue = Size - emptySize; - Surface.MarkAsEdited(false); - } - } } internal enum MaterialTemplateInputsMapping diff --git a/Source/Editor/Surface/Archetypes/Textures.cs b/Source/Editor/Surface/Archetypes/Textures.cs index 56f8154d7..f09ee5015 100644 --- a/Source/Editor/Surface/Archetypes/Textures.cs +++ b/Source/Editor/Surface/Archetypes/Textures.cs @@ -23,11 +23,14 @@ namespace FlaxEditor.Surface.Archetypes TextureGroup = 4, } - internal class SampleTextureNode : SurfaceNode + internal class TextureSamplerNode : SurfaceNode { private ComboBox _textureGroupPicker; + protected int _samplerTypeValueIndex = -1; + protected int _textureGroupValueIndex = -1; + protected int _level = 5; - public SampleTextureNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) + protected TextureSamplerNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) : base(id, context, nodeArch, groupArch) { } @@ -48,13 +51,13 @@ namespace FlaxEditor.Surface.Archetypes private void UpdateUI() { - if ((int)Values[0] == (int)CommonSamplerType.TextureGroup) + if ((int)Values[_samplerTypeValueIndex] == (int)CommonSamplerType.TextureGroup) { if (_textureGroupPicker == null) { _textureGroupPicker = new ComboBox { - Location = new Float2(FlaxEditor.Surface.Constants.NodeMarginX + 50, FlaxEditor.Surface.Constants.NodeMarginY + FlaxEditor.Surface.Constants.NodeHeaderSize + FlaxEditor.Surface.Constants.LayoutOffsetY * 5), + Location = new Float2(FlaxEditor.Surface.Constants.NodeMarginX + 50, FlaxEditor.Surface.Constants.NodeMarginY + FlaxEditor.Surface.Constants.NodeHeaderSize + FlaxEditor.Surface.Constants.LayoutOffsetY * _level), Width = 100, Parent = this, }; @@ -71,7 +74,7 @@ namespace FlaxEditor.Surface.Archetypes _textureGroupPicker.Visible = true; } _textureGroupPicker.SelectedIndexChanged -= OnSelectedTextureGroupChanged; - _textureGroupPicker.SelectedIndex = (int)Values[2]; + _textureGroupPicker.SelectedIndex = (int)Values[_textureGroupValueIndex]; _textureGroupPicker.SelectedIndexChanged += OnSelectedTextureGroupChanged; } else if 
(_textureGroupPicker != null) @@ -83,7 +86,39 @@ namespace FlaxEditor.Surface.Archetypes private void OnSelectedTextureGroupChanged(ComboBox comboBox) { - SetValue(2, _textureGroupPicker.SelectedIndex); + SetValue(_textureGroupValueIndex, _textureGroupPicker.SelectedIndex); + } + } + + internal class SampleTextureNode : TextureSamplerNode + { + public SampleTextureNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) + : base(id, context, nodeArch, groupArch) + { + _samplerTypeValueIndex = 0; + _textureGroupValueIndex = 2; + } + } + + internal class TriplanarSampleTextureNode : TextureSamplerNode + { + public TriplanarSampleTextureNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) + : base(id, context, nodeArch, groupArch) + { + _samplerTypeValueIndex = 3; + _textureGroupValueIndex = 5; + _level = 5; + } + } + + internal class ProceduralSampleTextureNode : TextureSamplerNode + { + public ProceduralSampleTextureNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) + : base(id, context, nodeArch, groupArch) + { + _samplerTypeValueIndex = 0; + _textureGroupValueIndex = 2; + _level = 4; } } @@ -280,9 +315,9 @@ namespace FlaxEditor.Surface.Archetypes ConnectionsHints = ConnectionsHint.Vector, DefaultValues = new object[] { - 0, - -1.0f, - 0, + (int)CommonSamplerType.LinearClamp, // Sampler + -1.0f, // Level + 0, // Texture Group }, Elements = new[] { @@ -402,6 +437,7 @@ namespace FlaxEditor.Surface.Archetypes new NodeArchetype { TypeID = 16, + Create = (id, context, arch, groupArch) => new TriplanarSampleTextureNode(id, context, arch, groupArch), Title = "Triplanar Texture", Description = "Projects a texture using world-space coordinates with triplanar mapping.", Flags = NodeFlags.MaterialGraph, @@ -411,8 +447,9 @@ namespace FlaxEditor.Surface.Archetypes Float3.One, // Scale 1.0f, // Blend Float2.Zero, // Offset - 2, // Sampler + (int)CommonSamplerType.LinearWrap, // Sampler false, // Local + 0, // Texture Group }, Elements = new[] { @@ -430,17 +467,17 @@ namespace FlaxEditor.Surface.Archetypes new NodeArchetype { TypeID = 17, - Create = (id, context, arch, groupArch) => new SampleTextureNode(id, context, arch, groupArch), + Create = (id, context, arch, groupArch) => new ProceduralSampleTextureNode(id, context, arch, groupArch), Title = "Procedural Sample Texture", Description = "Samples a texture to create a more natural look with less obvious tiling.", Flags = NodeFlags.MaterialGraph, - Size = new Float2(240, 110), + Size = new Float2(240, 130), ConnectionsHints = ConnectionsHint.Vector, DefaultValues = new object[] { - 2, - -1.0f, - 0, + (int)CommonSamplerType.LinearWrap, // Sampler + -1.0f, // Level + 0, // Texture Group }, Elements = new[] { @@ -448,8 +485,8 @@ namespace FlaxEditor.Surface.Archetypes NodeElementArchetype.Factory.Input(1, "UVs", true, null, 1), NodeElementArchetype.Factory.Input(2, "Offset", true, typeof(Float2), 3), NodeElementArchetype.Factory.Output(0, "Color", typeof(Float4), 4), - NodeElementArchetype.Factory.Text(0, Surface.Constants.LayoutOffsetY * 4, "Sampler"), - NodeElementArchetype.Factory.ComboBox(50, Surface.Constants.LayoutOffsetY * 4, 100, 0, typeof(CommonSamplerType)) + NodeElementArchetype.Factory.Text(0, Surface.Constants.LayoutOffsetY * 3, "Sampler"), + NodeElementArchetype.Factory.ComboBox(50, Surface.Constants.LayoutOffsetY * 3, 100, 0, typeof(CommonSamplerType)) } }, new NodeArchetype @@ -469,6 +506,7 @@ namespace 
FlaxEditor.Surface.Archetypes { TypeID = 23, Title = "Triplanar Normal Map", + Create = (id, context, arch, groupArch) => new TriplanarSampleTextureNode(id, context, arch, groupArch), Description = "Projects a normal map texture using world-space coordinates with triplanar mapping.", Flags = NodeFlags.MaterialGraph, Size = new Float2(280, 100), @@ -477,8 +515,9 @@ namespace FlaxEditor.Surface.Archetypes Float3.One, // Scale 1.0f, // Blend Float2.Zero, // Offset - 2, // Sampler + (int)CommonSamplerType.LinearWrap, // Sampler false, // Local + 0, // Texture Group }, Elements = new[] { diff --git a/Source/Editor/Surface/Archetypes/Tools.cs b/Source/Editor/Surface/Archetypes/Tools.cs index aacebd189..68a733197 100644 --- a/Source/Editor/Surface/Archetypes/Tools.cs +++ b/Source/Editor/Surface/Archetypes/Tools.cs @@ -453,7 +453,7 @@ namespace FlaxEditor.Surface.Archetypes } } - private class CurveNode : SurfaceNode where T : struct + private class CurveNode : ResizableSurfaceNode where T : struct { private BezierCurveEditor _curve; private bool _isSavingCurve; @@ -467,7 +467,7 @@ namespace FlaxEditor.Surface.Archetypes Create = (id, context, arch, groupArch) => new CurveNode(id, context, arch, groupArch), Description = "An animation spline represented by a set of keyframes, each representing an endpoint of a Bezier curve.", Flags = NodeFlags.AllGraphs, - Size = new Float2(400, 180.0f), + Size = new Float2(400, 180), DefaultValues = new object[] { // Keyframes count @@ -491,6 +491,8 @@ namespace FlaxEditor.Surface.Archetypes 0.0f, zero, zero, zero, 0.0f, zero, zero, zero, 0.0f, zero, zero, zero, + + new Float2(400, 180), }, Elements = new[] { @@ -504,30 +506,52 @@ namespace FlaxEditor.Surface.Archetypes public CurveNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) : base(id, context, nodeArch, groupArch) { + _sizeValueIndex = 29; // Index of the Size stored in Values array } - + /// public override void OnLoaded(SurfaceNodeActions action) { base.OnLoaded(action); + // Create curve editor var upperLeft = GetBox(0).BottomLeft; var upperRight = GetBox(1).BottomRight; float curveMargin = 20.0f; - _curve = new BezierCurveEditor { MaxKeyframes = 7, Bounds = new Rectangle(upperLeft + new Float2(curveMargin, 10.0f), upperRight.X - upperLeft.X - curveMargin * 2.0f, 140.0f), - Parent = this + Parent = this, + AnchorMax = Float2.One, }; _curve.Edited += OnCurveEdited; _curve.UnlockChildrenRecursive(); _curve.PerformLayout(); + // Sync keyframes UpdateCurveKeyframes(); } + /// + public override void OnSurfaceLoaded(SurfaceNodeActions action) + { + base.OnSurfaceLoaded(action); + + // Ensure the whole curve is shown + _curve.ShowWholeCurve(); + } + + public override void OnValuesChanged() + { + base.OnValuesChanged(); + + if (!_isSavingCurve) + { + UpdateCurveKeyframes(); + } + } + private void OnCurveEdited() { if (_isSavingCurve) @@ -553,17 +577,6 @@ namespace FlaxEditor.Surface.Archetypes _isSavingCurve = false; } - /// - public override void OnValuesChanged() - { - base.OnValuesChanged(); - - if (!_isSavingCurve) - { - UpdateCurveKeyframes(); - } - } - private void UpdateCurveKeyframes() { var count = (int)Values[0]; @@ -1575,7 +1588,7 @@ namespace FlaxEditor.Surface.Archetypes DefaultValues = new object[] { Guid.Empty, - string.Empty + string.Empty, }, Elements = new[] { diff --git a/Source/Editor/Surface/ResizableSurfaceNode.cs b/Source/Editor/Surface/ResizableSurfaceNode.cs new file mode 100644 index 000000000..259c29836 --- /dev/null +++ 
b/Source/Editor/Surface/ResizableSurfaceNode.cs @@ -0,0 +1,182 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using FlaxEngine; +using FlaxEngine.GUI; + +namespace FlaxEditor.Surface +{ + /// + /// Visject Surface node control that can be resized. + /// + /// + [HideInEditor] + public class ResizableSurfaceNode : SurfaceNode + { + private Float2 _startResizingSize; + private Float2 _startResizingCornerOffset; + + /// + /// Indicates whether the node is currently being resized. + /// + protected bool _isResizing; + + /// + /// Index of the Float2 value in the node values list to store node size. + /// + protected int _sizeValueIndex = -1; + + /// + /// Minimum node size. + /// + protected Float2 _sizeMin = new Float2(240, 160); + + /// + /// Node resizing rectangle bounds. + /// + protected Rectangle _resizeButtonRect; + + private Float2 SizeValue + { + get => (Float2)Values[_sizeValueIndex]; + set => SetValue(_sizeValueIndex, value, false); + } + + /// + public ResizableSurfaceNode(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) + : base(id, context, nodeArch, groupArch) + { + } + + /// + public override bool CanSelect(ref Float2 location) + { + return base.CanSelect(ref location) && !_resizeButtonRect.MakeOffsetted(Location).Contains(ref location); + } + + /// + public override void OnSurfaceLoaded(SurfaceNodeActions action) + { + // Reapply the stored node size + var size = SizeValue; + if (Surface != null && Surface.GridSnappingEnabled) + size = Surface.SnapToGrid(size, true); + Resize(size.X, size.Y); + + base.OnSurfaceLoaded(action); + } + + /// + public override void OnValuesChanged() + { + base.OnValuesChanged(); + + var size = SizeValue; + Resize(size.X, size.Y); + } + + /// + public override void Draw() + { + base.Draw(); + + if (Surface.CanEdit) + { + var style = Style.Current; + if (_isResizing) + { + Render2D.FillRectangle(_resizeButtonRect, style.Selection); + Render2D.DrawRectangle(_resizeButtonRect, style.SelectionBorder); + } + Render2D.DrawSprite(style.Scale, _resizeButtonRect, _resizeButtonRect.Contains(_mousePosition) ?
style.Foreground : style.ForegroundGrey); + } + } + + /// + public override void OnLostFocus() + { + if (_isResizing) + EndResizing(); + + base.OnLostFocus(); + } + + /// + public override void OnEndMouseCapture() + { + if (_isResizing) + EndResizing(); + + base.OnEndMouseCapture(); + } + + /// + public override bool OnMouseDown(Float2 location, MouseButton button) + { + if (base.OnMouseDown(location, button)) + return true; + + if (button == MouseButton.Left && _resizeButtonRect.Contains(ref location) && Surface.CanEdit) + { + // Start resizing + _isResizing = true; + _startResizingSize = Size; + _startResizingCornerOffset = Size - location; + StartMouseCapture(); + Cursor = CursorType.SizeNWSE; + return true; + } + + return false; + } + + /// + public override void OnMouseMove(Float2 location) + { + if (_isResizing) + { + var emptySize = CalculateNodeSize(0, 0); + var size = Float2.Max(location - emptySize + _startResizingCornerOffset, _sizeMin); + Resize(size.X, size.Y); + } + else + { + base.OnMouseMove(location); + } + } + + /// + public override bool OnMouseUp(Float2 location, MouseButton button) + { + if (button == MouseButton.Left && _isResizing) + { + EndResizing(); + return true; + } + + return base.OnMouseUp(location, button); + } + + /// + protected override void UpdateRectangles() + { + base.UpdateRectangles(); + + const float buttonMargin = Constants.NodeCloseButtonMargin; + const float buttonSize = Constants.NodeCloseButtonSize; + _resizeButtonRect = new Rectangle(_closeButtonRect.Left, Height - buttonSize - buttonMargin - 4, buttonSize, buttonSize); + } + + private void EndResizing() + { + Cursor = CursorType.Default; + EndMouseCapture(); + _isResizing = false; + if (_startResizingSize != Size) + { + var emptySize = CalculateNodeSize(0, 0); + SizeValue = Size - emptySize; + Surface.MarkAsEdited(false); + } + } + } +} diff --git a/Source/Editor/Surface/SurfaceComment.cs b/Source/Editor/Surface/SurfaceComment.cs index 10e9fc776..a76fa245d 100644 --- a/Source/Editor/Surface/SurfaceComment.cs +++ b/Source/Editor/Surface/SurfaceComment.cs @@ -14,18 +14,11 @@ namespace FlaxEditor.Surface /// /// [HideInEditor] - public class SurfaceComment : SurfaceNode + public class SurfaceComment : ResizableSurfaceNode { private Rectangle _colorButtonRect; - private Rectangle _resizeButtonRect; - private Float2 _startResizingSize; private readonly TextBox _renameTextBox; - /// - /// True if sizing tool is in use. 
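ResizableSurfaceNode, added above, lifts the comment-resizing behaviour into a reusable base class: the node size lives as a Float2 entry in the node's Values array (at _sizeValueIndex, which each derived node points at its own slot), gets clamped to _sizeMin, is reapplied on surface load, and is written back when the drag ends so undo and graph serialization pick it up. The following stand-alone sketch shows only that persistence pattern; it is an editorial illustration with hypothetical names (System.Numerics.Vector2 in place of Float2) and it omits the empty-node-size offset and grid snapping that the real class handles.

using System.Numerics;

// Editorial sketch of the size-persistence pattern, not engine code.
class ResizableNodeSketch
{
    readonly object[] _values;        // node values list (serialized with the graph)
    readonly int _sizeValueIndex;     // which slot of the values list stores the node size
    Vector2 _sizeMin = new Vector2(240, 160);

    public Vector2 Size { get; private set; }

    public ResizableNodeSketch(object[] values, int sizeValueIndex)
    {
        _values = values;
        _sizeValueIndex = sizeValueIndex;
    }

    // On load (or when the values change): reapply the stored size, clamped to the minimum.
    public void ApplyStoredSize()
    {
        Size = Vector2.Max((Vector2)_values[_sizeValueIndex], _sizeMin);
    }

    // While dragging the resize corner: clamp and resize immediately for feedback.
    public void ResizeTo(Vector2 requested)
    {
        Size = Vector2.Max(requested, _sizeMin);
    }

    // When the drag ends: persist the final size back into the values list.
    public void CommitSize()
    {
        _values[_sizeValueIndex] = Size;
    }
}

Keeping the size inside the Values array rather than as a plain control property is what makes the resize undoable and saved with the graph data.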
- /// - protected bool _isResizing; - /// /// True if rename textbox is active in order to rename comment /// @@ -52,12 +45,6 @@ namespace FlaxEditor.Surface set => SetValue(1, value, false); } - private Float2 SizeValue - { - get => (Float2)Values[2]; - set => SetValue(2, value, false); - } - private int OrderValue { get => (int)Values[3]; @@ -68,6 +55,8 @@ namespace FlaxEditor.Surface public SurfaceComment(uint id, VisjectSurfaceContext context, NodeArchetype nodeArch, GroupArchetype groupArch) : base(id, context, nodeArch, groupArch) { + _sizeValueIndex = 2; // Index of the Size stored in Values array + _sizeMin = new Float2(140.0f, Constants.NodeHeaderSize); _renameTextBox = new TextBox(false, 0, 0, Width) { Height = Constants.NodeHeaderSize, @@ -86,10 +75,6 @@ namespace FlaxEditor.Surface // Read node data Title = TitleValue; Color = ColorValue; - var size = SizeValue; - if (Surface != null && Surface.GridSnappingEnabled) - size = Surface.SnapToGrid(size, true); - Size = size; // Order // Backwards compatibility - When opening with an older version send the old comments to the back @@ -126,27 +111,6 @@ namespace FlaxEditor.Surface // Read node data Title = TitleValue; Color = ColorValue; - Size = SizeValue; - } - - private void EndResizing() - { - // Clear state - _isResizing = false; - - if (_startResizingSize != Size) - { - SizeValue = Size; - Surface.MarkAsEdited(false); - } - - EndMouseCapture(); - } - - /// - public override bool CanSelect(ref Float2 location) - { - return _headerRect.MakeOffsetted(Location).Contains(ref location) && !_resizeButtonRect.MakeOffsetted(Location).Contains(ref location); } /// @@ -158,6 +122,8 @@ namespace FlaxEditor.Surface /// protected override void UpdateRectangles() { + base.UpdateRectangles(); + const float headerSize = Constants.NodeHeaderSize; const float buttonMargin = Constants.NodeCloseButtonMargin; const float buttonSize = Constants.NodeCloseButtonSize; @@ -222,16 +188,13 @@ namespace FlaxEditor.Surface // Color button Render2D.DrawSprite(style.Settings, _colorButtonRect, _colorButtonRect.Contains(_mousePosition) && Surface.CanEdit ? style.Foreground : style.ForegroundGrey); - // Check if is resizing + // Resize button if (_isResizing) { - // Draw overlay Render2D.FillRectangle(_resizeButtonRect, style.Selection); Render2D.DrawRectangle(_resizeButtonRect, style.SelectionBorder); } - - // Resize button - Render2D.DrawSprite(style.Scale, _resizeButtonRect, _resizeButtonRect.Contains(_mousePosition) && Surface.CanEdit ? style.Foreground : style.ForegroundGrey); + Render2D.DrawSprite(style.Scale, _resizeButtonRect, _resizeButtonRect.Contains(_mousePosition) ? 
style.Foreground : style.ForegroundGrey); } // Selection outline @@ -247,88 +210,28 @@ namespace FlaxEditor.Surface /// protected override Float2 CalculateNodeSize(float width, float height) { - return Size; + // No margins or headers + return new Float2(width, height); } /// public override void OnLostFocus() { - // Check if was resizing - if (_isResizing) - { - EndResizing(); - } - - // Check if was renaming if (_isRenaming) { Rename(_renameTextBox.Text); StopRenaming(); } - // Base base.OnLostFocus(); } - /// - public override void OnEndMouseCapture() - { - // Check if was resizing - if (_isResizing) - { - EndResizing(); - } - else - { - base.OnEndMouseCapture(); - } - } - /// public override bool ContainsPoint(ref Float2 location, bool precise) { return _headerRect.Contains(ref location) || _resizeButtonRect.Contains(ref location); } - /// - public override bool OnMouseDown(Float2 location, MouseButton button) - { - if (base.OnMouseDown(location, button)) - return true; - - // Check if can start resizing - if (button == MouseButton.Left && _resizeButtonRect.Contains(ref location) && Surface.CanEdit) - { - // Start sliding - _isResizing = true; - _startResizingSize = Size; - StartMouseCapture(); - - return true; - } - - return false; - } - - /// - public override void OnMouseMove(Float2 location) - { - // Check if is resizing - if (_isResizing) - { - // Update size - var size = Float2.Max(location, new Float2(140.0f, _headerRect.Bottom)); - if (Surface.GridSnappingEnabled) - size = Surface.SnapToGrid(size, true); - Size = size; - } - else - { - // Base - base.OnMouseMove(location); - } - } - /// public override bool OnMouseDoubleClick(Float2 location, MouseButton button) { @@ -394,12 +297,6 @@ namespace FlaxEditor.Surface /// public override bool OnMouseUp(Float2 location, MouseButton button) { - if (button == MouseButton.Left && _isResizing) - { - EndResizing(); - return true; - } - if (base.OnMouseUp(location, button)) return true; diff --git a/Source/Editor/Tools/Terrain/EditTab.cs b/Source/Editor/Tools/Terrain/EditTab.cs index 551a47974..6a6191122 100644 --- a/Source/Editor/Tools/Terrain/EditTab.cs +++ b/Source/Editor/Tools/Terrain/EditTab.cs @@ -192,7 +192,7 @@ namespace FlaxEditor.Tools.Terrain { if (terrain.Scene && terrain.HasStaticFlag(StaticFlags.Navigation)) { - Navigation.BuildNavMesh(terrain.Scene, patchBounds, editorOptions.General.AutoRebuildNavMeshTimeoutMs); + Navigation.BuildNavMesh(patchBounds, editorOptions.General.AutoRebuildNavMeshTimeoutMs); } } } diff --git a/Source/Editor/Tools/Terrain/EditTerrainGizmo.cs b/Source/Editor/Tools/Terrain/EditTerrainGizmo.cs index 54a6d7fa4..5fc0e894f 100644 --- a/Source/Editor/Tools/Terrain/EditTerrainGizmo.cs +++ b/Source/Editor/Tools/Terrain/EditTerrainGizmo.cs @@ -209,7 +209,7 @@ namespace FlaxEditor.Tools.Terrain { if (terrain.Scene && terrain.HasStaticFlag(StaticFlags.Navigation)) { - Navigation.BuildNavMesh(terrain.Scene, patchBounds, editorOptions.General.AutoRebuildNavMeshTimeoutMs); + Navigation.BuildNavMesh(patchBounds, editorOptions.General.AutoRebuildNavMeshTimeoutMs); } } } diff --git a/Source/Editor/Tools/Terrain/Undo/EditTerrainMapAction.cs b/Source/Editor/Tools/Terrain/Undo/EditTerrainMapAction.cs index 87b0c3cc9..afac0948e 100644 --- a/Source/Editor/Tools/Terrain/Undo/EditTerrainMapAction.cs +++ b/Source/Editor/Tools/Terrain/Undo/EditTerrainMapAction.cs @@ -172,7 +172,7 @@ namespace FlaxEditor.Tools.Terrain.Undo if (_navmeshBoundsModifications != null) { foreach (var bounds in _navmeshBoundsModifications) - 
Navigation.BuildNavMesh(scene, bounds, _dirtyNavMeshTimeoutMs); + Navigation.BuildNavMesh(bounds, _dirtyNavMeshTimeoutMs); } Editor.Instance.Scene.MarkSceneEdited(scene); @@ -217,11 +217,10 @@ namespace FlaxEditor.Tools.Terrain.Undo } // Update navmesh - var scene = Terrain.Scene; if (_navmeshBoundsModifications != null) { foreach (var bounds in _navmeshBoundsModifications) - Navigation.BuildNavMesh(scene, bounds, _dirtyNavMeshTimeoutMs); + Navigation.BuildNavMesh(bounds, _dirtyNavMeshTimeoutMs); } Editor.Instance.Scene.MarkSceneEdited(Terrain.Scene); diff --git a/Source/Editor/Undo/Actions/DeleteActorsAction.cs b/Source/Editor/Undo/Actions/DeleteActorsAction.cs index 75594ecb9..19ffb1e3f 100644 --- a/Source/Editor/Undo/Actions/DeleteActorsAction.cs +++ b/Source/Editor/Undo/Actions/DeleteActorsAction.cs @@ -303,7 +303,7 @@ namespace FlaxEditor.Actions if (_nodeParents[i] is ActorNode node && node.Actor && node.Actor.Scene && node.AffectsNavigationWithChildren) { var bounds = node.Actor.BoxWithChildren; - Navigation.BuildNavMesh(node.Actor.Scene, bounds, options.General.AutoRebuildNavMeshTimeoutMs); + Navigation.BuildNavMesh(bounds, options.General.AutoRebuildNavMeshTimeoutMs); } } } diff --git a/Source/Editor/Undo/Actions/TransformObjectsAction.cs b/Source/Editor/Undo/Actions/TransformObjectsAction.cs index ebed61174..df013e20e 100644 --- a/Source/Editor/Undo/Actions/TransformObjectsAction.cs +++ b/Source/Editor/Undo/Actions/TransformObjectsAction.cs @@ -121,12 +121,12 @@ namespace FlaxEditor // Handle simple case where objects were moved just a little and use one navmesh build request to improve performance if (data.BeforeBounds.Intersects(ref data.AfterBounds)) { - Navigation.BuildNavMesh(data.Scene, BoundingBox.Merge(data.BeforeBounds, data.AfterBounds), options.General.AutoRebuildNavMeshTimeoutMs); + Navigation.BuildNavMesh(BoundingBox.Merge(data.BeforeBounds, data.AfterBounds), options.General.AutoRebuildNavMeshTimeoutMs); } else { - Navigation.BuildNavMesh(data.Scene, data.BeforeBounds, options.General.AutoRebuildNavMeshTimeoutMs); - Navigation.BuildNavMesh(data.Scene, data.AfterBounds, options.General.AutoRebuildNavMeshTimeoutMs); + Navigation.BuildNavMesh(data.BeforeBounds, options.General.AutoRebuildNavMeshTimeoutMs); + Navigation.BuildNavMesh(data.AfterBounds, options.General.AutoRebuildNavMeshTimeoutMs); } } } diff --git a/Source/Editor/Utilities/ShuntingYardParser.cs b/Source/Editor/Utilities/ShuntingYardParser.cs index 47e2275e5..fe473389c 100644 --- a/Source/Editor/Utilities/ShuntingYardParser.cs +++ b/Source/Editor/Utilities/ShuntingYardParser.cs @@ -444,6 +444,9 @@ namespace FlaxEditor.Utilities /// The result value. 
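These call sites reflect the reshaped rebuild API: the editor now requests nav-mesh rebuilds with just the dirty bounds and a timeout, and the scene argument is dropped here (the engine-side Navigation::BuildNavMesh shown later takes the bounds first, with the scene apparently optional). The TransformObjectsAction hunk above keeps its small optimization, merging overlapping before/after bounds into a single request and issuing two requests for distant moves. A minimal sketch of that decision, using hypothetical types and a callback standing in for the real BuildNavMesh:

using System;
using System.Numerics;

// Editorial sketch, not engine code: hypothetical bounds type and build callback.
readonly struct BoundsSketch
{
    public readonly Vector3 Min, Max;
    public BoundsSketch(Vector3 min, Vector3 max) { Min = min; Max = max; }

    public bool Intersects(in BoundsSketch other)
    {
        return Min.X <= other.Max.X && Max.X >= other.Min.X
            && Min.Y <= other.Max.Y && Max.Y >= other.Min.Y
            && Min.Z <= other.Max.Z && Max.Z >= other.Min.Z;
    }

    public static BoundsSketch Merge(in BoundsSketch a, in BoundsSketch b)
    {
        return new BoundsSketch(Vector3.Min(a.Min, b.Min), Vector3.Max(a.Max, b.Max));
    }
}

static class NavRebuildSketch
{
    // One request when the object moved only slightly, two when it jumped far away.
    public static void RequestRebuild(BoundsSketch before, BoundsSketch after, Action<BoundsSketch> buildNavMesh)
    {
        if (before.Intersects(after))
        {
            buildNavMesh(BoundsSketch.Merge(before, after));
        }
        else
        {
            buildNavMesh(before);
            buildNavMesh(after);
        }
    }
}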
public static double Parse(string text) { + // Hack to allow parsing numbers while using "_" as a separator (like this: 1_000) + text = text.Replace("_", string.Empty); + var tokens = Tokenize(text); var rpn = OrderTokens(tokens); return EvaluateRPN(rpn); diff --git a/Source/Editor/Viewport/EditorViewport.cs b/Source/Editor/Viewport/EditorViewport.cs index 5fb1c4657..04563e1cd 100644 --- a/Source/Editor/Viewport/EditorViewport.cs +++ b/Source/Editor/Viewport/EditorViewport.cs @@ -1069,6 +1069,7 @@ namespace FlaxEditor.Viewport InputActions.Add(options => options.Fog, () => Task.ViewFlags ^= ViewFlags.Fog); InputActions.Add(options => options.SpecularLight, () => Task.ViewFlags ^= ViewFlags.SpecularLight); InputActions.Add(options => options.Decals, () => Task.ViewFlags ^= ViewFlags.Decals); + InputActions.Add(options => options.Particles, () => Task.ViewFlags ^= ViewFlags.Particles); InputActions.Add(options => options.CustomPostProcess, () => Task.ViewFlags ^= ViewFlags.CustomPostProcess); InputActions.Add(options => options.Bloom, () => Task.ViewFlags ^= ViewFlags.Bloom); InputActions.Add(options => options.ToneMapping, () => Task.ViewFlags ^= ViewFlags.ToneMapping); @@ -2147,6 +2148,7 @@ namespace FlaxEditor.Viewport new ViewFlagOptions(ViewFlags.Fog, "Fog", Editor.Instance.Options.Options.Input.Fog), new ViewFlagOptions(ViewFlags.SpecularLight, "Specular Light", Editor.Instance.Options.Options.Input.SpecularLight), new ViewFlagOptions(ViewFlags.Decals, "Decals", Editor.Instance.Options.Options.Input.Decals), + new ViewFlagOptions(ViewFlags.Particles, "Particles", Editor.Instance.Options.Options.Input.Particles), new ViewFlagOptions(ViewFlags.CustomPostProcess, "Custom Post Process", Editor.Instance.Options.Options.Input.CustomPostProcess), new ViewFlagOptions(ViewFlags.Bloom, "Bloom", Editor.Instance.Options.Options.Input.Bloom), new ViewFlagOptions(ViewFlags.ToneMapping, "Tone Mapping", Editor.Instance.Options.Options.Input.ToneMapping), @@ -2166,12 +2168,13 @@ namespace FlaxEditor.Viewport if (cm.Visible == false) return; var ccm = (ContextMenu)cm; + var flags = Task.View.Flags; foreach (var e in ccm.Items) { if (e is ContextMenuButton b && b.Tag != null) { var v = (ViewFlags)b.Tag; - b.Icon = (Task.View.Flags & v) != 0 ? Style.Current.CheckBoxTick : SpriteHandle.Invalid; + b.Icon = (flags & v) != 0 ? 
Style.Current.CheckBoxTick : SpriteHandle.Invalid; } } } diff --git a/Source/Editor/Viewport/MainEditorGizmoViewport.cs b/Source/Editor/Viewport/MainEditorGizmoViewport.cs index e9980db8b..743797b68 100644 --- a/Source/Editor/Viewport/MainEditorGizmoViewport.cs +++ b/Source/Editor/Viewport/MainEditorGizmoViewport.cs @@ -629,7 +629,7 @@ namespace FlaxEditor.Viewport base.OnLeftMouseButtonDown(); if (!IsAltKeyDown) - _rubberBandSelector.TryStartingRubberBandSelection(); + _rubberBandSelector.TryStartingRubberBandSelection(_viewMousePos); } /// diff --git a/Source/Editor/Windows/EditorOptionsWindow.cs b/Source/Editor/Windows/EditorOptionsWindow.cs index 0ee9a92d7..c6bf2fd16 100644 --- a/Source/Editor/Windows/EditorOptionsWindow.cs +++ b/Source/Editor/Windows/EditorOptionsWindow.cs @@ -45,7 +45,7 @@ namespace FlaxEditor.Windows { Parent = this }; - _saveButton = (ToolStripButton)toolstrip.AddButton(editor.Icons.Save64, SaveData).LinkTooltip("Save"); + _saveButton = (ToolStripButton)toolstrip.AddButton(editor.Icons.Save64, SaveData).LinkTooltip("Save."); _saveButton.Enabled = false; _tabs = new Tabs @@ -104,6 +104,8 @@ namespace FlaxEditor.Windows { _saveButton.Enabled = true; _isDataDirty = true; + if (!Title.EndsWith('*')) + Title += "*"; } } @@ -113,6 +115,8 @@ namespace FlaxEditor.Windows { _saveButton.Enabled = false; _isDataDirty = false; + if (Title.EndsWith('*')) + Title = Title.Remove(Title.Length - 1); } } diff --git a/Source/Engine/Content/Assets/Material.cpp b/Source/Engine/Content/Assets/Material.cpp index 019fd9dd8..b4cf55d4d 100644 --- a/Source/Engine/Content/Assets/Material.cpp +++ b/Source/Engine/Content/Assets/Material.cpp @@ -41,6 +41,35 @@ bool Material::IsMaterialInstance() const return false; } +#if USE_EDITOR + +void Material::GetReferences(Array& assets, Array& files) const +{ + ShaderAssetTypeBase::GetReferences(assets, files); + + // Collect references from material graph (needs to load it) + if (!WaitForLoaded() && HasChunk(SHADER_FILE_CHUNK_VISJECT_SURFACE)) + { + ScopeLock lock(Locker); + if (!LoadChunks(GET_CHUNK_FLAG(SHADER_FILE_CHUNK_VISJECT_SURFACE))) + { + const auto surfaceChunk = GetChunk(SHADER_FILE_CHUNK_VISJECT_SURFACE); + if (surfaceChunk) + { + MemoryReadStream stream(surfaceChunk->Get(), surfaceChunk->Size()); + MaterialGraph graph; + if (!graph.Load(&stream, false)) + { + graph.GetReferences(assets); + } + } + } + } + +} + +#endif + const MaterialInfo& Material::GetInfo() const { if (_materialShader) diff --git a/Source/Engine/Content/Assets/Material.h b/Source/Engine/Content/Assets/Material.h index 4ce47b154..cd2ae8e97 100644 --- a/Source/Engine/Content/Assets/Material.h +++ b/Source/Engine/Content/Assets/Material.h @@ -38,6 +38,9 @@ public: public: // [MaterialBase] bool IsMaterialInstance() const override; +#if USE_EDITOR + void GetReferences(Array& assets, Array& files) const override; +#endif // [IMaterial] const MaterialInfo& GetInfo() const override; diff --git a/Source/Engine/ContentImporters/ImportModel.cpp b/Source/Engine/ContentImporters/ImportModel.cpp index 91547dc8d..f3548dc5c 100644 --- a/Source/Engine/ContentImporters/ImportModel.cpp +++ b/Source/Engine/ContentImporters/ImportModel.cpp @@ -478,16 +478,23 @@ CreateAssetResult ImportModel::Import(CreateAssetContext& context) } // Check if restore local changes on asset reimport + constexpr bool RestoreModelOptionsOnReimport = true; constexpr bool RestoreAnimEventsOnReimport = true; + const bool restoreModelOptions = RestoreModelOptionsOnReimport && (options.Type == 
ModelTool::ModelType::Model || options.Type == ModelTool::ModelType::SkinnedModel); const bool restoreMaterials = options.RestoreMaterialsOnReimport && data->Materials.HasItems(); const bool restoreAnimEvents = RestoreAnimEventsOnReimport && options.Type == ModelTool::ModelType::Animation && data->Animations.HasItems(); - if ((restoreMaterials || restoreAnimEvents) && FileSystem::FileExists(context.TargetAssetPath)) + if ((restoreModelOptions || restoreMaterials || restoreAnimEvents) && FileSystem::FileExists(context.TargetAssetPath)) { AssetReference asset = Content::LoadAsync(context.TargetAssetPath); if (asset && !asset->WaitForLoaded()) { auto* model = ScriptingObject::Cast(asset); auto* animation = ScriptingObject::Cast(asset); + if (restoreModelOptions && model) + { + // Copy general properties + data->MinScreenSize = model->MinScreenSize; + } if (restoreMaterials && model) { // Copy material settings diff --git a/Source/Engine/Debug/DebugCommands.cpp b/Source/Engine/Debug/DebugCommands.cpp index fa171d5dd..58cf2894b 100644 --- a/Source/Engine/Debug/DebugCommands.cpp +++ b/Source/Engine/Debug/DebugCommands.cpp @@ -8,6 +8,7 @@ #include "Engine/Threading/Threading.h" #include "Engine/Threading/Task.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Scripting/BinaryModule.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/ManagedCLR/MAssembly.h" @@ -219,6 +220,7 @@ namespace if (module == GetBinaryModuleCorlib()) return; PROFILE_CPU(); + PROFILE_MEM(EngineDebug); #if USE_CSHARP if (auto* managedModule = dynamic_cast(module)) @@ -381,6 +383,7 @@ DebugCommandsService DebugCommandsServiceInstance; void DebugCommands::Execute(StringView command) { + PROFILE_MEM(EngineDebug); // TODO: fix missing string handle on 1st command execution (command gets invalid after InitCommands due to dotnet GC or dotnet interop handles flush) String commandCopy = command; command = commandCopy; @@ -423,6 +426,7 @@ void DebugCommands::Search(StringView searchText, Array& matches, bo { if (searchText.IsEmpty()) return; + PROFILE_MEM(EngineDebug); // TODO: fix missing string handle on 1st command execution (command gets invalid after InitCommands due to dotnet GC or dotnet interop handles flush) String searchTextCopy = searchText; searchText = searchTextCopy; diff --git a/Source/Engine/Debug/DebugDraw.cpp b/Source/Engine/Debug/DebugDraw.cpp index 7c798f88f..18552dcec 100644 --- a/Source/Engine/Debug/DebugDraw.cpp +++ b/Source/Engine/Debug/DebugDraw.cpp @@ -480,6 +480,7 @@ DebugDrawCall WriteLists(int32& vertexCounter, const Array& listA, const Arra FORCE_INLINE DebugTriangle* AppendTriangles(int32 count, float duration, bool depthTest) { + PROFILE_MEM(EngineDebug); Array* list; if (depthTest) list = duration > 0 ? &Context->DebugDrawDepthTest.DefaultTriangles : &Context->DebugDrawDepthTest.OneFrameTriangles; @@ -490,6 +491,19 @@ FORCE_INLINE DebugTriangle* AppendTriangles(int32 count, float duration, bool de return list->Get() + startIndex; } +FORCE_INLINE DebugTriangle* AppendWireTriangles(int32 count, float duration, bool depthTest) +{ + PROFILE_MEM(EngineDebug); + Array* list; + if (depthTest) + list = duration > 0 ? &Context->DebugDrawDepthTest.DefaultWireTriangles : &Context->DebugDrawDepthTest.OneFrameWireTriangles; + else + list = duration > 0 ? 
&Context->DebugDrawDefault.DefaultWireTriangles : &Context->DebugDrawDefault.OneFrameWireTriangles; + const int32 startIndex = list->Count(); + list->AddUninitialized(count); + return list->Get() + startIndex; +} + inline void DrawText3D(const DebugText3D& t, const RenderContext& renderContext, const Float3& viewUp, const Matrix& f, const Matrix& vp, const Viewport& viewport, GPUContext* context, GPUTextureView* target, GPUTextureView* depthBuffer) { Matrix w, fw, m; @@ -527,7 +541,7 @@ DebugDrawService DebugDrawServiceInstance; bool DebugDrawService::Init() { - PROFILE_MEM(Graphics); + PROFILE_MEM(EngineDebug); Context = &GlobalContext; // Init wireframe sphere cache @@ -646,7 +660,7 @@ void DebugDrawService::Update() } PROFILE_CPU(); - PROFILE_MEM(Graphics); + PROFILE_MEM(EngineDebug); // Update lists float deltaTime = Time::Update.DeltaTime.GetTotalSeconds(); @@ -1102,6 +1116,7 @@ void DebugDraw::DrawRay(const Ray& ray, const Color& color, float length, float void DebugDraw::DrawLine(const Vector3& start, const Vector3& end, const Color& color, float duration, bool depthTest) { + PROFILE_MEM(EngineDebug); const Float3 startF = start - Context->Origin, endF = end - Context->Origin; auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; if (duration > 0) @@ -1120,6 +1135,7 @@ void DebugDraw::DrawLine(const Vector3& start, const Vector3& end, const Color& void DebugDraw::DrawLine(const Vector3& start, const Vector3& end, const Color& startColor, const Color& endColor, float duration, bool depthTest) { + PROFILE_MEM(EngineDebug); const Float3 startF = start - Context->Origin, endF = end - Context->Origin; auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; if (duration > 0) @@ -1149,6 +1165,7 @@ void DebugDraw::DrawLines(const Span& lines, const Matrix& transform, co } // Draw lines + PROFILE_MEM(EngineDebug); const Float3* p = lines.Get(); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; const Matrix transformF = transform * Matrix::Translation(-Context->Origin); @@ -1188,6 +1205,7 @@ void DebugDraw::DrawLines(GPUBuffer* lines, const Matrix& transform, float durat } // Draw lines + PROFILE_MEM(EngineDebug); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; auto& geometry = debugDrawData.GeometryBuffers.AddOne(); geometry.Buffer = lines; @@ -1212,6 +1230,7 @@ void DebugDraw::DrawLines(const Span& lines, const Matrix& transform, c } // Draw lines + PROFILE_MEM(EngineDebug); const Double3* p = lines.Get(); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; const Matrix transformF = transform * Matrix::Translation(-Context->Origin); @@ -1258,6 +1277,7 @@ void DebugDraw::DrawBezier(const Vector3& p1, const Vector3& p2, const Vector3& const float segmentCountInv = 1.0f / (float)segmentCount; // Draw segmented curve from lines + PROFILE_MEM(EngineDebug); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; if (duration > 0) { @@ -1298,6 +1318,7 @@ void DebugDraw::DrawWireBox(const BoundingBox& box, const Color& color, float du c -= Context->Origin; // Draw lines + PROFILE_MEM(EngineDebug); auto& debugDrawData = depthTest ? 
Context->DebugDrawDepthTest : Context->DebugDrawDefault; if (duration > 0) { @@ -1332,6 +1353,7 @@ void DebugDraw::DrawWireFrustum(const BoundingFrustum& frustum, const Color& col c -= Context->Origin; // Draw lines + PROFILE_MEM(EngineDebug); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; if (duration > 0) { @@ -1366,6 +1388,7 @@ void DebugDraw::DrawWireBox(const OrientedBoundingBox& box, const Color& color, c -= Context->Origin; // Draw lines + PROFILE_MEM(EngineDebug); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; if (duration > 0) { @@ -1407,6 +1430,7 @@ void DebugDraw::DrawWireSphere(const BoundingSphere& sphere, const Color& color, auto& cache = SphereCache[index]; // Draw lines of the unit sphere after linear transform + PROFILE_MEM(EngineDebug); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; if (duration > 0) { @@ -1442,6 +1466,7 @@ void DebugDraw::DrawSphere(const BoundingSphere& sphere, const Color& color, flo list = duration > 0 ? &Context->DebugDrawDepthTest.DefaultTriangles : &Context->DebugDrawDepthTest.OneFrameTriangles; else list = duration > 0 ? &Context->DebugDrawDefault.DefaultTriangles : &Context->DebugDrawDefault.OneFrameTriangles; + PROFILE_MEM(EngineDebug); list->EnsureCapacity(list->Count() + SphereTriangleCache.Count()); const Float3 centerF = sphere.Center - Context->Origin; @@ -1473,6 +1498,7 @@ void DebugDraw::DrawCircle(const Vector3& position, const Float3& normal, float Matrix::Multiply(scale, world, matrix); // Draw lines of the unit circle after linear transform + PROFILE_MEM(EngineDebug); Float3 prev = Float3::Transform(CircleCache[0], matrix); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; for (int32 i = 1; i < DEBUG_DRAW_CIRCLE_VERTICES;) @@ -1503,6 +1529,7 @@ void DebugDraw::DrawWireTriangle(const Vector3& v0, const Vector3& v1, const Vec void DebugDraw::DrawTriangle(const Vector3& v0, const Vector3& v1, const Vector3& v2, const Color& color, float duration, bool depthTest) { + PROFILE_MEM(EngineDebug); DebugTriangle t; t.Color = Color32(color); t.TimeLeft = duration; @@ -1558,6 +1585,7 @@ void DebugDraw::DrawTriangles(GPUBuffer* triangles, const Matrix& transform, flo DebugLog::ThrowException("Cannot draw debug lines with incorrect amount of items in array"); return; } + PROFILE_MEM(EngineDebug); auto& debugDrawData = depthTest ? 
Context->DebugDrawDepthTest : Context->DebugDrawDefault; auto& geometry = debugDrawData.GeometryBuffers.AddOne(); geometry.Buffer = triangles; @@ -1714,7 +1742,7 @@ void DebugDraw::DrawWireTriangles(const Span& vertices, const Color& col DebugTriangle t; t.Color = Color32(color); t.TimeLeft = duration; - auto dst = AppendTriangles(vertices.Length() / 3, duration, depthTest); + auto dst = AppendWireTriangles(vertices.Length() / 3, duration, depthTest); const Float3 origin = Context->Origin; for (int32 i = 0; i < vertices.Length();) { @@ -1736,7 +1764,7 @@ void DebugDraw::DrawWireTriangles(const Span& vertices, const SpanOrigin; for (int32 i = 0; i < indices.Length();) { @@ -1758,7 +1786,7 @@ void DebugDraw::DrawWireTriangles(const Span& vertices, const Color& co DebugTriangle t; t.Color = Color32(color); t.TimeLeft = duration; - auto dst = AppendTriangles(vertices.Length() / 3, duration, depthTest); + auto dst = AppendWireTriangles(vertices.Length() / 3, duration, depthTest); const Double3 origin = Context->Origin; for (int32 i = 0; i < vertices.Length();) { @@ -1780,7 +1808,7 @@ void DebugDraw::DrawWireTriangles(const Span& vertices, const SpanOrigin; for (int32 i = 0; i < indices.Length();) { @@ -1847,6 +1875,7 @@ void DebugDraw::DrawWireCapsule(const Vector3& position, const Quaternion& orien Matrix::Multiply(rotation, translation, world); // Write vertices + PROFILE_MEM(EngineDebug); auto& debugDrawData = depthTest ? Context->DebugDrawDepthTest : Context->DebugDrawDefault; Color32 color32(color); if (duration > 0) @@ -1941,6 +1970,7 @@ namespace void DrawCylinder(Array* list, const Vector3& position, const Quaternion& orientation, float radius, float height, const Color& color, float duration) { // Setup cache + PROFILE_MEM(EngineDebug); Float3 CylinderCache[DEBUG_DRAW_CYLINDER_VERTICES]; const float angleBetweenFacets = TWO_PI / DEBUG_DRAW_CYLINDER_RESOLUTION; const float verticalOffset = height * 0.5f; @@ -2012,6 +2042,7 @@ namespace void DrawCone(Array* list, const Vector3& position, const Quaternion& orientation, float radius, float angleXY, float angleXZ, const Color& color, float duration) { + PROFILE_MEM(EngineDebug); const float tolerance = 0.001f; const float angle1 = Math::Clamp(angleXY, tolerance, PI - tolerance); const float angle2 = Math::Clamp(angleXZ, tolerance, PI - tolerance); @@ -2101,6 +2132,7 @@ void DebugDraw::DrawArc(const Vector3& position, const Quaternion& orientation, { if (angle <= 0) return; + PROFILE_MEM(EngineDebug); if (angle > TWO_PI) angle = TWO_PI; Array* list; @@ -2133,6 +2165,7 @@ void DebugDraw::DrawWireArc(const Vector3& position, const Quaternion& orientati { if (angle <= 0) return; + PROFILE_MEM(EngineDebug); if (angle > TWO_PI) angle = TWO_PI; const int32 resolution = Math::CeilToInt((float)DEBUG_DRAW_CONE_RESOLUTION / TWO_PI * angle); @@ -2199,6 +2232,7 @@ void DebugDraw::DrawBox(const BoundingBox& box, const Color& color, float durati list = duration > 0 ? &Context->DebugDrawDepthTest.DefaultTriangles : &Context->DebugDrawDepthTest.OneFrameTriangles; else list = duration > 0 ? &Context->DebugDrawDefault.DefaultTriangles : &Context->DebugDrawDefault.OneFrameTriangles; + PROFILE_MEM(EngineDebug); list->EnsureCapacity(list->Count() + 36); for (int i0 = 0; i0 < 36;) { @@ -2227,6 +2261,7 @@ void DebugDraw::DrawBox(const OrientedBoundingBox& box, const Color& color, floa list = duration > 0 ? &Context->DebugDrawDepthTest.DefaultTriangles : &Context->DebugDrawDepthTest.OneFrameTriangles; else list = duration > 0 ? 
&Context->DebugDrawDefault.DefaultTriangles : &Context->DebugDrawDefault.OneFrameTriangles; + PROFILE_MEM(EngineDebug); list->EnsureCapacity(list->Count() + 36); for (int i0 = 0; i0 < 36;) { @@ -2242,6 +2277,7 @@ void DebugDraw::DrawText(const StringView& text, const Float2& position, const C { if (text.Length() == 0 || size < 4) return; + PROFILE_MEM(EngineDebug); Array* list = duration > 0 ? &Context->DebugDrawDefault.DefaultText2D : &Context->DebugDrawDefault.OneFrameText2D; auto& t = list->AddOne(); t.Text.Resize(text.Length() + 1); @@ -2257,6 +2293,7 @@ void DebugDraw::DrawText(const StringView& text, const Vector3& position, const { if (text.Length() == 0 || size < 4) return; + PROFILE_MEM(EngineDebug); Array* list = duration > 0 ? &Context->DebugDrawDefault.DefaultText3D : &Context->DebugDrawDefault.OneFrameText3D; auto& t = list->AddOne(); t.Text.Resize(text.Length() + 1); @@ -2274,6 +2311,7 @@ void DebugDraw::DrawText(const StringView& text, const Transform& transform, con { if (text.Length() == 0 || size < 4) return; + PROFILE_MEM(EngineDebug); Array* list = duration > 0 ? &Context->DebugDrawDefault.DefaultText3D : &Context->DebugDrawDefault.OneFrameText3D; auto& t = list->AddOne(); t.Text.Resize(text.Length() + 1); diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp index ddc3468f7..f8b9c7b0f 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -44,20 +44,39 @@ void Foliage::AddToCluster(ChunkedArray ZeroTolerance); ASSERT(cluster->Bounds.Intersects(instance.Bounds)); - // Find target cluster - while (cluster->Children[0]) + // Minor clusters don't use bounds intersection but try to find the first free cluster instead + if (cluster->IsMinor) { + // Insert into the first non-full child cluster or subdivide 1st child +#define CHECK_CHILD(idx) \ + if (cluster->Children[idx]->Instances.Count() < FOLIAGE_CLUSTER_CAPACITY) \ + { \ + cluster->Children[idx]->Instances.Add(&instance); \ + return; \ + } + CHECK_CHILD(3); + CHECK_CHILD(2); + CHECK_CHILD(1); + cluster = cluster->Children[0]; +#undef CHECK_CHILD + } + else + { + // Find target cluster + while (cluster->Children[0]) + { #define CHECK_CHILD(idx) \ if (cluster->Children[idx]->Bounds.Intersects(instance.Bounds)) \ { \ cluster = cluster->Children[idx]; \ continue; \ } - CHECK_CHILD(0); - CHECK_CHILD(1); - CHECK_CHILD(2); - CHECK_CHILD(3); + CHECK_CHILD(0); + CHECK_CHILD(1); + CHECK_CHILD(2); + CHECK_CHILD(3); #undef CHECK_CHILD + } } // Check if it's not full @@ -79,11 +98,20 @@ void Foliage::AddToCluster(ChunkedArrayBounds.Minimum; const Vector3 max = cluster->Bounds.Maximum; - const Vector3 size = cluster->Bounds.GetSize(); + const Vector3 size = max - min; cluster->Children[0]->Init(BoundingBox(min, min + size * Vector3(0.5f, 1.0f, 0.5f))); cluster->Children[1]->Init(BoundingBox(min + size * Vector3(0.5f, 0.0f, 0.5f), max)); cluster->Children[2]->Init(BoundingBox(min + size * Vector3(0.5f, 0.0f, 0.0f), min + size * Vector3(1.0f, 1.0f, 0.5f))); cluster->Children[3]->Init(BoundingBox(min + size * Vector3(0.0f, 0.0f, 0.5f), min + size * Vector3(0.5f, 1.0f, 1.0f))); + if (cluster->IsMinor || size.MinValue() < 1.0f) + { + // Mark children as minor to avoid infinite subdivision + cluster->IsMinor = true; + cluster->Children[0]->IsMinor = true; + cluster->Children[1]->IsMinor = true; + cluster->Children[2]->IsMinor = true; + cluster->Children[3]->IsMinor = true; + } // Move instances to a proper cells for (int32 i = 0; i < cluster->Instances.Count(); i++) diff --git 
a/Source/Engine/Foliage/FoliageCluster.cpp b/Source/Engine/Foliage/FoliageCluster.cpp index 1f76e5086..fd4c0f753 100644 --- a/Source/Engine/Foliage/FoliageCluster.cpp +++ b/Source/Engine/Foliage/FoliageCluster.cpp @@ -9,6 +9,7 @@ void FoliageCluster::Init(const BoundingBox& bounds) Bounds = bounds; TotalBounds = bounds; MaxCullDistance = 0.0f; + IsMinor = false; Children[0] = nullptr; Children[1] = nullptr; diff --git a/Source/Engine/Foliage/FoliageCluster.h b/Source/Engine/Foliage/FoliageCluster.h index 55cbeb027..c55305c5d 100644 --- a/Source/Engine/Foliage/FoliageCluster.h +++ b/Source/Engine/Foliage/FoliageCluster.h @@ -33,6 +33,11 @@ public: /// float MaxCullDistance; + /// + /// Flag used by clusters that are not typical quad-tree nodes but have no volume (eg. lots of instances placed on top of each other). + /// + int32 IsMinor : 1; + /// /// The child clusters. If any element is valid then all are created. /// diff --git a/Source/Engine/Graphics/Enums.h b/Source/Engine/Graphics/Enums.h index f6af6c16b..107fe3533 100644 --- a/Source/Engine/Graphics/Enums.h +++ b/Source/Engine/Graphics/Enums.h @@ -1075,20 +1075,25 @@ API_ENUM(Attributes="Flags") enum class ViewFlags : uint64 /// LightsDebug = 1 << 27, + /// + /// Shows/hides particle effects. + /// + Particles = 1 << 28, + /// /// Default flags for Game. /// - DefaultGame = Reflections | DepthOfField | Fog | Decals | MotionBlur | SSR | AO | GI | DirectionalLights | PointLights | SpotLights | SkyLights | Shadows | SpecularLight | AntiAliasing | CustomPostProcess | Bloom | ToneMapping | EyeAdaptation | CameraArtifacts | LensFlares | ContactShadows | GlobalSDF | Sky, + DefaultGame = Reflections | DepthOfField | Fog | Decals | MotionBlur | SSR | AO | GI | DirectionalLights | PointLights | SpotLights | SkyLights | Shadows | SpecularLight | AntiAliasing | CustomPostProcess | Bloom | ToneMapping | EyeAdaptation | CameraArtifacts | LensFlares | ContactShadows | GlobalSDF | Sky | Particles, /// /// Default flags for Editor. /// - DefaultEditor = Reflections | Fog | Decals | DebugDraw | SSR | AO | GI | DirectionalLights | PointLights | SpotLights | SkyLights | Shadows | SpecularLight | AntiAliasing | CustomPostProcess | Bloom | ToneMapping | EyeAdaptation | CameraArtifacts | LensFlares | EditorSprites | ContactShadows | GlobalSDF | Sky, + DefaultEditor = Reflections | Fog | Decals | DebugDraw | SSR | AO | GI | DirectionalLights | PointLights | SpotLights | SkyLights | Shadows | SpecularLight | AntiAliasing | CustomPostProcess | Bloom | ToneMapping | EyeAdaptation | CameraArtifacts | LensFlares | EditorSprites | ContactShadows | GlobalSDF | Sky | Particles, /// /// Default flags for materials/models previews generating. /// - DefaultAssetPreview = Reflections | Decals | DirectionalLights | PointLights | SpotLights | SkyLights | SpecularLight | AntiAliasing | Bloom | ToneMapping | EyeAdaptation | CameraArtifacts | LensFlares | ContactShadows | Sky, + DefaultAssetPreview = Reflections | Decals | DirectionalLights | PointLights | SpotLights | SkyLights | SpecularLight | AntiAliasing | Bloom | ToneMapping | EyeAdaptation | CameraArtifacts | LensFlares | ContactShadows | Sky | Particles, }; DECLARE_ENUM_OPERATORS(ViewFlags); diff --git a/Source/Engine/Graphics/Materials/MaterialShader.h b/Source/Engine/Graphics/Materials/MaterialShader.h index 5da4ee04f..bb68520c0 100644 --- a/Source/Engine/Graphics/Materials/MaterialShader.h +++ b/Source/Engine/Graphics/Materials/MaterialShader.h @@ -10,7 +10,7 @@ /// /// Current materials shader version. 
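The new Particles view flag shown above takes the next free bit (1 << 28) and is OR-ed into the DefaultGame, DefaultEditor, and DefaultAssetPreview sets so particles stay visible by default, while the viewport code earlier toggles the bit with XOR and tests it with AND. A tiny self-contained illustration of that flags-enum arithmetic; the bit values below are illustrative only, not the engine's:

using System;

// Editorial sketch with illustrative bit values, not the engine's ViewFlags enum.
[Flags]
enum ViewFlagsSketch : ulong
{
    None = 0,
    Decals = 1UL << 9,
    Particles = 1UL << 28,
    DefaultGame = Decals | Particles,
}

static class ViewFlagsDemo
{
    static void Main()
    {
        var flags = ViewFlagsSketch.DefaultGame;
        flags ^= ViewFlagsSketch.Particles;                         // toggle: hides particles
        bool particlesVisible = (flags & ViewFlagsSketch.Particles) != 0;
        Console.WriteLine(particlesVisible);                        // False
    }
}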
/// -#define MATERIAL_GRAPH_VERSION 178 +#define MATERIAL_GRAPH_VERSION 179 class Material; class GPUShader; diff --git a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp index 64dfe8303..19f2042f4 100644 --- a/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp +++ b/Source/Engine/Graphics/Materials/MaterialShaderFeatures.cpp @@ -191,7 +191,7 @@ bool GlobalIlluminationFeature::Bind(MaterialShader::BindParameters& params, Spa { // Unbind SRVs to prevent issues data.DDGI.CascadesCount = 0; - data.DDGI.FallbackIrradiance = Float3::Zero; + data.DDGI.FallbackIrradiance = Float4::Zero; params.GPUContext->UnBindSR(srv + 0); params.GPUContext->UnBindSR(srv + 1); params.GPUContext->UnBindSR(srv + 2); diff --git a/Source/Engine/Graphics/Models/MeshAccessor.h b/Source/Engine/Graphics/Models/MeshAccessor.h index 67b30e502..25fc01a1a 100644 --- a/Source/Engine/Graphics/Models/MeshAccessor.h +++ b/Source/Engine/Graphics/Models/MeshAccessor.h @@ -17,7 +17,7 @@ public: /// /// Mesh data stream. /// - struct Stream + struct FLAXENGINE_API Stream { friend MeshAccessor; diff --git a/Source/Engine/Graphics/PostProcessSettings.h b/Source/Engine/Graphics/PostProcessSettings.h index 670d99611..a300063e7 100644 --- a/Source/Engine/Graphics/PostProcessSettings.h +++ b/Source/Engine/Graphics/PostProcessSettings.h @@ -378,7 +378,7 @@ API_STRUCT() struct FLAXENGINE_API GlobalIlluminationSettings : ISerializable /// The irradiance lighting outside the GI range used as a fallback to prevent pure-black scene outside the Global Illumination range. /// API_FIELD(Attributes="EditorOrder(40), PostProcessSetting((int)GlobalIlluminationSettingsOverride.FallbackIrradiance)") - Color FallbackIrradiance = Color::Black; + Color FallbackIrradiance = Color::Transparent; public: /// diff --git a/Source/Engine/Graphics/RenderTools.cpp b/Source/Engine/Graphics/RenderTools.cpp index b0d587c8d..effbe6e1b 100644 --- a/Source/Engine/Graphics/RenderTools.cpp +++ b/Source/Engine/Graphics/RenderTools.cpp @@ -620,6 +620,40 @@ void RenderTools::ComputeSphereModelDrawMatrix(const RenderView& view, const Flo resultIsViewInside = Float3::DistanceSquared(view.Position, position) < Math::Square(radius * 1.1f); // Manually tweaked bias } +Float3 RenderTools::GetColorQuantizationError(PixelFormat format) +{ + Float3 mantissaBits; + switch (format) + { + case PixelFormat::R11G11B10_Float: + mantissaBits = Float3(6, 6, 5); + break; + case PixelFormat::R10G10B10A2_UNorm: + mantissaBits = Float3(10, 10, 10); + break; + case PixelFormat::R16G16B16A16_Float: + mantissaBits = Float3(16, 16, 16); + break; + case PixelFormat::R32G32B32A32_Float: + mantissaBits = Float3(23, 23, 23); + break; + case PixelFormat::R9G9B9E5_SharedExp: + mantissaBits = Float3(5, 6, 5); + break; + case PixelFormat::R8G8B8A8_UNorm: + case PixelFormat::B8G8R8A8_UNorm: + mantissaBits = Float3(8, 8, 8); + break; + default: + return Float3::Zero; + } + return { + Math::Pow(0.5f, mantissaBits.X), + Math::Pow(0.5f, mantissaBits.Y), + Math::Pow(0.5f, mantissaBits.Z) + }; +} + int32 MipLevelsCount(int32 width) { int32 result = 1; diff --git a/Source/Engine/Graphics/RenderTools.h b/Source/Engine/Graphics/RenderTools.h index 18357a13a..5f0dc23dc 100644 --- a/Source/Engine/Graphics/RenderTools.h +++ b/Source/Engine/Graphics/RenderTools.h @@ -140,6 +140,9 @@ public: static void CalculateTangentFrame(Float3& resultNormal, Float4& resultTangent, const Float3& normal, const Float3& tangent); static void 
ComputeSphereModelDrawMatrix(const RenderView& view, const Float3& position, float radius, Matrix& resultWorld, bool& resultIsViewInside); + + // Calculates error for a given render target format to reduce floating-point precision artifacts via QuantizeColor (from Noise.hlsl). + static Float3 GetColorQuantizationError(PixelFormat format); }; // Calculates mip levels count for a texture 1D. diff --git a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp index 23382673f..05c6d605a 100644 --- a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp +++ b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp @@ -59,7 +59,7 @@ namespace elements.Get()[j].Slot = (byte)slot; } } - GPUVertexLayout* result = anyValid ? GPUVertexLayout::Get(elements) : nullptr; + GPUVertexLayout* result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr; VertexBufferCache.Add(key, result); return result; } @@ -97,6 +97,7 @@ GPUVertexLayout::GPUVertexLayout() void GPUVertexLayout::SetElements(const Elements& elements, bool explicitOffsets) { uint32 offsets[GPU_MAX_VB_BINDED + 1] = {}; + uint32 maxOffset[GPU_MAX_VB_BINDED + 1] = {}; _elements = elements; for (int32 i = 0; i < _elements.Count(); i++) { @@ -108,9 +109,10 @@ void GPUVertexLayout::SetElements(const Elements& elements, bool explicitOffsets else e.Offset = (byte)offset; offset += PixelFormatExtensions::SizeInBytes(e.Format); + maxOffset[e.Slot] = Math::Max(maxOffset[e.Slot], offset); } _stride = 0; - for (uint32 offset : offsets) + for (uint32 offset : maxOffset) _stride += offset; } @@ -139,7 +141,7 @@ VertexElement GPUVertexLayout::FindElement(VertexElement::Types type) const GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOffsets) { // Hash input layout - uint32 hash = 0; + uint32 hash = explicitOffsets ? 
131 : 0; for (const VertexElement& element : elements) { CombineHash(hash, GetHash(element)); diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUShaderProgramDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUShaderProgramDX11.h index 319e1a939..567cbb618 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUShaderProgramDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUShaderProgramDX11.h @@ -4,6 +4,7 @@ #include "Engine/Graphics/Shaders/GPUShaderProgram.h" #include "Engine/Core/Types/DataContainer.h" +#include "Engine/Core/Collections/Dictionary.h" #include "../IncludeDirectXHeaders.h" #if GRAPHICS_API_DIRECTX11 diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp index 98143c7c3..4dc923234 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp @@ -3,16 +3,9 @@ #if GRAPHICS_API_DIRECTX12 #include "Engine/Graphics/Config.h" +#include "Engine/Platform/Platform.h" +#include "../IncludeDirectXHeaders.h" #if USE_PIX && GPU_ALLOW_PROFILE_EVENTS -// Include these header files before pix3 -#define WIN32_LEAN_AND_MEAN -#define NOMINMAX -#define NOGDI -#define NODRAWTEXT -//#define NOCTLMGR -#define NOFLATSBAPIS -#include -#include #include #endif #include "GPUContextDX12.h" diff --git a/Source/Engine/Input/Input.cpp b/Source/Engine/Input/Input.cpp index 1f33f9fef..6860c3463 100644 --- a/Source/Engine/Input/Input.cpp +++ b/Source/Engine/Input/Input.cpp @@ -81,6 +81,8 @@ Delegate Input::MouseWheel; Delegate Input::MouseMove; Delegate Input::MouseMoveRelative; Action Input::MouseLeave; +Delegate Input::GamepadButtonDown; +Delegate Input::GamepadButtonUp; Delegate Input::TouchDown; Delegate Input::TouchMove; Delegate Input::TouchUp; @@ -1045,6 +1047,19 @@ void InputService::Update() break; } } + // TODO: route gamepad button events into global InputEvents queue to improve processing + for (int32 i = 0; i < Input::Gamepads.Count(); i++) + { + auto gamepad = Input::Gamepads[i]; + for (int32 buttonIdx = 1; buttonIdx < (int32)GamepadButton::MAX; buttonIdx++) + { + GamepadButton button = (GamepadButton)buttonIdx; + if (gamepad->GetButtonDown(button)) + Input::GamepadButtonDown((InputGamepadIndex)i, button); + else if (gamepad->GetButtonUp(button)) + Input::GamepadButtonUp((InputGamepadIndex)i, button); + } + } // Update all actions for (int32 i = 0; i < Input::ActionMappings.Count(); i++) diff --git a/Source/Engine/Input/Input.h b/Source/Engine/Input/Input.h index 0021f99a5..964a247d9 100644 --- a/Source/Engine/Input/Input.h +++ b/Source/Engine/Input/Input.h @@ -118,6 +118,16 @@ public: /// API_EVENT() static Action MouseLeave; + /// + /// Event fired when gamepad button goes down. + /// + API_EVENT() static Delegate GamepadButtonDown; + + /// + /// Event fired when gamepad button goes up. + /// + API_EVENT() static Delegate GamepadButtonUp; + /// /// Event fired when touch action begins. 
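The GamepadButtonDown and GamepadButtonUp events declared above are fed by the polling loop added to InputService::Update, which walks every connected gamepad and every button each frame and fires on the pressed/released edges (the TODO notes this should eventually flow through the global input-event queue instead). The edge-detection idea in isolation, written against a hypothetical gamepad interface rather than the real Input API:

using System;

// Editorial sketch, not engine code: a hypothetical gamepad interface used to
// illustrate the per-frame down/up edge detection behind the new events.
enum PadButton { A, B, X, Y, Count }

interface IGamepadState
{
    bool IsDown(PadButton button);   // state sampled this frame
    bool WasDown(PadButton button);  // state sampled last frame
}

static class GamepadEventsSketch
{
    public static event Action<int, PadButton> ButtonDown;
    public static event Action<int, PadButton> ButtonUp;

    public static void Poll(IGamepadState[] gamepads)
    {
        for (int i = 0; i < gamepads.Length; i++)
        {
            for (int bi = 0; bi < (int)PadButton.Count; bi++)
            {
                var button = (PadButton)bi;
                bool now = gamepads[i].IsDown(button);
                bool before = gamepads[i].WasDown(button);
                if (now && !before)
                    ButtonDown?.Invoke(i, button);   // pressed this frame
                else if (!now && before)
                    ButtonUp?.Invoke(i, button);     // released this frame
            }
        }
    }
}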
/// diff --git a/Source/Engine/Level/Actors/Light.cpp b/Source/Engine/Level/Actors/Light.cpp index ad7fe54a2..6de7977c9 100644 --- a/Source/Engine/Level/Actors/Light.cpp +++ b/Source/Engine/Level/Actors/Light.cpp @@ -107,6 +107,7 @@ void LightWithShadow::Serialize(SerializeStream& stream, const void* otherObj) SERIALIZE(ContactShadowsLength); SERIALIZE(ShadowsUpdateRate); SERIALIZE(ShadowsUpdateRateAtDistance); + SERIALIZE(ShadowsResolution); } void LightWithShadow::Deserialize(DeserializeStream& stream, ISerializeModifier* modifier) @@ -125,4 +126,5 @@ void LightWithShadow::Deserialize(DeserializeStream& stream, ISerializeModifier* DESERIALIZE(ContactShadowsLength); DESERIALIZE(ShadowsUpdateRate); DESERIALIZE(ShadowsUpdateRateAtDistance); + DESERIALIZE(ShadowsResolution); } diff --git a/Source/Engine/Navigation/NavMesh.cpp b/Source/Engine/Navigation/NavMesh.cpp index 5593d732a..5932607a0 100644 --- a/Source/Engine/Navigation/NavMesh.cpp +++ b/Source/Engine/Navigation/NavMesh.cpp @@ -25,6 +25,7 @@ NavMesh::NavMesh(const SpawnParams& params) void NavMesh::SaveNavMesh() { #if COMPILE_WITH_ASSETS_IMPORTER + PROFILE_MEM(NavigationMesh); // Skip if scene is missing const auto scene = GetScene(); @@ -111,7 +112,7 @@ void NavMesh::OnAssetLoaded(Asset* asset, void* caller) if (Data.Tiles.HasItems()) return; ScopeLock lock(DataAsset->Locker); - PROFILE_MEM(Navigation); + PROFILE_MEM(NavigationMesh); // Remove added tiles if (_navMeshActive) diff --git a/Source/Engine/Navigation/NavMeshBoundsVolume.cpp b/Source/Engine/Navigation/NavMeshBoundsVolume.cpp index 56351fded..c54f2f072 100644 --- a/Source/Engine/Navigation/NavMeshBoundsVolume.cpp +++ b/Source/Engine/Navigation/NavMeshBoundsVolume.cpp @@ -6,7 +6,7 @@ #if USE_EDITOR #include "Editor/Editor.h" #include "Editor/Managed/ManagedEditor.h" -#include "NavMeshBuilder.h" +#include "Navigation.h" #endif NavMeshBoundsVolume::NavMeshBoundsVolume(const SpawnParams& params) @@ -55,9 +55,30 @@ void NavMeshBoundsVolume::OnBoundsChanged(const BoundingBox& prevBounds) // Auto-rebuild modified navmesh area if (IsDuringPlay() && IsActiveInHierarchy() && !Editor::IsPlayMode && Editor::Managed->CanAutoBuildNavMesh()) { - BoundingBox dirtyBounds; - BoundingBox::Merge(prevBounds, _box, dirtyBounds); - NavMeshBuilder::Build(GetScene(), dirtyBounds, ManagedEditor::ManagedEditorOptions.AutoRebuildNavMeshTimeoutMs); + if (_box.Intersects(prevBounds)) + { + // Bounds were moved a bit so merge into a single request (for performance reasons) + BoundingBox dirtyBounds; + BoundingBox::Merge(prevBounds, _box, dirtyBounds); + Navigation::BuildNavMesh(dirtyBounds, ManagedEditor::ManagedEditorOptions.AutoRebuildNavMeshTimeoutMs); + } + else + { + // Dirty each bounds in separate + Navigation::BuildNavMesh(prevBounds, ManagedEditor::ManagedEditorOptions.AutoRebuildNavMeshTimeoutMs); + Navigation::BuildNavMesh(_box, ManagedEditor::ManagedEditorOptions.AutoRebuildNavMeshTimeoutMs); + } + } +} + +void NavMeshBoundsVolume::OnActiveInTreeChanged() +{ + BoxVolume::OnActiveInTreeChanged(); + + // Auto-rebuild + if (IsDuringPlay() && !Editor::IsPlayMode && Editor::Managed->CanAutoBuildNavMesh()) + { + Navigation::BuildNavMesh(_box, ManagedEditor::ManagedEditorOptions.AutoRebuildNavMeshTimeoutMs); } } diff --git a/Source/Engine/Navigation/NavMeshBoundsVolume.h b/Source/Engine/Navigation/NavMeshBoundsVolume.h index c04bc0483..80df5035a 100644 --- a/Source/Engine/Navigation/NavMeshBoundsVolume.h +++ b/Source/Engine/Navigation/NavMeshBoundsVolume.h @@ -30,6 +30,7 @@ protected: void 
OnDisable() override; #if USE_EDITOR void OnBoundsChanged(const BoundingBox& prevBounds) override; + void OnActiveInTreeChanged() override; Color GetWiresColor() override; #endif }; diff --git a/Source/Engine/Navigation/NavMeshBuilder.cpp b/Source/Engine/Navigation/NavMeshBuilder.cpp index e92173846..896cf4217 100644 --- a/Source/Engine/Navigation/NavMeshBuilder.cpp +++ b/Source/Engine/Navigation/NavMeshBuilder.cpp @@ -3,6 +3,7 @@ #if COMPILE_WITH_NAV_MESH_BUILDER #include "NavMeshBuilder.h" +#include "Navigation.h" #include "NavMesh.h" #include "NavigationSettings.h" #include "NavMeshBoundsVolume.h" @@ -706,6 +707,7 @@ struct BuildRequest ScriptingObjectReference Scene; DateTime Time; BoundingBox DirtyBounds; + bool SpecificScene; }; CriticalSection NavBuildQueueLocker; @@ -713,6 +715,7 @@ Array NavBuildQueue; CriticalSection NavBuildTasksLocker; int32 NavBuildTasksMaxCount = 0; +bool NavBuildCheckMissingNavMeshes = false; Array NavBuildTasks; class NavMeshTileBuildTask : public ThreadPoolTask @@ -733,7 +736,7 @@ public: bool Run() override { PROFILE_CPU_NAMED("BuildNavMeshTile"); - PROFILE_MEM(Navigation); + PROFILE_MEM(NavigationBuilding); const auto navMesh = NavMesh.Get(); if (!navMesh) return false; @@ -776,13 +779,13 @@ void CancelNavMeshTileBuildTasks(NavMeshRuntime* runtime) NavBuildTasksLocker.Unlock(); } -void CancelNavMeshTileBuildTasks(NavMeshRuntime* runtime, int32 x, int32 y) +void CancelNavMeshTileBuildTasks(NavMeshRuntime* runtime, int32 x, int32 y, NavMesh* navMesh) { NavBuildTasksLocker.Lock(); for (int32 i = 0; i < NavBuildTasks.Count(); i++) { auto task = NavBuildTasks[i]; - if (task->Runtime == runtime && task->X == x && task->Y == y) + if (task->Runtime == runtime && task->X == x && task->Y == y && task->NavMesh == navMesh) { NavBuildTasksLocker.Unlock(); @@ -838,7 +841,7 @@ void NavMeshBuilder::Init() Level::SceneUnloading.Bind(); } -bool NavMeshBuilder::IsBuildingNavMesh() +bool Navigation::IsBuildingNavMesh() { NavBuildTasksLocker.Lock(); const bool hasAnyTask = NavBuildTasks.HasItems(); @@ -847,7 +850,7 @@ bool NavMeshBuilder::IsBuildingNavMesh() return hasAnyTask; } -float NavMeshBuilder::GetNavMeshBuildingProgress() +float Navigation::GetNavMeshBuildingProgress() { NavBuildTasksLocker.Lock(); float result = 1.0f; @@ -907,15 +910,13 @@ void BuildDirtyBounds(Scene* scene, NavMesh* navMesh, const BoundingBox& dirtyBo // Align dirty bounds to tile size BoundingBox dirtyBoundsNavMesh; BoundingBox::Transform(dirtyBounds, worldToNavMesh, dirtyBoundsNavMesh); - BoundingBox dirtyBoundsAligned; - dirtyBoundsAligned.Minimum = Float3::Floor(dirtyBoundsNavMesh.Minimum / tileSize) * tileSize; - dirtyBoundsAligned.Maximum = Float3::Ceil(dirtyBoundsNavMesh.Maximum / tileSize) * tileSize; + dirtyBoundsNavMesh.Minimum = Float3::Floor(dirtyBoundsNavMesh.Minimum / tileSize) * tileSize; + dirtyBoundsNavMesh.Maximum = Float3::Ceil(dirtyBoundsNavMesh.Maximum / tileSize) * tileSize; // Calculate tiles range for the given navigation dirty bounds (aligned to tiles size) - const Int3 tilesMin(dirtyBoundsAligned.Minimum / tileSize); - const Int3 tilesMax(dirtyBoundsAligned.Maximum / tileSize); - const int32 tilesX = tilesMax.X - tilesMin.X; - const int32 tilesY = tilesMax.Z - tilesMin.Z; + const Int3 tilesMin(dirtyBoundsNavMesh.Minimum / tileSize); + const Int3 tilesMax(dirtyBoundsNavMesh.Maximum / tileSize); + const int32 tilesXZ = (tilesMax.X - tilesMin.X) * (tilesMax.Z - tilesMin.Z); { PROFILE_CPU_NAMED("Prepare"); @@ -932,18 +933,18 @@ void BuildDirtyBounds(Scene* scene, NavMesh* 
navMesh, const BoundingBox& dirtyBo // Remove all tiles from navmesh runtime runtime->RemoveTiles(navMesh); runtime->SetTileSize(tileSize); - runtime->EnsureCapacity(tilesX * tilesY); + runtime->EnsureCapacity(tilesXZ); // Remove all tiles from navmesh data navMesh->Data.TileSize = tileSize; navMesh->Data.Tiles.Clear(); - navMesh->Data.Tiles.EnsureCapacity(tilesX * tilesX); + navMesh->Data.Tiles.EnsureCapacity(tilesXZ); navMesh->IsDataDirty = true; } else { // Ensure to have enough memory for tiles - runtime->EnsureCapacity(tilesX * tilesY); + runtime->EnsureCapacity(tilesXZ); } runtime->Locker.Unlock(); @@ -959,11 +960,10 @@ void BuildDirtyBounds(Scene* scene, NavMesh* navMesh, const BoundingBox& dirtyBo // Cache navmesh volumes Array> volumes; - for (int32 i = 0; i < scene->Navigation.Volumes.Count(); i++) + for (const NavMeshBoundsVolume* volume : scene->Navigation.Volumes) { - const auto volume = scene->Navigation.Volumes.Get()[i]; if (!volume->AgentsMask.IsNavMeshSupported(navMesh->Properties) || - !volume->GetBox().Intersects(dirtyBoundsAligned)) + !volume->GetBox().Intersects(dirtyBoundsNavMesh)) continue; auto& bounds = volumes.AddOne(); BoundingBox::Transform(volume->GetBox(), worldToNavMesh, bounds); @@ -1026,7 +1026,7 @@ void BuildDirtyBounds(Scene* scene, NavMesh* navMesh, const BoundingBox& dirtyBo for (const auto& tile : unusedTiles) { // Wait for any async tasks that are producing this tile - CancelNavMeshTileBuildTasks(runtime, tile.X, tile.Y); + CancelNavMeshTileBuildTasks(runtime, tile.X, tile.Y, navMesh); } runtime->Locker.Lock(); for (const auto& tile : unusedTiles) @@ -1095,6 +1095,7 @@ void BuildDirtyBounds(Scene* scene, const BoundingBox& dirtyBounds, bool rebuild else if (settings->AutoAddMissingNavMeshes) { // Spawn missing navmesh + PROFILE_MEM(Navigation); navMesh = New(); navMesh->SetStaticFlags(StaticFlags::FullyStatic); navMesh->SetName(TEXT("NavMesh.") + navMeshProperties.Name); @@ -1108,39 +1109,6 @@ void BuildDirtyBounds(Scene* scene, const BoundingBox& dirtyBounds, bool rebuild { BuildDirtyBounds(scene, navMesh, dirtyBounds, rebuild); } - - // Remove unused navmeshes - if (settings->AutoRemoveMissingNavMeshes) - { - for (NavMesh* navMesh : scene->Navigation.Meshes) - { - // Skip used navmeshes - if (navMesh->Data.Tiles.HasItems()) - continue; - - // Skip navmeshes during async building - int32 usageCount = 0; - NavBuildTasksLocker.Lock(); - for (int32 i = 0; i < NavBuildTasks.Count(); i++) - { - if (NavBuildTasks.Get()[i]->NavMesh == navMesh) - usageCount++; - } - NavBuildTasksLocker.Unlock(); - if (usageCount != 0) - continue; - - navMesh->DeleteObject(); - } - } -} - -void BuildWholeScene(Scene* scene) -{ - // Compute total navigation area bounds - const BoundingBox worldBounds = scene->Navigation.GetNavigationBounds(); - - BuildDirtyBounds(scene, worldBounds, true); } void ClearNavigation(Scene* scene) @@ -1154,22 +1122,58 @@ void ClearNavigation(Scene* scene) } } +void BuildNavigation(BuildRequest& request) +{ + // If scene is not specified then build all loaded scenes + if (!request.Scene) + { + for (Scene* scene : Level::Scenes) + { + request.Scene = scene; + BuildNavigation(request); + } + return; + } + + // Early out if scene is not using navigation + if (request.Scene->Navigation.Volumes.IsEmpty()) + { + ClearNavigation(request.Scene); + return; + } + + // Check if similar request is already in a queue + for (auto& e : NavBuildQueue) + { + if (e.Scene == request.Scene && (e.DirtyBounds == request.DirtyBounds || request.DirtyBounds == 
BoundingBox::Empty)) + { + e = request; + return; + } + } + + // Enqueue request + NavBuildQueue.Add(request); +} + void NavMeshBuilder::Update() { - PROFILE_MEM(Navigation); + PROFILE_MEM(NavigationBuilding); ScopeLock lock(NavBuildQueueLocker); // Process nav mesh building requests and kick the tasks const auto now = DateTime::NowUTC(); + bool didRebuild = false; for (int32 i = 0; NavBuildQueue.HasItems() && i < NavBuildQueue.Count(); i++) { auto req = NavBuildQueue.Get()[i]; if (now - req.Time >= 0) { NavBuildQueue.RemoveAt(i--); - const auto scene = req.Scene.Get(); + Scene* scene = req.Scene.Get(); if (!scene) continue; + bool rebuild = req.DirtyBounds == BoundingBox::Empty; // Early out if scene has no bounds volumes to define nav mesh area if (scene->Navigation.Volumes.IsEmpty()) @@ -1179,80 +1183,69 @@ void NavMeshBuilder::Update() } // Check if build a custom dirty bounds or whole scene - if (req.DirtyBounds == BoundingBox::Empty) - { - BuildWholeScene(scene); - } + if (rebuild) + req.DirtyBounds = scene->Navigation.GetNavigationBounds(); // Compute total navigation area bounds + if (didRebuild) + rebuild = false; // When rebuilding navmesh for multiple scenes, rebuild only the first one (other scenes will use additive update) else + didRebuild = true; + BuildDirtyBounds(scene, req.DirtyBounds, rebuild); + NavBuildCheckMissingNavMeshes = true; + } + } + + // Remove unused navmeshes (when all active tasks are done) + // TODO: ignore AutoRemoveMissingNavMeshes in game and make it editor-only? + if (NavBuildCheckMissingNavMeshes && NavBuildTasksMaxCount == 0 && NavigationSettings::Get()->AutoRemoveMissingNavMeshes) + { + NavBuildCheckMissingNavMeshes = false; + NavBuildTasksLocker.Lock(); + int32 taskCount = NavBuildTasks.Count(); + NavBuildTasksLocker.Unlock(); + if (taskCount == 0) + { + for (Scene* scene : Level::Scenes) { - BuildDirtyBounds(scene, req.DirtyBounds, false); + for (NavMesh* navMesh : scene->Navigation.Meshes) + { + if (!navMesh->Data.Tiles.HasItems()) + { + navMesh->DeleteObject(); + } + } } } } } -void NavMeshBuilder::Build(Scene* scene, float timeoutMs) +void Navigation::BuildNavMesh(Scene* scene, float timeoutMs) { - if (!scene) - { - LOG(Warning, "Could not generate navmesh without scene."); - return; - } - - // Early out if scene is not using navigation - if (scene->Navigation.Volumes.IsEmpty()) - { - ClearNavigation(scene); - return; - } - - PROFILE_CPU_NAMED("NavMeshBuilder"); - PROFILE_MEM(Navigation); + PROFILE_CPU(); + PROFILE_MEM(NavigationBuilding); ScopeLock lock(NavBuildQueueLocker); BuildRequest req; req.Scene = scene; req.Time = DateTime::NowUTC() + TimeSpan::FromMilliseconds(timeoutMs); req.DirtyBounds = BoundingBox::Empty; - - for (int32 i = 0; i < NavBuildQueue.Count(); i++) - { - auto& e = NavBuildQueue.Get()[i]; - if (e.Scene == scene && e.DirtyBounds == req.DirtyBounds) - { - e = req; - return; - } - } - - NavBuildQueue.Add(req); + req.SpecificScene = scene != nullptr; + BuildNavigation(req); } -void NavMeshBuilder::Build(Scene* scene, const BoundingBox& dirtyBounds, float timeoutMs) +void Navigation::BuildNavMesh(const BoundingBox& dirtyBounds, Scene* scene, float timeoutMs) { - if (!scene) - { - LOG(Warning, "Could not generate navmesh without scene."); - return; - } - - // Early out if scene is not using navigation - if (scene->Navigation.Volumes.IsEmpty()) - { - ClearNavigation(scene); - return; - } - - PROFILE_CPU_NAMED("NavMeshBuilder"); - PROFILE_MEM(Navigation); + if (dirtyBounds.GetVolume() <= ZeroTolerance) + return; // Skip updating 
empty bounds + PROFILE_CPU(); + PROFILE_MEM(NavigationBuilding); ScopeLock lock(NavBuildQueueLocker); BuildRequest req; req.Scene = scene; req.Time = DateTime::NowUTC() + TimeSpan::FromMilliseconds(timeoutMs); req.DirtyBounds = dirtyBounds; - - NavBuildQueue.Add(req); + req.SpecificScene = scene != nullptr; + BuildNavigation(req); } #endif diff --git a/Source/Engine/Navigation/NavMeshBuilder.h b/Source/Engine/Navigation/NavMeshBuilder.h index a3477db27..355bac7de 100644 --- a/Source/Engine/Navigation/NavMeshBuilder.h +++ b/Source/Engine/Navigation/NavMeshBuilder.h @@ -15,11 +15,7 @@ class FLAXENGINE_API NavMeshBuilder { public: static void Init(); - static bool IsBuildingNavMesh(); - static float GetNavMeshBuildingProgress(); static void Update(); - static void Build(Scene* scene, float timeoutMs); - static void Build(Scene* scene, const BoundingBox& dirtyBounds, float timeoutMs); }; #endif diff --git a/Source/Engine/Navigation/NavMeshRuntime.cpp b/Source/Engine/Navigation/NavMeshRuntime.cpp index 2758077c6..0ace29415 100644 --- a/Source/Engine/Navigation/NavMeshRuntime.cpp +++ b/Source/Engine/Navigation/NavMeshRuntime.cpp @@ -5,6 +5,9 @@ #include "NavMesh.h" #include "Engine/Core/Log.h" #include "Engine/Core/Random.h" +#if COMPILE_WITH_DEBUG_DRAW +#include "Engine/Level/Scene/Scene.h" +#endif #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerMemory.h" #include "Engine/Threading/Threading.h" @@ -326,7 +329,7 @@ void NavMeshRuntime::EnsureCapacity(int32 tilesToAddCount) if (newTilesCount <= capacity) return; PROFILE_CPU_NAMED("NavMeshRuntime.EnsureCapacity"); - PROFILE_MEM(Navigation); + PROFILE_MEM(NavigationMesh); // Navmesh tiles capacity growing rule int32 newCapacity = capacity ? capacity : 32; @@ -387,7 +390,7 @@ void NavMeshRuntime::AddTiles(NavMesh* navMesh) return; auto& data = navMesh->Data; PROFILE_CPU_NAMED("NavMeshRuntime.AddTiles"); - PROFILE_MEM(Navigation); + PROFILE_MEM(NavigationMesh); ScopeLock lock(Locker); // Validate data (must match navmesh) or init navmesh to match the tiles options @@ -419,7 +422,7 @@ void NavMeshRuntime::AddTile(NavMesh* navMesh, NavMeshTileData& tileData) ASSERT(navMesh); auto& data = navMesh->Data; PROFILE_CPU_NAMED("NavMeshRuntime.AddTile"); - PROFILE_MEM(Navigation); + PROFILE_MEM(NavigationMesh); ScopeLock lock(Locker); // Validate data (must match navmesh) or init navmesh to match the tiles options @@ -603,7 +606,21 @@ void NavMeshRuntime::DebugDraw() if (!tile->header) continue; - //DebugDraw::DrawWireBox(*(BoundingBox*)&tile->header->bmin[0], Color::CadetBlue); +#if 0 + // Debug draw tile bounds and owner scene name + BoundingBox tileBounds = *(BoundingBox*)&tile->header->bmin[0]; + DebugDraw::DrawWireBox(tileBounds, Color::CadetBlue); + // TODO: build map from tile coords to tile data to avoid this loop + for (const auto& e : _tiles) + { + if (e.X == tile->header->x && e.Y == tile->header->y && e.Layer == tile->header->layer) + { + if (e.NavMesh && e.NavMesh->GetScene()) + DebugDraw::DrawText(e.NavMesh->GetScene()->GetName(), tileBounds.Minimum + tileBounds.GetSize() * Float3(0.5f, 0.8f, 0.5f), Color::CadetBlue); + break; + } + } +#endif for (int i = 0; i < tile->header->polyCount; i++) { diff --git a/Source/Engine/Navigation/NavMeshRuntime.h b/Source/Engine/Navigation/NavMeshRuntime.h index 1ca6607b9..9e1165196 100644 --- a/Source/Engine/Navigation/NavMeshRuntime.h +++ b/Source/Engine/Navigation/NavMeshRuntime.h @@ -111,7 +111,7 @@ public: /// The start position. /// The result hit information. 
Valid only when query succeed. /// The maximum distance to search for wall (search radius). - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() bool FindDistanceToWall(const Vector3& startPosition, NavMeshHit& hitInfo, float maxDistance = MAX_float) const; /// @@ -187,7 +187,7 @@ public: /// The start position. /// The end position. /// The result hit information. Valid only when query succeed. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() bool RayCast(const Vector3& startPosition, const Vector3& endPosition, API_PARAM(Out) NavMeshHit& hitInfo) const; public: diff --git a/Source/Engine/Navigation/NavModifierVolume.cpp b/Source/Engine/Navigation/NavModifierVolume.cpp index 9e1295f70..aa71e7aa1 100644 --- a/Source/Engine/Navigation/NavModifierVolume.cpp +++ b/Source/Engine/Navigation/NavModifierVolume.cpp @@ -2,7 +2,7 @@ #include "NavModifierVolume.h" #include "NavigationSettings.h" -#include "NavMeshBuilder.h" +#include "Navigation.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Serialization/Serialization.h" #if USE_EDITOR @@ -83,7 +83,7 @@ void NavModifierVolume::OnBoundsChanged(const BoundingBox& prevBounds) #else const float timeoutMs = 0.0f; #endif - NavMeshBuilder::Build(GetScene(), dirtyBounds, timeoutMs); + Navigation::BuildNavMesh(dirtyBounds, GetScene(), timeoutMs); } #endif } diff --git a/Source/Engine/Navigation/Navigation.cpp b/Source/Engine/Navigation/Navigation.cpp index 34983652f..908819765 100644 --- a/Source/Engine/Navigation/Navigation.cpp +++ b/Source/Engine/Navigation/Navigation.cpp @@ -180,7 +180,7 @@ NavigationService NavigationServiceInstance; void* dtAllocDefault(size_t size, dtAllocHint) { - PROFILE_MEM(Navigation); + PROFILE_MEM(NavigationMesh); return Allocator::Allocate(size); } @@ -382,30 +382,6 @@ bool Navigation::RayCast(const Vector3& startPosition, const Vector3& endPositio return NavMeshes.First()->RayCast(startPosition, endPosition, hitInfo); } -#if COMPILE_WITH_NAV_MESH_BUILDER - -bool Navigation::IsBuildingNavMesh() -{ - return NavMeshBuilder::IsBuildingNavMesh(); -} - -float Navigation::GetNavMeshBuildingProgress() -{ - return NavMeshBuilder::GetNavMeshBuildingProgress(); -} - -void Navigation::BuildNavMesh(Scene* scene, float timeoutMs) -{ - NavMeshBuilder::Build(scene, timeoutMs); -} - -void Navigation::BuildNavMesh(Scene* scene, const BoundingBox& dirtyBounds, float timeoutMs) -{ - NavMeshBuilder::Build(scene, dirtyBounds, timeoutMs); -} - -#endif - #if COMPILE_WITH_DEBUG_DRAW void Navigation::DrawNavMesh() diff --git a/Source/Engine/Navigation/Navigation.h b/Source/Engine/Navigation/Navigation.h index 4d8b181e7..80c8eb84a 100644 --- a/Source/Engine/Navigation/Navigation.h +++ b/Source/Engine/Navigation/Navigation.h @@ -19,7 +19,7 @@ public: /// The start position. /// The result hit information. Valid only when query succeed. /// The maximum distance to search for wall (search radius). - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() static bool FindDistanceToWall(const Vector3& startPosition, API_PARAM(Out) NavMeshHit& hitInfo, float maxDistance = MAX_float); /// @@ -81,12 +81,10 @@ public: /// The start position. /// The end position. /// The result hit information. Valid only when query succeed. - /// True if ray hits an matching object, otherwise false. 
+ /// True if ray hits a matching object, otherwise false. API_FUNCTION() static bool RayCast(const Vector3& startPosition, const Vector3& endPosition, API_PARAM(Out) NavMeshHit& hitInfo); -public: #if COMPILE_WITH_NAV_MESH_BUILDER - /// /// Returns true if navigation system is during navmesh building (any request is valid or async task active). /// @@ -100,32 +98,49 @@ public: /// /// Builds the Nav Mesh for the given scene (discards all its tiles). /// - /// - /// Requests are enqueued till the next game scripts update. Actual navmesh building in done via Thread Pool tasks in a background to prevent game thread stalls. - /// - /// The scene. + /// Requests are enqueued till the next game scripts update. Actual navmesh building is done via Thread Pool tasks in the background to prevent game thread stalls. + /// The scene. Pass null to build navmesh for all loaded scenes. /// The timeout to wait before building Nav Mesh (in milliseconds). - API_FUNCTION() static void BuildNavMesh(Scene* scene, float timeoutMs = 50); + API_FUNCTION() static void BuildNavMesh(Scene* scene = nullptr, float timeoutMs = 50); /// /// Builds the Nav Mesh for the given scene (builds only the tiles overlapping the given bounding box). /// - /// - /// Requests are enqueued till the next game scripts update. Actual navmesh building in done via Thread Pool tasks in a background to prevent game thread stalls. - /// + /// Requests are enqueued till the next game scripts update. Actual navmesh building is done via Thread Pool tasks in the background to prevent game thread stalls. + /// The bounds in world-space to build overlapping tiles. + /// The scene. Pass null to build navmesh for all loaded scenes that intersect with the given bounds. + /// The timeout to wait before building Nav Mesh (in milliseconds). + API_FUNCTION() static void BuildNavMesh(const BoundingBox& dirtyBounds, Scene* scene = nullptr, float timeoutMs = 50); + + /// + /// Builds the Nav Mesh for all the loaded scenes (builds only the tiles overlapping the given bounding box). + /// + /// Requests are enqueued till the next game scripts update. Actual navmesh building is done via Thread Pool tasks in the background to prevent game thread stalls. + /// The bounds in world-space to build overlapping tiles. + /// The timeout to wait before building Nav Mesh (in milliseconds). + API_FUNCTION() static void BuildNavMesh(const BoundingBox& dirtyBounds, float timeoutMs = 50) + { + BuildNavMesh(dirtyBounds, nullptr, timeoutMs); + } + + /// + /// Builds the Nav Mesh for the given scene (builds only the tiles overlapping the given bounding box). + /// [Deprecated in v1.12] + /// + /// Requests are enqueued till the next game scripts update. Actual navmesh building is done via Thread Pool tasks in the background to prevent game thread stalls. /// The scene. /// The bounds in world-space to build overlapping tiles. /// The timeout to wait before building Nav Mesh (in milliseconds). - API_FUNCTION() static void BuildNavMesh(Scene* scene, const BoundingBox& dirtyBounds, float timeoutMs = 50); - + API_FUNCTION() DEPRECATED("Use BuildNavMesh with reordered arguments instead") static void BuildNavMesh(Scene* scene, const BoundingBox& dirtyBounds, float timeoutMs = 50) + { + BuildNavMesh(dirtyBounds, scene, timeoutMs); + } #endif #if COMPILE_WITH_DEBUG_DRAW - /// /// Draws the navigation for all the scenes (uses DebugDraw interface).
/// static void DrawNavMesh(); - #endif }; diff --git a/Source/Engine/Particles/ParticleEffect.cpp b/Source/Engine/Particles/ParticleEffect.cpp index 6e94594b0..14467664c 100644 --- a/Source/Engine/Particles/ParticleEffect.cpp +++ b/Source/Engine/Particles/ParticleEffect.cpp @@ -11,6 +11,10 @@ #include "Engine/Level/Scene/Scene.h" #include "Engine/Engine/Time.h" #include "Engine/Engine/Engine.h" +#if USE_EDITOR +#include "Editor/Editor.h" +#include "Editor/Managed/ManagedEditor.h" +#endif ParticleEffect::ParticleEffect(const SpawnParams& params) : Actor(params) @@ -465,7 +469,12 @@ void ParticleEffect::Update() if (UpdateMode == SimulationUpdateMode::FixedTimestep) { // Check if last simulation update was past enough to kick a new on - const float time = Time::Update.Time.GetTotalSeconds(); + bool useTimeScale = UseTimeScale; +#if USE_EDITOR + if (!Editor::IsPlayMode && IsDuringPlay()) + useTimeScale = false; +#endif + const float time = (useTimeScale ? Time::Update.Time : Time::Update.UnscaledTime).GetTotalSeconds(); if (time - Instance.LastUpdateTime < FixedTimestep) return; } @@ -475,9 +484,6 @@ void ParticleEffect::Update() #if USE_EDITOR -#include "Editor/Editor.h" -#include "Editor/Managed/ManagedEditor.h" - void ParticleEffect::UpdateExecuteInEditor() { // Auto-play in Editor @@ -601,7 +607,9 @@ bool ParticleEffect::HasContentLoaded() const void ParticleEffect::Draw(RenderContext& renderContext) { - if (renderContext.View.Pass == DrawPass::GlobalSDF || renderContext.View.Pass == DrawPass::GlobalSurfaceAtlas) + if (renderContext.View.Pass == DrawPass::GlobalSDF || + renderContext.View.Pass == DrawPass::GlobalSurfaceAtlas || + EnumHasNoneFlags(renderContext.View.Flags, ViewFlags::Particles)) return; _lastMinDstSqr = Math::Min(_lastMinDstSqr, Vector3::DistanceSquared(GetPosition(), renderContext.View.WorldPosition)); RenderContextBatch renderContextBatch(renderContext); @@ -610,10 +618,12 @@ void ParticleEffect::Draw(RenderContext& renderContext) void ParticleEffect::Draw(RenderContextBatch& renderContextBatch) { + const RenderView& mainView = renderContextBatch.GetMainContext().View; + if (EnumHasNoneFlags(mainView.Flags, ViewFlags::Particles)) + return; Particles::DrawParticles(renderContextBatch, this); // Cull again against the main context (if using multiple ones) to skip caching draw distance from shadow projections - const RenderView& mainView = renderContextBatch.GetMainContext().View; const BoundingSphere bounds(_sphere.Center - mainView.Origin, _sphere.Radius); if (renderContextBatch.Contexts.Count() > 1 && !mainView.CullingFrustum.Intersects(bounds)) return; diff --git a/Source/Engine/Physics/Colliders/BoxCollider.cpp b/Source/Engine/Physics/Colliders/BoxCollider.cpp index 1e90cb91f..47e551b37 100644 --- a/Source/Engine/Physics/Colliders/BoxCollider.cpp +++ b/Source/Engine/Physics/Colliders/BoxCollider.cpp @@ -23,15 +23,15 @@ void BoxCollider::SetSize(const Float3& value) void BoxCollider::AutoResize(bool globalOrientation = true) { Actor* parent = GetParent(); - if (Cast(parent)) + if (parent == nullptr || Cast(parent)) return; // Get bounds of all siblings (excluding itself) const Vector3 parentScale = parent->GetScale(); if (parentScale.IsAnyZero()) - return; // Avoid division by zero + return; - // Hacky way to get unrotated bounded box of parent. 
+ // Hacky way to get unrotated bounding box of parent const Quaternion parentOrientation = parent->GetOrientation(); parent->SetOrientation(Quaternion::Identity); BoundingBox parentBox = parent->GetBox(); diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index 3edfaa0c6..8c3d3610e 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -2784,6 +2784,69 @@ float PhysicsBackend::ComputeShapeSqrDistanceToPoint(void* shape, const Vector3& { auto shapePhysX = (PxShape*)shape; const PxTransform trans(C2P(position), C2P(orientation)); + + // Special case for heightfield collider (not implemented in PhysX) + if (shapePhysX->getGeometryType() == PxGeometryType::eHEIGHTFIELD) + { + // Do a bunch of raycasts in all directions to find the closest point on the heightfield + PxVec3 origin = C2P(point); + Array unitDirections; + constexpr int32 resolution = 32; + unitDirections.EnsureCapacity((resolution + 1) * (resolution + 1)); + for (int32 i = 0; i <= resolution; i++) + { + float phi = PI * (float)i / resolution; + float sinPhi = Math::Sin(phi); + float cosPhi = Math::Cos(phi); + for (int32 j = 0; j <= resolution; j++) + { + float theta = 2.0f * PI * (float)j / resolution; + float cosTheta = Math::Cos(theta); + float sinTheta = Math::Sin(theta); + + PxVec3 v; + v.x = cosTheta * sinPhi; + v.y = cosPhi; + v.z = sinTheta * sinPhi; + + // All generated vectors are unit vectors (length 1) + unitDirections.Add(v); + } + } + + PxReal maxDistance = PX_MAX_REAL; // Search indefinitely + PxQueryFilterData filterData; + filterData.data.word0 = (PxU32)shapePhysX->getSimulationFilterData().word0; + PxHitFlags hitFlags = PxHitFlag::ePOSITION | PxHitFlag::eMESH_BOTH_SIDES; // Both sides included in case the point is underneath the heightfield + PxRaycastBuffer buffer; + auto scene = shapePhysX->getActor()->getScene(); + + PxReal closestDistance = maxDistance; + PxVec3 tempClosestPoint; + for (PxVec3& unitDir : unitDirections) + { + bool hitResult = scene->raycast(origin, unitDir, maxDistance, buffer, hitFlags, filterData); + if (hitResult) + { + auto& hit = buffer.getAnyHit(0); + if (hit.distance < closestDistance && hit.distance > 0.0f) + { + tempClosestPoint = hit.position; + closestDistance = hit.distance; + } + } + } + + if (closestDistance < maxDistance) + { + *closestPoint = P2C(tempClosestPoint); + return closestDistance * closestDistance; // Result is squared distance + } + + return -1.0f; + } + + // Default point distance for other collider queries #if USE_LARGE_WORLDS PxVec3 closestPointPx; float result = PxGeometryQuery::pointDistance(C2P(point), shapePhysX->getGeometry(), trans, &closestPointPx); diff --git a/Source/Engine/Physics/Physics.h b/Source/Engine/Physics/Physics.h index 2fc116020..85cd5e77b 100644 --- a/Source/Engine/Physics/Physics.h +++ b/Source/Engine/Physics/Physics.h @@ -102,7 +102,7 @@ public: /// The end position of the line. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() static bool LineCast(const Vector3& start, const Vector3& end, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -113,18 +113,18 @@ public: /// The result hit information. Valid only when method returns true. /// The layer mask used to filter the results. 
/// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() static bool LineCast(const Vector3& start, const Vector3& end, API_PARAM(Out) RayCastHit& hitInfo, uint32 layerMask = MAX_uint32, bool hitTriggers = true); // - /// Performs a line between two points in the scene, returns all hitpoints infos. + /// Performs a line between two points in the scene, returns all hit points info. /// /// The origin of the ray. /// The end position of the line. /// The result hits. Valid only when method returns true. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() static bool LineCastAll(const Vector3& start, const Vector3& end, API_PARAM(Out) Array& results, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -135,7 +135,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() static bool RayCast(const Vector3& origin, const Vector3& direction, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -147,7 +147,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() static bool RayCast(const Vector3& origin, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -159,7 +159,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() static bool RayCastAll(const Vector3& origin, const Vector3& direction, API_PARAM(Out) Array& results, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -172,7 +172,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if box hits an matching object, otherwise false. + /// True if box hits a matching object, otherwise false. API_FUNCTION() static bool BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -186,7 +186,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if box hits an matching object, otherwise false. + /// True if box hits a matching object, otherwise false. 
API_FUNCTION() static bool BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -200,7 +200,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if box hits an matching object, otherwise false. + /// True if box hits a matching object, otherwise false. API_FUNCTION() static bool BoxCastAll(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -212,7 +212,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if sphere hits an matching object, otherwise false. + /// True if sphere hits a matching object, otherwise false. API_FUNCTION() static bool SphereCast(const Vector3& center, float radius, const Vector3& direction, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -225,7 +225,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if sphere hits an matching object, otherwise false. + /// True if sphere hits a matching object, otherwise false. API_FUNCTION() static bool SphereCast(const Vector3& center, float radius, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -238,7 +238,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if sphere hits an matching object, otherwise false. + /// True if sphere hits a matching object, otherwise false. API_FUNCTION() static bool SphereCastAll(const Vector3& center, float radius, const Vector3& direction, API_PARAM(Out) Array& results, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -252,7 +252,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if capsule hits an matching object, otherwise false. + /// True if capsule hits a matching object, otherwise false. API_FUNCTION() static bool CapsuleCast(const Vector3& center, float radius, float height, const Vector3& direction, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -267,7 +267,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if capsule hits an matching object, otherwise false. + /// True if capsule hits a matching object, otherwise false. 
API_FUNCTION() static bool CapsuleCast(const Vector3& center, float radius, float height, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -282,7 +282,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if capsule hits an matching object, otherwise false. + /// True if capsule hits a matching object, otherwise false. API_FUNCTION() static bool CapsuleCastAll(const Vector3& center, float radius, float height, const Vector3& direction, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -296,7 +296,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if convex mesh hits an matching object, otherwise false. + /// True if convex mesh hits a matching object, otherwise false. API_FUNCTION() static bool ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -311,7 +311,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if convex mesh hits an matching object, otherwise false. + /// True if convex mesh hits a matching object, otherwise false. API_FUNCTION() static bool ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -326,7 +326,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if convex mesh hits an matching object, otherwise false. + /// True if convex mesh hits a matching object, otherwise false. API_FUNCTION() static bool ConvexCastAll(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -375,7 +375,7 @@ public: API_FUNCTION() static bool CheckConvex(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given box. + /// Finds all colliders touching or inside the given box. /// /// The box center. /// The half size of the box in each direction. 
@@ -387,7 +387,7 @@ public: API_FUNCTION() static bool OverlapBox(const Vector3& center, const Vector3& halfExtents, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given sphere. + /// Finds all colliders touching or inside the given sphere. /// /// The sphere center. /// The radius of the sphere. @@ -398,7 +398,7 @@ public: API_FUNCTION() static bool OverlapSphere(const Vector3& center, float radius, API_PARAM(Out) Array& results, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given capsule. + /// Finds all colliders touching or inside the given capsule. /// /// The capsule center. /// The radius of the capsule. @@ -411,7 +411,7 @@ public: API_FUNCTION() static bool OverlapCapsule(const Vector3& center, float radius, float height, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given convex mesh. + /// Finds all colliders touching or inside the given convex mesh. /// /// The convex mesh center. /// Collision data of the convex mesh. @@ -424,7 +424,7 @@ public: API_FUNCTION() static bool OverlapConvex(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given box. + /// Finds all colliders touching or inside the given box. /// /// The box center. /// The half size of the box in each direction. @@ -436,7 +436,7 @@ public: API_FUNCTION() static bool OverlapBox(const Vector3& center, const Vector3& halfExtents, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given sphere. + /// Finds all colliders touching or inside the given sphere. /// /// The sphere center. /// The radius of the sphere. @@ -447,7 +447,7 @@ public: API_FUNCTION() static bool OverlapSphere(const Vector3& center, float radius, API_PARAM(Out) Array& results, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given capsule. + /// Finds all colliders touching or inside the given capsule. /// /// The capsule center. /// The radius of the capsule. @@ -460,7 +460,7 @@ public: API_FUNCTION() static bool OverlapCapsule(const Vector3& center, float radius, float height, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given convex mesh. + /// Finds all colliders touching or inside the given convex mesh. /// /// The convex mesh center. /// Collision data of the convex mesh. diff --git a/Source/Engine/Physics/PhysicsScene.h b/Source/Engine/Physics/PhysicsScene.h index 602e6f713..a7cb91cbe 100644 --- a/Source/Engine/Physics/PhysicsScene.h +++ b/Source/Engine/Physics/PhysicsScene.h @@ -140,7 +140,7 @@ public: /// The end position of the line. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. 
+ /// True if ray hits a matching object, otherwise false. API_FUNCTION() bool LineCast(const Vector3& start, const Vector3& end, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -151,18 +151,18 @@ public: /// The result hit information. Valid only when method returns true. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() bool LineCast(const Vector3& start, const Vector3& end, API_PARAM(Out) RayCastHit& hitInfo, uint32 layerMask = MAX_uint32, bool hitTriggers = true); // - /// Performs a line between two points in the scene, returns all hitpoints infos. + /// Performs a line between two points in the scene, returns all hit points info. /// /// The origin of the ray. /// The normalized direction of the ray. /// The result hits. Valid only when method returns true. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() bool LineCastAll(const Vector3& start, const Vector3& end, API_PARAM(Out) Array& results, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -173,7 +173,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() bool RayCast(const Vector3& origin, const Vector3& direction, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -185,7 +185,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() bool RayCast(const Vector3& origin, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -197,7 +197,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if ray hits an matching object, otherwise false. + /// True if ray hits a matching object, otherwise false. API_FUNCTION() bool RayCastAll(const Vector3& origin, const Vector3& direction, API_PARAM(Out) Array& results, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -210,7 +210,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if box hits an matching object, otherwise false. + /// True if box hits a matching object, otherwise false. 
API_FUNCTION() bool BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -224,7 +224,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if box hits an matching object, otherwise false. + /// True if box hits a matching object, otherwise false. API_FUNCTION() bool BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -238,7 +238,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if box hits an matching object, otherwise false. + /// True if box hits a matching object, otherwise false. API_FUNCTION() bool BoxCastAll(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -250,7 +250,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if sphere hits an matching object, otherwise false. + /// True if sphere hits a matching object, otherwise false. API_FUNCTION() bool SphereCast(const Vector3& center, float radius, const Vector3& direction, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -263,7 +263,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if sphere hits an matching object, otherwise false. + /// True if sphere hits a matching object, otherwise false. API_FUNCTION() bool SphereCast(const Vector3& center, float radius, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -276,7 +276,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if sphere hits an matching object, otherwise false. + /// True if sphere hits a matching object, otherwise false. API_FUNCTION() bool SphereCastAll(const Vector3& center, float radius, const Vector3& direction, API_PARAM(Out) Array& results, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -290,7 +290,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if capsule hits an matching object, otherwise false. + /// True if capsule hits a matching object, otherwise false. 
API_FUNCTION() bool CapsuleCast(const Vector3& center, float radius, float height, const Vector3& direction, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -305,7 +305,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if capsule hits an matching object, otherwise false. + /// True if capsule hits a matching object, otherwise false. API_FUNCTION() bool CapsuleCast(const Vector3& center, float radius, float height, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -320,7 +320,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if capsule hits an matching object, otherwise false. + /// True if capsule hits a matching object, otherwise false. API_FUNCTION() bool CapsuleCastAll(const Vector3& center, float radius, float height, const Vector3& direction, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -334,7 +334,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if convex mesh hits an matching object, otherwise false. + /// True if convex mesh hits a matching object, otherwise false. API_FUNCTION() bool ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -349,7 +349,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if convex mesh hits an matching object, otherwise false. + /// True if convex mesh hits a matching object, otherwise false. API_FUNCTION() bool ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, API_PARAM(Out) RayCastHit& hitInfo, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -364,7 +364,7 @@ public: /// The maximum distance the ray should check for collisions. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. - /// True if convex mesh hits an matching object, otherwise false. + /// True if convex mesh hits a matching object, otherwise false. 
API_FUNCTION() bool ConvexCastAll(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, float maxDistance = MAX_float, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// @@ -413,7 +413,7 @@ public: API_FUNCTION() bool CheckConvex(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given box. + /// Finds all colliders touching or inside the given box. /// /// The box center. /// The half size of the box in each direction. @@ -425,7 +425,7 @@ public: API_FUNCTION() bool OverlapBox(const Vector3& center, const Vector3& halfExtents, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given sphere. + /// Finds all colliders touching or inside the given sphere. /// /// The sphere center. /// The radius of the sphere. @@ -436,7 +436,7 @@ public: API_FUNCTION() bool OverlapSphere(const Vector3& center, float radius, API_PARAM(Out) Array& results, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given capsule. + /// Finds all colliders touching or inside the given capsule. /// /// The capsule center. /// The radius of the capsule. @@ -449,7 +449,7 @@ public: API_FUNCTION() bool OverlapCapsule(const Vector3& center, float radius, float height, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given convex mesh. + /// Finds all colliders touching or inside the given convex mesh. /// /// The convex mesh center. /// Collision data of the convex mesh. @@ -462,7 +462,7 @@ public: API_FUNCTION() bool OverlapConvex(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given box. + /// Finds all colliders touching or inside the given box. /// /// The box center. /// The half size of the box in each direction. @@ -474,7 +474,7 @@ public: API_FUNCTION() bool OverlapBox(const Vector3& center, const Vector3& halfExtents, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given sphere. + /// Finds all colliders touching or inside the given sphere. /// /// The sphere center. /// The radius of the sphere. @@ -485,7 +485,7 @@ public: API_FUNCTION() bool OverlapSphere(const Vector3& center, float radius, API_PARAM(Out) Array& results, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given capsule. + /// Finds all colliders touching or inside the given capsule. /// /// The capsule center. /// The radius of the capsule. 
@@ -498,7 +498,7 @@ public: API_FUNCTION() bool OverlapCapsule(const Vector3& center, float radius, float height, API_PARAM(Out) Array& results, const Quaternion& rotation = Quaternion::Identity, uint32 layerMask = MAX_uint32, bool hitTriggers = true); /// - /// Finds all colliders touching or inside of the given convex mesh. + /// Finds all colliders touching or inside the given convex mesh. /// /// The convex mesh center. /// Collision data of the convex mesh. diff --git a/Source/Engine/Platform/Windows/WindowsPlatform.cpp b/Source/Engine/Platform/Windows/WindowsPlatform.cpp index 1f7683354..5fd6540ee 100644 --- a/Source/Engine/Platform/Windows/WindowsPlatform.cpp +++ b/Source/Engine/Platform/Windows/WindowsPlatform.cpp @@ -549,7 +549,6 @@ void WindowsPlatform::ReleaseMutex() } } -PRAGMA_DISABLE_OPTIMIZATION; void CheckInstructionSet() { #if PLATFORM_ARCH_X86 || PLATFORM_ARCH_X64 diff --git a/Source/Engine/Profiler/ProfilerMemory.cpp b/Source/Engine/Profiler/ProfilerMemory.cpp index c936ff5b2..6b8f18ce3 100644 --- a/Source/Engine/Profiler/ProfilerMemory.cpp +++ b/Source/Engine/Profiler/ProfilerMemory.cpp @@ -243,6 +243,7 @@ void InitProfilerMemory(const Char* cmdLine, int32 stage) #define INIT_PARENT(parent, child) GroupParents[(int32)ProfilerMemory::Groups::child] = (uint8)ProfilerMemory::Groups::parent INIT_PARENT(Engine, EngineThreading); INIT_PARENT(Engine, EngineDelegate); + INIT_PARENT(Engine, EngineDebug); INIT_PARENT(Malloc, MallocArena); INIT_PARENT(Graphics, GraphicsTextures); INIT_PARENT(Graphics, GraphicsRenderTargets); @@ -260,6 +261,8 @@ void InitProfilerMemory(const Char* cmdLine, int32 stage) INIT_PARENT(Content, ContentFiles); INIT_PARENT(Level, LevelFoliage); INIT_PARENT(Level, LevelTerrain); + INIT_PARENT(Navigation, NavigationMesh); + INIT_PARENT(Navigation, NavigationBuilding); INIT_PARENT(Scripting, ScriptingVisual); INIT_PARENT(Scripting, ScriptingCSharp); INIT_PARENT(ScriptingCSharp, ScriptingCSharpGCCommitted); diff --git a/Source/Engine/Profiler/ProfilerMemory.h b/Source/Engine/Profiler/ProfilerMemory.h index 5dddb912b..9177ae6e7 100644 --- a/Source/Engine/Profiler/ProfilerMemory.h +++ b/Source/Engine/Profiler/ProfilerMemory.h @@ -44,6 +44,8 @@ public: EngineThreading, // Memory used by Delegate (engine events system to store all references). EngineDelegate, + // Memory used by debug tools (eg. DebugDraw, DebugCommands or DebugLog). + EngineDebug, // Total graphics memory usage. Graphics, @@ -105,6 +107,10 @@ public: // Total navigation system memory. Navigation, + // Navigation mesh memory. + NavigationMesh, + // Navigation mesh builder memory. + NavigationBuilding, // Total networking system memory. 
Networking, diff --git a/Source/Engine/Renderer/ColorGradingPass.cpp b/Source/Engine/Renderer/ColorGradingPass.cpp index d6e164622..c0b40d3f6 100644 --- a/Source/Engine/Renderer/ColorGradingPass.cpp +++ b/Source/Engine/Renderer/ColorGradingPass.cpp @@ -37,8 +37,45 @@ GPU_CB_STRUCT(Data { Float3 Dummy; float LutWeight; + + void Init(const PostProcessSettings& settings, GPUTexture*& lut) + { + Dummy = Float2::Zero; + auto& toneMapping = settings.ToneMapping; + auto& colorGrading = settings.ColorGrading; + // White Balance + WhiteTemp = toneMapping.WhiteTemperature; + WhiteTint = toneMapping.WhiteTint; + // Shadows + ColorSaturationShadows = colorGrading.ColorSaturationShadows * colorGrading.ColorSaturation; + ColorContrastShadows = colorGrading.ColorContrastShadows * colorGrading.ColorContrast; + ColorGammaShadows = colorGrading.ColorGammaShadows * colorGrading.ColorGamma; + ColorGainShadows = colorGrading.ColorGainShadows * colorGrading.ColorGain; + ColorOffsetShadows = colorGrading.ColorOffsetShadows + colorGrading.ColorOffset; + ColorCorrectionShadowsMax = colorGrading.ShadowsMax; + // Midtones + ColorSaturationMidtones = colorGrading.ColorSaturationMidtones * colorGrading.ColorSaturation; + ColorContrastMidtones = colorGrading.ColorContrastMidtones * colorGrading.ColorContrast; + ColorGammaMidtones = colorGrading.ColorGammaMidtones * colorGrading.ColorGamma; + ColorGainMidtones = colorGrading.ColorGainMidtones * colorGrading.ColorGain; + ColorOffsetMidtones = colorGrading.ColorOffsetMidtones + colorGrading.ColorOffset; + // Highlights + ColorSaturationHighlights = colorGrading.ColorSaturationHighlights * colorGrading.ColorSaturation; + ColorContrastHighlights = colorGrading.ColorContrastHighlights * colorGrading.ColorContrast; + ColorGammaHighlights = colorGrading.ColorGammaHighlights * colorGrading.ColorGamma; + ColorGainHighlights = colorGrading.ColorGainHighlights * colorGrading.ColorGain; + ColorOffsetHighlights = colorGrading.ColorOffsetHighlights + colorGrading.ColorOffset; + ColorCorrectionHighlightsMin = colorGrading.HighlightsMin; + // + Texture* lutTexture = colorGrading.LutTexture.Get(); + const bool useLut = lutTexture && lutTexture->IsLoaded() && lutTexture->GetResidentMipLevels() > 0 && colorGrading.LutWeight > ZeroTolerance; + LutWeight = useLut ? colorGrading.LutWeight : 0.0f; + lut = useLut ? lutTexture->GetTexture() : nullptr; + } }); +Data DefaultData; + // Custom render buffer for caching Color Grading LUT. 
class ColorGradingCustomBuffer : public RenderBuffers::CustomBuffer { @@ -46,7 +83,7 @@ public: GPUTexture* LUT = nullptr; Data CachedData; ToneMappingMode Mode = ToneMappingMode::None; - Texture* LutTexture = nullptr; + GPUTexture* LutTexture = nullptr; #if COMPILE_WITH_DEV_ENV uint64 FrameRendered = 0; #endif @@ -82,6 +119,9 @@ bool ColorGradingPass::Init() #if COMPILE_WITH_DEV_ENV _shader.Get()->OnReloading.Bind(this); #endif + PostProcessSettings defaultSettings; + GPUTexture* defaultLut; + DefaultData.Init(defaultSettings, defaultLut); return false; } @@ -125,6 +165,18 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) { PROFILE_CPU(); + // Prepare the parameters + Data data; + GPUTexture* lutTexture; + auto& toneMapping = renderContext.List->Settings.ToneMapping; + data.Init(renderContext.List->Settings, lutTexture); + + // Skip if color grading is unused + if (Platform::MemoryCompare(&DefaultData, &data, sizeof(Data)) == 0 && + lutTexture == nullptr && + toneMapping.Mode == ToneMappingMode::None) + return nullptr; + // Check if can use volume texture (3D) for a LUT (faster on modern platforms, requires geometry shader) const auto device = GPUDevice::Instance; bool use3D = GPU_ALLOW_GEOMETRY_SHADERS && Graphics::PostProcessing::ColorGradingVolumeLUT; @@ -172,41 +224,8 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) RENDER_TARGET_POOL_SET_NAME(colorGradingBuffer.LUT, "ColorGrading.LUT"); } - // Prepare the parameters - Data data; - data.Dummy = Float2::Zero; - auto& toneMapping = renderContext.List->Settings.ToneMapping; - auto& colorGrading = renderContext.List->Settings.ColorGrading; - // White Balance - data.WhiteTemp = toneMapping.WhiteTemperature; - data.WhiteTint = toneMapping.WhiteTint; - // Shadows - data.ColorSaturationShadows = colorGrading.ColorSaturationShadows * colorGrading.ColorSaturation; - data.ColorContrastShadows = colorGrading.ColorContrastShadows * colorGrading.ColorContrast; - data.ColorGammaShadows = colorGrading.ColorGammaShadows * colorGrading.ColorGamma; - data.ColorGainShadows = colorGrading.ColorGainShadows * colorGrading.ColorGain; - data.ColorOffsetShadows = colorGrading.ColorOffsetShadows + colorGrading.ColorOffset; - data.ColorCorrectionShadowsMax = colorGrading.ShadowsMax; - // Midtones - data.ColorSaturationMidtones = colorGrading.ColorSaturationMidtones * colorGrading.ColorSaturation; - data.ColorContrastMidtones = colorGrading.ColorContrastMidtones * colorGrading.ColorContrast; - data.ColorGammaMidtones = colorGrading.ColorGammaMidtones * colorGrading.ColorGamma; - data.ColorGainMidtones = colorGrading.ColorGainMidtones * colorGrading.ColorGain; - data.ColorOffsetMidtones = colorGrading.ColorOffsetMidtones + colorGrading.ColorOffset; - // Highlights - data.ColorSaturationHighlights = colorGrading.ColorSaturationHighlights * colorGrading.ColorSaturation; - data.ColorContrastHighlights = colorGrading.ColorContrastHighlights * colorGrading.ColorContrast; - data.ColorGammaHighlights = colorGrading.ColorGammaHighlights * colorGrading.ColorGamma; - data.ColorGainHighlights = colorGrading.ColorGainHighlights * colorGrading.ColorGain; - data.ColorOffsetHighlights = colorGrading.ColorOffsetHighlights + colorGrading.ColorOffset; - data.ColorCorrectionHighlightsMin = colorGrading.HighlightsMin; - // - Texture* lutTexture = colorGrading.LutTexture.Get(); - const bool useLut = lutTexture && lutTexture->IsLoaded() && lutTexture->GetResidentMipLevels() > 0 && colorGrading.LutWeight > ZeroTolerance; - data.LutWeight =
useLut ? colorGrading.LutWeight : 0.0f; - // Check if LUT parameter hasn't been changed since the last time - if (Platform::MemoryCompare(&colorGradingBuffer.CachedData , &data, sizeof(Data)) == 0 && + if (Platform::MemoryCompare(&colorGradingBuffer.CachedData, &data, sizeof(Data)) == 0 && colorGradingBuffer.Mode == toneMapping.Mode && #if COMPILE_WITH_DEV_ENV colorGradingBuffer.FrameRendered > _reloadedFrame && @@ -232,7 +251,7 @@ GPUTexture* ColorGradingPass::RenderLUT(RenderContext& renderContext) context->BindCB(0, cb); context->SetViewportAndScissors((float)lutDesc.Width, (float)lutDesc.Height); context->SetState(_psLut.Get((int32)toneMapping.Mode)); - context->BindSR(0, useLut ? lutTexture->GetTexture() : nullptr); + context->BindSR(0, lutTexture); #if GPU_ALLOW_GEOMETRY_SHADERS if (use3D) { diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index a954cf31f..25550ecd8 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -11,6 +11,7 @@ #include "Engine/Core/Math/Quaternion.h" #include "Engine/Core/Config/GraphicsSettings.h" #include "Engine/Engine/Engine.h" +#include "Engine/Engine/Units.h" #include "Engine/Content/Content.h" #include "Engine/Debug/DebugDraw.h" #include "Engine/Graphics/GPUContext.h" @@ -41,6 +42,7 @@ #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) #define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8 #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 +#define DDGI_PROBE_EMPTY_AREA_DENSITY 8 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency #define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count #define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging @@ -68,11 +70,14 @@ GPU_CB_STRUCT(Data0 { Int4 ProbeScrollClears[4]; Float3 ViewDir; float Padding1; + Float3 QuantizationError; + int32 FrameIndexMod8; }); GPU_CB_STRUCT(Data1 { // TODO: use push constants on Vulkan or root signature data on DX12 to reduce overhead of changing single DWORD - Float2 Padding2; + float Padding2; + int32 StepSize; uint32 CascadeIndex; uint32 ProbeIndexOffset; }); @@ -214,6 +219,7 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources() return true; _csClassify = shader->GetCS("CS_Classify"); _csUpdateProbesInitArgs = shader->GetCS("CS_UpdateProbesInitArgs"); + _csUpdateInactiveProbes = shader->GetCS("CS_UpdateInactiveProbes"); _csTraceRays[0] = shader->GetCS("CS_TraceRays", 0); _csTraceRays[1] = shader->GetCS("CS_TraceRays", 1); _csTraceRays[2] = shader->GetCS("CS_TraceRays", 2); @@ -245,6 +251,7 @@ void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj) LastFrameShaderReload = Engine::FrameCount; _csClassify = nullptr; _csUpdateProbesInitArgs = nullptr; + _csUpdateInactiveProbes = nullptr; _csTraceRays[0] = nullptr; _csTraceRays[1] = nullptr; _csTraceRays[2] = nullptr; @@ -322,7 +329,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont const float indirectLightingIntensity = settings.Intensity; const float probeHistoryWeight = Math::Clamp(settings.TemporalResponse, 0.0f, 0.98f); const float distance = settings.Distance; - const Color fallbackIrradiance = settings.FallbackIrradiance; // Automatically calculate amount of cascades to cover 
the GI distance at the current probes spacing const int32 idealProbesCount = 20; // Ideal amount of probes per-cascade to try to fit in order to cover whole distance @@ -335,7 +341,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont } // Calculate the probes count based on the amount of cascades and the distance to cover - const float cascadesDistanceScales[] = { 1.0f, 3.0f, 6.0f, 10.0f }; // Scales each cascade further away from the camera origin + const float cascadesDistanceScales[] = { 1.0f, 3.0f, 5.0f, 10.0f }; // Scales each cascade further away from the camera origin const float distanceExtent = distance / cascadesDistanceScales[cascadesCount - 1]; const float verticalRangeScale = 0.8f; // Scales the probes volume size at Y axis (horizontal aspect ratio makes the DDGI use less probes vertically to cover whole screen) Int3 probesCounts(Float3::Ceil(Float3(distanceExtent, distanceExtent * verticalRangeScale, distanceExtent) / probesSpacing)); @@ -351,6 +357,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Initialize cascades float probesSpacings[4]; Float3 viewOrigins[4]; + Float3 blendOrigins[4]; for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) { // Each cascade has higher spacing between probes @@ -361,14 +368,15 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Calculate view origin for cascade by shifting it towards the view direction to account for better view frustum coverage Float3 viewOrigin = renderContext.View.Position; Float3 viewDirection = renderContext.View.Direction; - const Float3 probesDistance = Float3(probesCounts) * cascadeProbesSpacing; + const Float3 probesDistance = Float3(probesCounts - 1) * cascadeProbesSpacing; const float probesDistanceMax = probesDistance.MaxValue(); const Float3 viewRayHit = CollisionsHelper::LineHitsBox(viewOrigin, viewOrigin + viewDirection * (probesDistanceMax * 2.0f), viewOrigin - probesDistance, viewOrigin + probesDistance); const float viewOriginOffset = viewRayHit.Y * probesDistanceMax * 0.6f; viewOrigin += viewDirection * viewOriginOffset; + //viewOrigin = Float3::Zero; + blendOrigins[cascadeIndex] = viewOrigin; const float viewOriginSnapping = cascadeProbesSpacing; viewOrigin = Float3::Floor(viewOrigin / viewOriginSnapping) * viewOriginSnapping; - //viewOrigin = Float3::Zero; viewOrigins[cascadeIndex] = viewOrigin; } @@ -500,6 +508,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont { auto& cascade = ddgiData.Cascades[cascadeIndex]; ddgiData.Result.Constants.ProbesOriginAndSpacing[cascadeIndex] = Float4(cascade.ProbesOrigin, cascade.ProbesSpacing); + ddgiData.Result.Constants.BlendOrigin[cascadeIndex] = Float4(blendOrigins[cascadeIndex], 0.0f); ddgiData.Result.Constants.ProbesScrollOffsets[cascadeIndex] = Int4(cascade.ProbeScrollOffsets, 0); } ddgiData.Result.Constants.RayMaxDistance = distance; @@ -508,7 +517,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont ddgiData.Result.Constants.ProbeHistoryWeight = probeHistoryWeight; ddgiData.Result.Constants.IrradianceGamma = 1.5f; ddgiData.Result.Constants.IndirectLightingIntensity = indirectLightingIntensity; - ddgiData.Result.Constants.FallbackIrradiance = fallbackIrradiance.ToFloat3() * fallbackIrradiance.A; + ddgiData.Result.Constants.FallbackIrradiance = settings.FallbackIrradiance.ToFloat4(); ddgiData.Result.ProbesData = ddgiData.ProbesData->View(); 
ddgiData.Result.ProbesDistance = ddgiData.ProbesDistance->View(); ddgiData.Result.ProbesIrradiance = ddgiData.ProbesIrradiance->View(); @@ -535,6 +544,8 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont data.TemporalTime = renderContext.List->Setup.UseTemporalAAJitter ? RenderTools::ComputeTemporalTime() : 0.0f; data.ViewDir = renderContext.View.Direction; data.SkyboxIntensity = renderContext.List->Sky ? renderContext.List->Sky->GetIndirectLightingIntensity() : 1.0f; + data.QuantizationError = RenderTools::GetColorQuantizationError(ddgiData.ProbesIrradiance->Format()); + data.FrameIndexMod8 = (int32)(Engine::FrameCount % 8); GBufferPass::SetInputs(renderContext.View, data.GBuffer); context->UpdateCB(_cb0, &data); context->BindCB(0, _cb0); @@ -581,6 +592,23 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont context->ResetUA(); } + // For inactive probes, search nearby ones to find the closest valid probe for a quick fallback when sampling irradiance + { + PROFILE_GPU_CPU_NAMED("Update Inactive Probes"); + // TODO: this could run within GPUComputePass during Trace Rays or Update Probes to overlap compute work + context->BindUA(0, ddgiData.Result.ProbesData); + Data1 data; + data.CascadeIndex = cascadeIndex; + int32 iterations = Math::CeilToInt(Math::Log2((float)Math::Min(probesCounts.MaxValue(), DDGI_PROBE_EMPTY_AREA_DENSITY) + 1.0f)); + for (int32 i = iterations - 1; i >= 0; i--) + { + data.StepSize = Math::FloorToInt(Math::Pow(2, (float)i) + 0.5f); // Jump Flood step size + context->UpdateCB(_cb1, &data); + context->Dispatch(_csUpdateInactiveProbes, threadGroupsX, 1, 1); + } + context->ResetUA(); + } + // Update probes in batches so ProbesTrace texture can be smaller uint32 arg = 0; // TODO: use rays allocator to dispatch raytracing in packets (eg. 8 threads in a group instead of hardcoded limit) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index e6ace0373..5953da887 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ -15,7 +15,8 @@ public: // Constant buffer data for DDGI access on a GPU. GPU_CB_STRUCT(ConstantsData { Float4 ProbesOriginAndSpacing[4]; - Int4 ProbesScrollOffsets[4]; + Float4 BlendOrigin[4]; // w is unused + Int4 ProbesScrollOffsets[4]; // w is unused uint32 ProbesCounts[3]; uint32 CascadesCount; float IrradianceGamma; @@ -24,8 +25,7 @@ public: float IndirectLightingIntensity; Float3 ViewPos; uint32 RaysCount; - Float3 FallbackIrradiance; - float Padding0; + Float4 FallbackIrradiance; }); // Binding data for the GPU.
@@ -44,6 +44,7 @@ private: GPUConstantBuffer* _cb1 = nullptr; GPUShaderProgramCS* _csClassify; GPUShaderProgramCS* _csUpdateProbesInitArgs; + GPUShaderProgramCS* _csUpdateInactiveProbes; GPUShaderProgramCS* _csTraceRays[4]; GPUShaderProgramCS* _csUpdateProbesIrradiance; GPUShaderProgramCS* _csUpdateProbesDistance; diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index ce0ec1881..7216a8fa8 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -428,6 +428,7 @@ public: // Write to objects buffer (this must match unpacking logic in HLSL) uint32 objectAddress = ObjectsBuffer.Data.Count() / sizeof(Float4); ObjectsListBuffer.Write(objectAddress); + ObjectsBuffer.Data.EnsureCapacity(ObjectsBuffer.Data.Count() + sizeof(Float4) * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + 6 * GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE)); auto* objectData = ObjectsBuffer.WriteReserve(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE); objectData[0] = Float4(object.Position, object.Radius); objectData[1] = Float4::Zero; @@ -511,6 +512,7 @@ public: { // Dirty object to redraw object->LastFrameUpdated = 0; + return; } GlobalSurfaceAtlasLight* light = Lights.TryGet(a->GetID()); if (light) diff --git a/Source/Engine/Renderer/PostProcessingPass.cpp b/Source/Engine/Renderer/PostProcessingPass.cpp index 030541e4c..5ac204523 100644 --- a/Source/Engine/Renderer/PostProcessingPass.cpp +++ b/Source/Engine/Renderer/PostProcessingPass.cpp @@ -269,7 +269,7 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input, int32 bloomMipCount = CalculateBloomMipCount(w1, h1); // Ensure to have valid data and if at least one effect should be applied - if (!(useBloom || useToneMapping || useCameraArtifacts) || checkIfSkipPass() || w8 <= 1 || h8 <= 1) + if (!(useBloom || useToneMapping || useCameraArtifacts || colorGradingLUT) || checkIfSkipPass() || w8 <= 1 || h8 <= 1) { // Resources are missing. Do not perform rendering. 
Just copy raw frame context->SetViewportAndScissors((float)output->Width(), (float)output->Height()); diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 7a72cd923..fd7d43c8b 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -402,6 +402,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont case ViewMode::MaterialComplexity: case ViewMode::Wireframe: case ViewMode::NoPostFx: + case ViewMode::VertexColors: + case ViewMode::QuadOverdraw: setup.UseTemporalAAJitter = false; break; } diff --git a/Source/Engine/Scripting/Scripting.cs b/Source/Engine/Scripting/Scripting.cs index 7f9f2980c..229e411f3 100644 --- a/Source/Engine/Scripting/Scripting.cs +++ b/Source/Engine/Scripting/Scripting.cs @@ -137,8 +137,8 @@ namespace FlaxEngine { Debug.LogError($"Unhandled Exception: {exception.Message}"); Debug.LogException(exception); - if (e.IsTerminating && !System.Diagnostics.Debugger.IsAttached) - Platform.Fatal($"Unhandled Exception: {exception}"); + //if (e.IsTerminating && !System.Diagnostics.Debugger.IsAttached) + // Platform.Fatal($"Unhandled Exception: {exception}"); } } diff --git a/Source/Engine/Threading/Task.cpp b/Source/Engine/Threading/Task.cpp index a640019d1..cef08b0bc 100644 --- a/Source/Engine/Threading/Task.cpp +++ b/Source/Engine/Threading/Task.cpp @@ -148,9 +148,8 @@ Task* Task::StartNew(Function::Signature& action, Object* target) void Task::Execute() { - if (IsCanceled()) + if (!IsQueued()) return; - ASSERT(IsQueued()); SetState(TaskState::Running); // Perform an operation diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp index c48a4c569..7978b4f9e 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp @@ -534,7 +534,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) } // Sample Texture case 9: - // Procedural Texture Sample + // Procedural Sample Texture case 17: { // Get input boxes @@ -739,7 +739,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) const int32 samplerIndex = node->Values.Count() >= 4 ? 
node->Values[3].AsInt : LinearWrap; if (samplerIndex == TextureGroup) { - auto& textureGroupSampler = findOrAddTextureGroupSampler(node->Values[3].AsInt); + auto& textureGroupSampler = findOrAddTextureGroupSampler(node->Values[5].AsInt); samplerName = *textureGroupSampler.ShaderName; } else if (samplerIndex >= 0 && samplerIndex < ARRAY_COUNT(SamplerNames)) @@ -828,7 +828,7 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) const int32 samplerIndex = node->Values[3].AsInt; if (samplerIndex == TextureGroup) { - auto& textureGroupSampler = findOrAddTextureGroupSampler(node->Values[3].AsInt); + auto& textureGroupSampler = findOrAddTextureGroupSampler(node->Values[5].AsInt); samplerName = *textureGroupSampler.ShaderName; } else if (samplerIndex >= 0 && samplerIndex < ARRAY_COUNT(SamplerNames)) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index 843822b98..57afeb7a5 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -567,6 +567,7 @@ void ModelTool::Options::Serialize(SerializeStream& stream, const void* otherObj SERIALIZE(CalculateBoneOffsetMatrices); SERIALIZE(LightmapUVsSource); SERIALIZE(CollisionMeshesPrefix); + SERIALIZE(CollisionMeshesPostfix); SERIALIZE(CollisionType); SERIALIZE(PositionFormat); SERIALIZE(TexCoordFormat); @@ -621,6 +622,7 @@ void ModelTool::Options::Deserialize(DeserializeStream& stream, ISerializeModifi DESERIALIZE(CalculateBoneOffsetMatrices); DESERIALIZE(LightmapUVsSource); DESERIALIZE(CollisionMeshesPrefix); + DESERIALIZE(CollisionMeshesPostfix); DESERIALIZE(CollisionType); DESERIALIZE(PositionFormat); DESERIALIZE(TexCoordFormat); @@ -1830,7 +1832,7 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option } // Collision mesh output - if (options.CollisionMeshesPrefix.HasChars()) + if (options.CollisionMeshesPrefix.HasChars() || options.CollisionMeshesPostfix.HasChars()) { // Extract collision meshes from the model ModelData collisionModel; @@ -1839,7 +1841,8 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option for (int32 i = lod.Meshes.Count() - 1; i >= 0; i--) { auto mesh = lod.Meshes[i]; - if (mesh->Name.StartsWith(options.CollisionMeshesPrefix, StringSearchCase::IgnoreCase)) + if ((options.CollisionMeshesPrefix.HasChars() && mesh->Name.StartsWith(options.CollisionMeshesPrefix, StringSearchCase::IgnoreCase)) || + (options.CollisionMeshesPostfix.HasChars() && mesh->Name.EndsWith(options.CollisionMeshesPostfix, StringSearchCase::IgnoreCase))) { // Remove material slot used by this mesh (if no other mesh else uses it) int32 materialSlotUsageCount = 0; diff --git a/Source/Engine/Tools/ModelTool/ModelTool.h b/Source/Engine/Tools/ModelTool/ModelTool.h index d7545b92e..bc96e8308 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.h +++ b/Source/Engine/Tools/ModelTool/ModelTool.h @@ -221,6 +221,9 @@ public: // If specified, all meshes that name starts with this prefix in the name will be imported as a separate collision data asset (excluded used for rendering). API_FIELD(Attributes="EditorOrder(100), EditorDisplay(\"Geometry\"), VisibleIf(nameof(ShowGeometry))") String CollisionMeshesPrefix = TEXT(""); + // If specified, all meshes that name ends with this postfix in the name will be imported as a separate collision data asset (excluded used for rendering). 
+ API_FIELD(Attributes="EditorOrder(101), EditorDisplay(\"Geometry\"), VisibleIf(nameof(ShowGeometry))") + String CollisionMeshesPostfix = TEXT(""); // The type of collision that should be generated if the mesh has a collision prefix specified. API_FIELD(Attributes="EditorOrder(105), EditorDisplay(\"Geometry\"), VisibleIf(nameof(ShowGeometry))") CollisionDataType CollisionType = CollisionDataType::ConvexMesh; diff --git a/Source/Engine/UI/GUI/CanvasScaler.cs b/Source/Engine/UI/GUI/CanvasScaler.cs index 6bd18ea51..1e30fd22f 100644 --- a/Source/Engine/UI/GUI/CanvasScaler.cs +++ b/Source/Engine/UI/GUI/CanvasScaler.cs @@ -449,8 +449,7 @@ namespace FlaxEngine.GUI /// public override bool RayCast(ref Float2 location, out Control hit) { - var p = location / _scale; - if (RayCastChildren(ref p, out hit)) + if (RayCastChildren(ref location, out hit)) return true; return base.RayCast(ref location, out hit); } diff --git a/Source/Engine/UI/GUI/Common/RichTextBox.Parsing.cs b/Source/Engine/UI/GUI/Common/RichTextBox.Parsing.cs index 20ef1c401..bb6ee22a5 100644 --- a/Source/Engine/UI/GUI/Common/RichTextBox.Parsing.cs +++ b/Source/Engine/UI/GUI/Common/RichTextBox.Parsing.cs @@ -143,6 +143,40 @@ namespace FlaxEngine.GUI context.Caret.X = 0; OnLineAdded(ref context, _text.Length - 1); } + + // Organize lines vertically + if (_textBlocks.Count != 0) + { + var lastBlock = _textBlocks[_textBlocks.Count - 1]; + + // Get style (global or leftover from style stack or the last lime) + var verticalAlignments = _textStyle.Alignment; + if (context.StyleStack.Count > 1) + verticalAlignments = context.StyleStack.Peek().Alignment; + else if ((lastBlock.Style.Alignment & TextBlockStyle.Alignments.VerticalMask) != TextBlockStyle.Alignments.Baseline) + verticalAlignments = lastBlock.Style.Alignment; + + var totalSize = lastBlock.Bounds.BottomRight; + var sizeOffset = Size - totalSize; + var textBlocks = CollectionsMarshal.AsSpan(_textBlocks); + if ((verticalAlignments & TextBlockStyle.Alignments.Middle) == TextBlockStyle.Alignments.Middle) + { + sizeOffset.Y *= 0.5f; + for (int i = 0; i < _textBlocks.Count; i++) + { + ref TextBlock textBlock = ref textBlocks[i]; + textBlock.Bounds.Location.Y += sizeOffset.Y; + } + } + else if ((verticalAlignments & TextBlockStyle.Alignments.Bottom) == TextBlockStyle.Alignments.Bottom) + { + for (int i = 0; i < _textBlocks.Count; i++) + { + ref TextBlock textBlock = ref textBlocks[i]; + textBlock.Bounds.Location.Y += sizeOffset.Y; + } + } + } } /// @@ -239,14 +273,15 @@ namespace FlaxEngine.GUI } // Organize text blocks within line - var horizontalAlignments = TextBlockStyle.Alignments.Baseline; - var verticalAlignments = TextBlockStyle.Alignments.Baseline; + var lineAlignments = TextBlockStyle.Alignments.Baseline; for (int i = context.LineStartTextBlockIndex; i < _textBlocks.Count; i++) { ref TextBlock textBlock = ref textBlocks[i]; var vOffset = lineSize.Y - textBlock.Bounds.Height; - horizontalAlignments |= textBlock.Style.Alignment & TextBlockStyle.Alignments.HorizontalMask; - verticalAlignments |= textBlock.Style.Alignment & TextBlockStyle.Alignments.VerticalMask; + if (i == context.LineStartTextBlockIndex) + lineAlignments = textBlock.Style.Alignment; + else + lineAlignments &= textBlock.Style.Alignment; switch (textBlock.Style.Alignment & TextBlockStyle.Alignments.VerticalMask) { case TextBlockStyle.Alignments.Baseline: @@ -275,9 +310,9 @@ namespace FlaxEngine.GUI } } - // Organize blocks within whole container + // Organize whole line horizontally var sizeOffset = Size - lineSize; 
- if ((horizontalAlignments & TextBlockStyle.Alignments.Center) == TextBlockStyle.Alignments.Center) + if ((lineAlignments & TextBlockStyle.Alignments.Center) == TextBlockStyle.Alignments.Center) { sizeOffset.X *= 0.5f; for (int i = context.LineStartTextBlockIndex; i < _textBlocks.Count; i++) @@ -286,7 +321,7 @@ namespace FlaxEngine.GUI textBlock.Bounds.Location.X += sizeOffset.X; } } - else if ((horizontalAlignments & TextBlockStyle.Alignments.Right) == TextBlockStyle.Alignments.Right) + else if ((lineAlignments & TextBlockStyle.Alignments.Right) == TextBlockStyle.Alignments.Right) { for (int i = context.LineStartTextBlockIndex; i < _textBlocks.Count; i++) { @@ -294,23 +329,6 @@ namespace FlaxEngine.GUI textBlock.Bounds.Location.X += sizeOffset.X; } } - if ((verticalAlignments & TextBlockStyle.Alignments.Middle) == TextBlockStyle.Alignments.Middle) - { - sizeOffset.Y *= 0.5f; - for (int i = context.LineStartTextBlockIndex; i < _textBlocks.Count; i++) - { - ref TextBlock textBlock = ref textBlocks[i]; - textBlock.Bounds.Location.Y += sizeOffset.Y; - } - } - else if ((verticalAlignments & TextBlockStyle.Alignments.Bottom) == TextBlockStyle.Alignments.Bottom) - { - for (int i = context.LineStartTextBlockIndex; i < _textBlocks.Count; i++) - { - ref TextBlock textBlock = ref textBlocks[i]; - textBlock.Bounds.Location.Y += sizeOffset.Y; - } - } // Move to the next line context.LineStartCharacterIndex = lineEnd + 1; diff --git a/Source/Engine/UI/GUI/Common/RichTextBox.Tags.cs b/Source/Engine/UI/GUI/Common/RichTextBox.Tags.cs index b57fac47d..3bb99762f 100644 --- a/Source/Engine/UI/GUI/Common/RichTextBox.Tags.cs +++ b/Source/Engine/UI/GUI/Common/RichTextBox.Tags.cs @@ -175,7 +175,7 @@ namespace FlaxEngine.GUI // Setup size var font = imageBlock.Style.Font.GetFont(); if (font) - imageBlock.Bounds.Size = new Float2(font.Height); + imageBlock.Bounds.Size = new Float2(font.Ascender); var imageSize = image.Size; imageBlock.Bounds.Size.X *= imageSize.X / imageSize.Y; // Keep original aspect ratio bool hasWidth = TryParseNumberTag(ref tag, "width", imageBlock.Bounds.Width, out var width); @@ -215,16 +215,16 @@ namespace FlaxEngine.GUI switch (valign) { case "top": - style.Alignment = TextBlockStyle.Alignments.Top; + style.Alignment |= TextBlockStyle.Alignments.Top; break; case "bottom": - style.Alignment = TextBlockStyle.Alignments.Bottom; + style.Alignment |= TextBlockStyle.Alignments.Bottom; break; case "middle": - style.Alignment = TextBlockStyle.Alignments.Middle; + style.Alignment |= TextBlockStyle.Alignments.Middle; break; case "baseline": - style.Alignment = TextBlockStyle.Alignments.Baseline; + style.Alignment |= TextBlockStyle.Alignments.Baseline; break; } } @@ -243,17 +243,17 @@ namespace FlaxEngine.GUI var style = context.StyleStack.Peek(); if (tag.Attributes.TryGetValue(string.Empty, out var valign)) { - style.Alignment &= ~TextBlockStyle.Alignments.VerticalMask; + style.Alignment &= ~TextBlockStyle.Alignments.HorizontalMask; switch (valign) { case "left": - style.Alignment = TextBlockStyle.Alignments.Left; + style.Alignment |= TextBlockStyle.Alignments.Left; break; case "right": - style.Alignment = TextBlockStyle.Alignments.Right; + style.Alignment |= TextBlockStyle.Alignments.Right; break; case "center": - style.Alignment = TextBlockStyle.Alignments.Center; + style.Alignment |= TextBlockStyle.Alignments.Center; break; } } @@ -270,7 +270,8 @@ namespace FlaxEngine.GUI else { var style = context.StyleStack.Peek(); - style.Alignment = TextBlockStyle.Alignments.Center; + style.Alignment &= 
~TextBlockStyle.Alignments.HorizontalMask; + style.Alignment |= TextBlockStyle.Alignments.Center; context.StyleStack.Push(style); } } diff --git a/Source/Engine/UI/GUI/Panels/DropPanel.cs b/Source/Engine/UI/GUI/Panels/DropPanel.cs index de80f9fc5..308272218 100644 --- a/Source/Engine/UI/GUI/Panels/DropPanel.cs +++ b/Source/Engine/UI/GUI/Panels/DropPanel.cs @@ -11,6 +11,11 @@ namespace FlaxEngine.GUI [ActorToolbox("GUI")] public class DropPanel : ContainerControl { + /// + /// Size of the drop down icon. + /// + public const float DropDownIconSize = 14.0f; + /// /// The header height. /// @@ -368,7 +373,7 @@ namespace FlaxEngine.GUI var style = Style.Current; var enabled = EnabledInHierarchy; - // Paint Background + // Draw Background var backgroundColor = BackgroundColor; if (backgroundColor.A > 0.0f) { @@ -386,7 +391,7 @@ namespace FlaxEngine.GUI float textLeft = 0; if (EnableDropDownIcon) { - textLeft += 14; + textLeft += DropDownIconSize; var dropDownRect = new Rectangle(2, (HeaderHeight - 12) / 2, 12, 12); var arrowColor = _mouseOverHeader ? style.Foreground : style.ForegroundGrey; if (_isClosed) @@ -395,7 +400,7 @@ namespace FlaxEngine.GUI ArrowImageOpened?.Draw(dropDownRect, arrowColor); } - // Text + // Header text var textRect = new Rectangle(textLeft, 0, Width - textLeft, HeaderHeight); _headerTextMargin.ShrinkRectangle(ref textRect); var textColor = HeaderTextColor; @@ -404,7 +409,9 @@ namespace FlaxEngine.GUI textColor *= 0.6f; } + Render2D.PushClip(textRect); Render2D.DrawText(HeaderTextFont.GetFont(), HeaderTextMaterial, HeaderText, textRect, textColor, TextAlignment.Near, TextAlignment.Center); + Render2D.PopClip(); if (!_isClosed && EnableContainmentLines) { diff --git a/Source/Shaders/GI/DDGI.hlsl b/Source/Shaders/GI/DDGI.hlsl index c116b597a..b88b846a6 100644 --- a/Source/Shaders/GI/DDGI.hlsl +++ b/Source/Shaders/GI/DDGI.hlsl @@ -20,17 +20,23 @@ #define DDGI_PROBE_ATTENTION_MAX 0.98f // Maximum probe attention value that still makes it active (but not activated which is 1.0f). 
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side) #define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side) -#define DDGI_CASCADE_BLEND_SIZE 2.5f // Distance in probes over which cascades blending happens +#define DDGI_CASCADE_BLEND_SIZE 2.0f // Distance in probes over which cascades blending happens #ifndef DDGI_CASCADE_BLEND_SMOOTH #define DDGI_CASCADE_BLEND_SMOOTH 0 // Enables smooth cascade blending, otherwise dithering will be used #endif #define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space +#define DDGI_DEFAULT_BIAS 0.2f // Default value for DDGI sampling bias +#define DDGI_FALLBACK_COORDS_ENCODE(coord) ((float3)(coord + 1) / 128.0f) +#define DDGI_FALLBACK_COORDS_DECODE(data) (uint3)(data.xyz * 128.0f - 1) +#define DDGI_FALLBACK_COORDS_VALID(data) (length(data.xyz) > 0) +//#define DDGI_DEBUG_CASCADE 0 // Forces a specific cascade to be only in use (for debugging) // DDGI data for a constant buffer struct DDGIData { float4 ProbesOriginAndSpacing[4]; - int4 ProbesScrollOffsets[4]; // w unused + float4 BlendOrigin[4]; // w is unused + int4 ProbesScrollOffsets[4]; // w is unused uint3 ProbesCounts; uint CascadesCount; float IrradianceGamma; @@ -39,8 +45,7 @@ struct DDGIData float IndirectLightingIntensity; float3 ViewPos; uint RaysCount; - float3 FallbackIrradiance; - float Padding0; + float4 FallbackIrradiance; }; uint GetDDGIProbeIndex(DDGIData data, uint3 probeCoords) @@ -159,6 +164,8 @@ float2 GetDDGIProbeUV(DDGIData data, uint cascadeIndex, uint probeIndex, float2 float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probesData, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, uint cascadeIndex, float3 probesOrigin, float3 probesExtent, float probesSpacing, float3 biasedWorldPosition) { + bool invalidCascade = cascadeIndex >= data.CascadesCount; + cascadeIndex = min(cascadeIndex, data.CascadesCount - 1); uint3 probeCoordsEnd = data.ProbesCounts - uint3(1, 1, 1); uint3 baseProbeCoords = clamp(uint3((worldPosition - probesOrigin + probesExtent) / probesSpacing), uint3(0, 0, 0), probeCoordsEnd); @@ -168,7 +175,6 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes // Loop over the closest probes to accumulate their contributions float4 irradiance = float4(0, 0, 0, 0); - const int3 SearchAxisMasks[3] = { int3(1, 0, 0), int3(0, 1, 0), int3(0, 0, 1) }; for (uint i = 0; i < 8; i++) { uint3 probeCoordsOffset = uint3(i, i >> 1, i >> 2) & 1; @@ -178,33 +184,23 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes // Load probe position and state float4 probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); uint probeState = DecodeDDGIProbeState(probeData); + uint useVisibility = true; + float minWeight = 0.000001f; if (probeState == DDGI_PROBE_STATE_INACTIVE) { - // Search nearby probes to find any nearby GI sample - for (int searchDistance = 1; searchDistance < 3 && probeState == DDGI_PROBE_STATE_INACTIVE; searchDistance++) - for (uint searchAxis = 0; searchAxis < 3; searchAxis++) { - int searchAxisDir = probeCoordsOffset[searchAxis] ?
1 : -1; - int3 searchCoordsOffset = SearchAxisMasks[searchAxis] * searchAxisDir * searchDistance; - uint3 searchCoords = clamp((int3)probeCoords + searchCoordsOffset, int3(0, 0, 0), (int3)probeCoordsEnd); - uint searchIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, searchCoords); - float4 searchData = LoadDDGIProbeData(data, probesData, cascadeIndex, searchIndex); - uint searchState = DecodeDDGIProbeState(searchData); - if (searchState != DDGI_PROBE_STATE_INACTIVE) - { - // Use nearby probe as a fallback (visibility test might ignore it but with smooth gradient) - probeCoords = searchCoords; - probeIndex = searchIndex; - probeData = searchData; - probeState = searchState; - break; - } - } - if (probeState == DDGI_PROBE_STATE_INACTIVE) - continue; + // Use fallback probe that is closest to this one + uint3 fallbackCoords = DDGI_FALLBACK_COORDS_DECODE(probeData); + float fallbackToProbeDist = length((float3)probeCoords - (float3)fallbackCoords); + useVisibility = fallbackToProbeDist <= 1.0f; // Skip the visibility test for distant fallback probes, since its max-distance limit (about 1.5x the probe spacing) would reject them + if (fallbackToProbeDist > 2.0f) minWeight = 1.0f; + probeCoords = fallbackCoords; + probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, fallbackCoords); + probeData = LoadDDGIProbeData(data, probesData, cascadeIndex, probeIndex); + //if (DecodeDDGIProbeState(probeData) == DDGI_PROBE_STATE_INACTIVE) continue; } - float3 probeBasePosition = baseProbeWorldPosition + ((probeCoords - baseProbeCoords) * probesSpacing); - float3 probePosition = probeBasePosition + probeData.xyz * probesSpacing; // Probe offset is [-1;1] within probes spacing + + // Calculate probe position + float3 probePosition = baseProbeWorldPosition + (((float3)probeCoords - (float3)baseProbeCoords) * probesSpacing) + probeData.xyz * probesSpacing; // Calculate the distance and direction from the (biased and non-biased) shading point and the probe float3 worldPosToProbe = normalize(probePosition - worldPosition); @@ -213,6 +209,7 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes // Smooth backface test float weight = Square(dot(worldPosToProbe, worldNormal) * 0.5f + 0.5f); + weight = max(weight, 0.1f); // Sample distance texture float2 octahedralCoords = GetOctahedralCoords(-biasedPosToProbe); @@ -220,24 +217,23 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes float2 probeDistance = probesDistance.SampleLevel(SamplerLinearClamp, uv, 0).rg * 2.0f; // Visibility weight (Chebyshev) - if (biasedPosToProbeDist > probeDistance.x) + if (biasedPosToProbeDist > probeDistance.x && useVisibility) { float variance = abs(Square(probeDistance.x) - probeDistance.y); float visibilityWeight = variance / (variance + Square(biasedPosToProbeDist - probeDistance.x)); - weight *= max(visibilityWeight * visibilityWeight * visibilityWeight, 0.05f); + weight *= max(visibilityWeight * visibilityWeight * visibilityWeight, 0.0f); } // Avoid a weight of zero - weight = max(weight, 0.000001f); + weight = max(weight, minWeight); // Adjust weight curve to inject a small portion of light const float minWeightThreshold = 0.2f; - if (weight < minWeightThreshold) - weight *= Square(weight) / Square(minWeightThreshold); + if (weight < minWeightThreshold) weight *= (weight * weight) * (1.0f / (minWeightThreshold * minWeightThreshold)); // Calculate trilinear weights based on the distance to each probe to smoothly transition between grid of 8 probes float3 trilinear = lerp(1.0f - biasAlpha, biasAlpha,
(float3)probeCoordsOffset); - weight *= max(trilinear.x * trilinear.y * trilinear.z, 0.001f); + weight *= saturate(trilinear.x * trilinear.y * trilinear.z * 2.0f); // Sample irradiance texture octahedralCoords = GetOctahedralCoords(worldNormal); @@ -269,7 +265,9 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes if (irradiance.a > 0.0f) { // Normalize irradiance - irradiance.rgb /= irradiance.a; + //irradiance.rgb /= irradiance.a; + //irradiance.rgb /= lerp(1, irradiance.a, saturate(irradiance.a * irradiance.a + 0.9f)); + irradiance.rgb /= invalidCascade ? irradiance.a : lerp(1, irradiance.a, saturate(irradiance.a * irradiance.a + 0.9f)); #if DDGI_SRGB_BLENDING irradiance.rgb *= irradiance.rgb; #endif @@ -281,22 +279,34 @@ float3 SampleDDGIIrradianceCascade(DDGIData data, Texture2D probes float3 GetDDGISurfaceBias(float3 viewDir, float probesSpacing, float3 worldNormal, float bias) { // Bias the world-space position to reduce artifacts - return (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias); + return (worldNormal * 0.2f + viewDir * 0.8f) * (0.6f * probesSpacing * bias); +} + +// [Inigo Quilez, https://iquilezles.org/articles/distfunctions/] +float sdRoundBox(float3 p, float3 b, float r) +{ + float3 q = abs(p) - b + r; + return length(max(q, 0.0f)) + min(max(q.x, max(q.y, q.z)), 0.0f) - r; } // Samples DDGI probes volume at the given world-space position and returns the irradiance. // bias - scales the bias vector to the initial sample point to reduce self-shading artifacts // dither - randomized per-pixel value in range 0-1, used to smooth dithering for cascades blending -float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias = 0.2f, float dither = 0.0f) +float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, Texture2D probesDistance, Texture2D probesIrradiance, float3 worldPosition, float3 worldNormal, float bias = DDGI_DEFAULT_BIAS, float dither = 0.0f) { // Select the highest cascade that contains the sample location - uint cascadeIndex = 0; float probesSpacing = 0, cascadeWeight = 0; float3 probesOrigin = (float3)0, probesExtent = (float3)0, biasedWorldPosition = (float3)0; float3 viewDir = normalize(data.ViewPos - worldPosition); #if DDGI_CASCADE_BLEND_SMOOTH dither = 0.0f; #endif +#ifdef DDGI_DEBUG_CASCADE + uint cascadeIndex = DDGI_DEBUG_CASCADE; +#else + uint cascadeIndex = 0; + if (data.CascadesCount == 0) + return float3(0, 0, 0); for (; cascadeIndex < data.CascadesCount; cascadeIndex++) { // Get cascade data @@ -306,26 +316,21 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T biasedWorldPosition = worldPosition + GetDDGISurfaceBias(viewDir, probesSpacing, worldNormal, bias); // Calculate cascade blending weight (use input bias to smooth transition) - float cascadeBlendSmooth = frac(max(distance(data.ViewPos, worldPosition) - probesExtent.x, 0) / probesSpacing) * 0.1f; - float3 cascadeBlendPoint = worldPosition - probesOrigin - cascadeBlendSmooth * probesSpacing; float fadeDistance = probesSpacing * DDGI_CASCADE_BLEND_SIZE; -#if DDGI_CASCADE_BLEND_SMOOTH - fadeDistance *= 2.0f; // Make it even smoother when using linear blending -#endif - cascadeWeight = saturate(Min3(probesExtent - abs(cascadeBlendPoint)) / fadeDistance); + float3 blendPos = worldPosition - data.BlendOrigin[cascadeIndex].xyz; + cascadeWeight = sdRoundBox(blendPos, probesExtent - probesSpacing, probesSpacing * 2) + 
fadeDistance; + cascadeWeight = 1 - saturate(cascadeWeight / fadeDistance); if (cascadeWeight > dither) break; } - if (cascadeIndex == data.CascadesCount) - return data.FallbackIrradiance; +#endif // Sample cascade float3 result = SampleDDGIIrradianceCascade(data, probesData, probesDistance, probesIrradiance, worldPosition, worldNormal, cascadeIndex, probesOrigin, probesExtent, probesSpacing, biasedWorldPosition); // Blend with the next cascade (or fallback irradiance outside the volume) +#if DDGI_CASCADE_BLEND_SMOOTH && !defined(DDGI_DEBUG_CASCADE) cascadeIndex++; -#if DDGI_CASCADE_BLEND_SMOOTH - result *= cascadeWeight; if (cascadeIndex < data.CascadesCount && cascadeWeight < 0.99f) { probesSpacing = data.ProbesOriginAndSpacing[cascadeIndex].w; @@ -333,18 +338,16 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D probesData, T probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f); biasedWorldPosition = worldPosition + GetDDGISurfaceBias(viewDir, probesSpacing, worldNormal, bias); float3 resultNext = SampleDDGIIrradianceCascade(data, probesData, probesDistance, probesIrradiance, worldPosition, worldNormal, cascadeIndex, probesOrigin, probesExtent, probesSpacing, biasedWorldPosition); + result *= cascadeWeight; result += resultNext * (1 - cascadeWeight); } - else - { - result += data.FallbackIrradiance * (1 - cascadeWeight); - } -#else - if (cascadeIndex == data.CascadesCount) - { - result += data.FallbackIrradiance * (1 - cascadeWeight); - } #endif + if (cascadeIndex >= data.CascadesCount) + { + // Blend between the last cascade and the fallback irradiance + float fallbackWeight = (1 - cascadeWeight) * data.FallbackIrradiance.a; + result = lerp(result, data.FallbackIrradiance.rgb, fallbackWeight); + } return result; } diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index daad2018d..b080efc0b 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -13,6 +13,7 @@ #include "./Flax/Math.hlsl" #include "./Flax/Noise.hlsl" #include "./Flax/Quaternion.hlsl" +#include "./Flax/MonteCarlo.hlsl" #include "./Flax/GlobalSignDistanceField.hlsl" #include "./Flax/GI/GlobalSurfaceAtlas.hlsl" #include "./Flax/GI/DDGI.hlsl" @@ -26,6 +27,7 @@ #define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32 #define DDGI_PROBE_RELOCATE_ITERATIVE 1 // If true, probes relocation algorithm tries to move them in additive way, otherwise all nearby locations are checked to find the best position #define DDGI_PROBE_RELOCATE_FIND_BEST 1 // If true, probes relocation algorithm tries to move to the best matching location within nearby area +#define DDGI_PROBE_EMPTY_AREA_DENSITY 8 // Spacing (in probe grid) between fallback probes placed into empty areas to provide valid GI for nearby dynamic objects or transparency #define DDGI_DEBUG_STATS 0 // Enables additional GPU-driven stats for probe/rays count #define DDGI_DEBUG_INSTABILITY 0 // Enables additional probe irradiance instability debugging @@ -42,10 +44,13 @@ float TemporalTime; int4 ProbeScrollClears[4]; float3 ViewDir; float Padding1; +float3 QuantizationError; +uint FrameIndexMod8; META_CB_END META_CB_BEGIN(1, Data1) -float2 Padding2; +float Padding2; +int StepSize; uint CascadeIndex; uint ProbeIndexOffset; META_CB_END @@ -98,6 +103,11 @@ float3 Remap(float3 value, float3 fromMin, float3 fromMax, float3 toMin, float3 return (value - fromMin) / (fromMax - fromMin) * (toMax - toMin) + toMin; } +bool IsProbeAtBorder(uint3 probeCoords) +{ + return min(probeCoords.x, min(probeCoords.y, probeCoords.z)) == 0 || probeCoords.x 
== DDGI.ProbesCounts.x - 1 || probeCoords.y == DDGI.ProbesCounts.y - 1 || probeCoords.z == DDGI.ProbesCounts.z - 1; +} + // Compute shader for updating probes state between active and inactive and performing probes relocation. META_CS(true, FEATURE_LEVEL_SM5) [numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)] @@ -112,6 +122,14 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w; float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords); +#ifdef DDGI_DEBUG_CASCADE + // Single cascade-only debugging + if (CascadeIndex != DDGI_DEBUG_CASCADE) + { + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); + return; + } +#else // Disable probes that are in the range of higher-quality cascade if (CascadeIndex > 0) { @@ -119,15 +137,15 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float prevProbesSpacing = DDGI.ProbesOriginAndSpacing[prevCascade].w; float3 prevProbesOrigin = DDGI.ProbesScrollOffsets[prevCascade].xyz * prevProbesSpacing + DDGI.ProbesOriginAndSpacing[prevCascade].xyz; float3 prevProbesExtent = (DDGI.ProbesCounts - 1) * (prevProbesSpacing * 0.5f); - prevProbesExtent -= probesSpacing * ceil(DDGI_CASCADE_BLEND_SIZE); // Apply safe margin to allow probes on cascade edges + prevProbesExtent -= probesSpacing * ceil(DDGI_CASCADE_BLEND_SIZE) * 2; // Apply safe margin to allow probes on cascade edges float prevCascadeWeight = Min3(prevProbesExtent - abs(probeBasePosition - prevProbesOrigin)); if (prevCascadeWeight > 0.1f) { - // Disable probe RWProbesData[probeDataCoords] = EncodeDDGIProbeData(float3(0, 0, 0), DDGI_PROBE_STATE_INACTIVE, 0.0f); return; } } +#endif // Check if probe was scrolled int3 probeScrollClears = ProbeScrollClears[CascadeIndex].xyz; @@ -171,9 +189,29 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) float voxelLimit = GlobalSDF.CascadeVoxelSize[CascadeIndex] * 0.8f; float distanceLimit = probesSpacing * ProbesDistanceLimits[CascadeIndex]; float relocateLimit = probesSpacing * ProbesRelocateLimits[CascadeIndex]; - if (sdfDst > distanceLimit + length(probeOffset)) // Probe is too far from geometry (or deep inside) +#ifdef DDGI_PROBE_EMPTY_AREA_DENSITY + uint3 probeCoordsStable = GetDDGIProbeCoords(DDGI, probeIndex); + if (sdf > probesSpacing * DDGI.ProbesCounts.x * 0.3f +#if DDGI_PROBE_EMPTY_AREA_DENSITY > 1 + && ( + // Low-density grid + (probeCoordsStable.x % DDGI_PROBE_EMPTY_AREA_DENSITY == 0 && probeCoordsStable.y % DDGI_PROBE_EMPTY_AREA_DENSITY == 0 && probeCoordsStable.z % DDGI_PROBE_EMPTY_AREA_DENSITY == 0) + // Edge probes at the last cascade (for good fallback irradiance outside the GI distance) + //|| (CascadeIndex + 1 == DDGI.CascadesCount && IsProbeAtBorder(probeCoords)) + ) +#endif + ) { - // Disable it + // Add some fallback probes in empty areas to provide valid GI for nearby dynamic objects or transparency + probeOffset = float3(0, 0, 0); + probeState = wasScrolled || probeStateOld == DDGI_PROBE_STATE_INACTIVE ?
DDGI_PROBE_STATE_ACTIVATED : DDGI_PROBE_STATE_ACTIVE; + probeAttention = DDGI_PROBE_ATTENTION_MIN; + } + else +#endif + if (sdfDst > distanceLimit + length(probeOffset)) + { + // Probe is too far from geometry (or deep inside) so disable it probeOffset = float3(0, 0, 0); probeState = DDGI_PROBE_STATE_INACTIVE; probeAttention = 0.0f; @@ -194,6 +232,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) probeAttention = clamp(probeAttention, DDGI_PROBE_ATTENTION_MIN, DDGI_PROBE_ATTENTION_MAX); // Relocate only if probe location is not good enough + BRANCH if (sdf <= voxelLimit) { #if DDGI_PROBE_RELOCATE_ITERATIVE @@ -265,6 +304,7 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID) bool wasActivated = probeStateOld == DDGI_PROBE_STATE_INACTIVE; bool wasRelocated = distance(probeOffset, probeOffsetOld) > 2.0f; #if DDGI_PROBE_RELOCATE_FIND_BEST || DDGI_PROBE_RELOCATE_ITERATIVE + BRANCH if (wasRelocated && !wasActivated) { // If probe was relocated but the previous location is visible from the new one, then don't re-activate it for smoother blend @@ -323,6 +363,78 @@ void CS_UpdateProbesInitArgs() #endif +#ifdef _CS_UpdateInactiveProbes + +RWTexture2D RWProbesData : register(u0); + +void CheckNearbyProbe(inout uint3 fallbackCoords, inout uint probeState, inout float minDistance, uint3 probeCoords, int3 probeCoordsEnd, int3 offset) +{ + uint3 nearbyCoords = (uint3)clamp(((int3)probeCoords + offset), int3(0, 0, 0), probeCoordsEnd); + uint nearbyIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, nearbyCoords); + float4 nearbyData = RWProbesData[GetDDGIProbeTexelCoords(DDGI, CascadeIndex, nearbyIndex)]; + float nearbyDist = distance((float3)nearbyCoords, (float3)probeCoords); + if (DecodeDDGIProbeState(nearbyData) != DDGI_PROBE_STATE_INACTIVE && nearbyDist < minDistance) + { + // Use nearby probe + fallbackCoords = nearbyCoords; + probeState = DDGI_PROBE_STATE_ACTIVE; + minDistance = nearbyDist; + return; + } + nearbyCoords = DDGI_FALLBACK_COORDS_DECODE(nearbyData); + nearbyDist = distance((float3)nearbyCoords, (float3)probeCoords); + if (DDGI_FALLBACK_COORDS_VALID(nearbyData) && nearbyDist < minDistance) + { + // Use fallback probe + fallbackCoords = nearbyCoords; + probeState = DDGI_PROBE_STATE_ACTIVE; + minDistance = nearbyDist; + } +} + +// Compute shader to store closest valid probe coords inside inactive probes data for quick fallback lookup when sampling irradiance. +// Uses Jump Flood algorithm. 
+META_CS(true, FEATURE_LEVEL_SM5) +[numthreads(DDGI_PROBE_CLASSIFY_GROUP_SIZE, 1, 1)] +void CS_UpdateInactiveProbes(uint3 DispatchThreadId : SV_DispatchThreadID) +{ + uint probeIndex = min(DispatchThreadId.x, ProbesCount - 1); + uint3 fallbackCoords = uint3(1000, 1000, 1000); + + // Load probe data for the current thread + uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex); + probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords); + int2 probeDataCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex); + float4 probeData = RWProbesData[probeDataCoords]; + uint probeState = DecodeDDGIProbeState(probeData); + BRANCH + if (probeState == DDGI_PROBE_STATE_INACTIVE) + { + // Find the closest active probe (Jump Flood) + int3 probeCoordsEnd = (int3)DDGI.ProbesCounts - int3(1, 1, 1); + float minDistance = 1e27f; + UNROLL for (int z = -1; z <= 1; z++) + UNROLL for (int y = -1; y <= 1; y++) + UNROLL for (int x = -1; x <= 1; x++) + { + int3 offset = int3(x, y, z) * StepSize; + CheckNearbyProbe(fallbackCoords, probeState, minDistance, probeCoords, probeCoordsEnd, offset); + } + } + + // Ensure all threads (within dispatch) got proper data before writing back to the same memory + AllMemoryBarrierWithGroupSync(); + + // Write modified probe data back (remain inactive) + BRANCH + if (probeState != DDGI_PROBE_STATE_INACTIVE && DispatchThreadId.x < ProbesCount && fallbackCoords.x != 1000) + { + RWProbesData[probeDataCoords] = EncodeDDGIProbeData(DDGI_FALLBACK_COORDS_ENCODE(fallbackCoords), DDGI_PROBE_STATE_INACTIVE, 0.0f); + } +} + +#endif + #ifdef _CS_TraceRays RWTexture2D RWProbesTrace : register(u0); @@ -392,6 +504,8 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) // Add some bias to prevent self occlusion artifacts in Chebyshev due to Global SDF being very incorrect in small scale radiance.w = max(radiance.w + GlobalSDF.CascadeVoxelSize[hit.HitCascade] * 0.5f, 0); + float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w; + radiance.w += probesSpacing * 0.05f; } } else @@ -639,7 +753,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ // Add distance (R), distance^2 (G) and weight (A) float rayDistance = CachedProbesTraceDistance[rayIndex]; - result += float4(rayDistance * rayWeight, (rayDistance * rayDistance) * rayWeight, 0.0f, rayWeight); + result += float4(rayDistance, rayDistance * rayDistance, 0.0f, 1.0f) * rayWeight; #endif } @@ -700,13 +814,17 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ //result.rgb = previous + (irradianceDelta * 0.25f); } result = float4(lerp(result.rgb, previous.rgb, historyWeight), 1.0f); + + // Apply quantization error to reduce yellowish artifacts due to R11G11B10 format + float noise = InterleavedGradientNoise(octahedralCoords, FrameIndexMod8); + result.rgb = QuantizeColor(result.rgb, noise, QuantizationError); #else result = float4(lerp(result.rg, previous.rg, historyWeight), 0.0f, 1.0f); #endif RWOutput[outputCoords] = result; - GroupMemoryBarrierWithGroupSync(); + uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2); #if DDGI_PROBE_UPDATE_MODE == 0 @@ -786,10 +904,10 @@ void PS_IndirectLighting(Quad_VS2PS input, out float4 output : SV_Target0) } // Sample irradiance - float bias = 0.2f; float dither = RandN2(input.TexCoord + TemporalTime).x; - float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesData, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, bias, dither); - 
+ float3 samplePos = gBuffer.WorldPos + gBuffer.Normal * (dither * 0.1f + 0.1f); + float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesData, ProbesDistance, ProbesIrradiance, samplePos, gBuffer.Normal, DDGI_DEFAULT_BIAS, dither); + // Calculate lighting float3 diffuseColor = GetDiffuseColor(gBuffer); float3 diffuse = Diffuse_Lambert(diffuseColor); diff --git a/Source/Shaders/GI/GlobalSurfaceAtlas.shader b/Source/Shaders/GI/GlobalSurfaceAtlas.shader index 6778a7cd7..6930107d1 100644 --- a/Source/Shaders/GI/GlobalSurfaceAtlas.shader +++ b/Source/Shaders/GI/GlobalSurfaceAtlas.shader @@ -328,7 +328,6 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target float3 viewRay = lerp(lerp(ViewFrustumWorldRays[3], ViewFrustumWorldRays[0], input.TexCoord.x), lerp(ViewFrustumWorldRays[2], ViewFrustumWorldRays[1], input.TexCoord.x), 1 - input.TexCoord.y).xyz; viewRay = normalize(viewRay - ViewWorldPos); trace.Init(ViewWorldPos, viewRay, ViewNearPlane, ViewFarPlane); - trace.NeedsHitNormal = true; GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace); float3 color; @@ -337,7 +336,6 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target // Sample Global Surface Atlas at the hit location float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit); color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb; - //color = hit.HitNormal * 0.5f + 0.5f; } else { diff --git a/Source/Shaders/GlobalSignDistanceField.hlsl b/Source/Shaders/GlobalSignDistanceField.hlsl index 8075c081d..c1bd4250b 100644 --- a/Source/Shaders/GlobalSignDistanceField.hlsl +++ b/Source/Shaders/GlobalSignDistanceField.hlsl @@ -32,17 +32,13 @@ struct GlobalSDFTrace float MinDistance; float3 WorldDirection; float MaxDistance; - float StepScale; - bool NeedsHitNormal; - void Init(float3 worldPosition, float3 worldDirection, float minDistance, float maxDistance, float stepScale = 1.0f) + void Init(float3 worldPosition, float3 worldDirection, float minDistance, float maxDistance) { WorldPosition = worldPosition; WorldDirection = worldDirection; MinDistance = minDistance; MaxDistance = maxDistance; - StepScale = stepScale; - NeedsHitNormal = false; } }; @@ -75,12 +71,23 @@ void GetGlobalSDFCascadeUV(const GlobalSDFData data, uint cascade, float3 worldP textureUV = float3(((float)cascade + cascadeUV.x) / (float)data.CascadesCount, cascadeUV.y, cascadeUV.z); // Cascades are placed next to each other on X axis } -// Clamps Global SDF cascade UV to ensure it can be sued for gradient sampling (clamps first and last pixels). 
+void GetGlobalSDFCascadeUV(const GlobalSDFData data, uint cascade, float3 worldPosition, out float3 cascadeUV, out float3 textureUV, out float3 textureMipUV) +{ + float4 cascadePosDistance = data.CascadePosDistance[cascade]; + float3 posInCascade = worldPosition - cascadePosDistance.xyz; + float cascadeSize = cascadePosDistance.w * 2; + cascadeUV = saturate(posInCascade / cascadeSize + 0.5f); + textureUV = float3(((float)cascade + cascadeUV.x) / (float)data.CascadesCount, cascadeUV.y, cascadeUV.z); // Cascades are placed next to each other on X axis + float halfTexelOffsetMip = (GLOBAL_SDF_RASTERIZE_MIP_FACTOR * 0.5f) / data.Resolution; + textureMipUV = textureUV + float3(halfTexelOffsetMip / (float)data.CascadesCount, halfTexelOffsetMip, halfTexelOffsetMip); // Mipmaps are offset by half texel to sample correctly +} + +// Clamps Global SDF cascade UV to ensure it can be used for gradient sampling (clamps first and last pixels). void ClampGlobalSDFTextureGradientUV(const GlobalSDFData data, uint cascade, float texelOffset, inout float3 textureUV) { float cascadeSizeUV = 1.0f / data.CascadesCount; - float cascadeUVStart = cascadeSizeUV * cascade + texelOffset; - float cascadeUVEnd = cascadeUVStart + cascadeSizeUV - texelOffset * 3; + float cascadeUVStart = cascadeSizeUV * cascade + texelOffset * 2; + float cascadeUVEnd = cascadeUVStart + cascadeSizeUV - texelOffset * 4; textureUV.x = clamp(textureUV.x, cascadeUVStart, cascadeUVEnd); } @@ -144,13 +151,13 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D tex, Text startCascade = min(startCascade, data.CascadesCount - 1); for (uint cascade = startCascade; cascade < data.CascadesCount; cascade++) { - float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); + float3 cascadeUV, textureUV, textureMipUV; + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV, textureMipUV); float voxelSize = data.CascadeVoxelSize[cascade]; float chunkSize = voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); float maxDistanceMip = data.CascadeMaxDistanceMip[cascade]; - float distanceMip = mip.SampleLevel(GLOBAL_SDF_SAMPLER, textureUV, 0); + float distanceMip = mip.SampleLevel(GLOBAL_SDF_SAMPLER, textureMipUV, 0); if (distanceMip < chunkSize && all(cascadeUV > 0) && all(cascadeUV < 1)) { distance = distanceMip * maxDistanceMip; @@ -208,13 +215,13 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D startCascade = min(startCascade, data.CascadesCount - 1); for (uint cascade = startCascade; cascade < data.CascadesCount; cascade++) { - float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV); + float3 cascadeUV, textureUV, textureMipUV; + GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeUV, textureUV, textureMipUV); float voxelSize = data.CascadeVoxelSize[cascade]; float chunkSize = voxelSize * GLOBAL_SDF_RASTERIZE_CHUNK_SIZE; float chunkMargin = voxelSize * (GLOBAL_SDF_CHUNK_MARGIN_SCALE * GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN); float maxDistanceMip = data.CascadeMaxDistanceMip[cascade]; - float distanceMip = mip.SampleLevel(GLOBAL_SDF_SAMPLER, textureUV, 0) * maxDistanceMip; + float distanceMip = mip.SampleLevel(GLOBAL_SDF_SAMPLER, textureMipUV, 0) * maxDistanceMip; if (distanceMip < chunkSize && all(cascadeUV > 0) && all(cascadeUV < 1)) { float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; @@ -236,13 +243,13 @@ 
float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D { distance = distanceMip; float texelOffset = (float)GLOBAL_SDF_RASTERIZE_MIP_FACTOR / data.Resolution; - ClampGlobalSDFTextureGradientUV(data, cascade, texelOffset, textureUV); - float xp = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; - float xn = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x; - float yp = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x; - float yn = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; - float zp = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; - float zn = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; + ClampGlobalSDFTextureGradientUV(data, cascade, texelOffset, textureMipUV); + float xp = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureMipUV.x + texelOffset, textureMipUV.y, textureMipUV.z), 0).x; + float xn = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureMipUV.x - texelOffset, textureMipUV.y, textureMipUV.z), 0).x; + float yp = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureMipUV.x, textureMipUV.y + texelOffset, textureMipUV.z), 0).x; + float yn = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureMipUV.x, textureMipUV.y - texelOffset, textureMipUV.z), 0).x; + float zp = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureMipUV.x, textureMipUV.y, textureMipUV.z + texelOffset), 0).x; + float zn = mip.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureMipUV.x, textureMipUV.y, textureMipUV.z - texelOffset), 0).x; gradient = float3(xp - xn, yp - yn, zp - zn) * maxDistanceMip; } break; @@ -290,59 +297,32 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D float maxDistanceTex = data.CascadeMaxDistanceTex[cascade]; float maxDistanceMip = data.CascadeMaxDistanceMip[cascade]; LOOP - for (; step < 250 && stepTime < intersections.y && hit.HitTime < 0.0f; step++) + for (; step < 100 && stepTime < intersections.y && hit.HitTime < 0.0f; step++) { float3 stepPosition = trace.WorldPosition + trace.WorldDirection * stepTime; - float stepScale = trace.StepScale; // Sample SDF - float stepDistance, voxelSizeScale = (float)GLOBAL_SDF_RASTERIZE_MIP_FACTOR; - float3 cascadeUV, textureUV; - GetGlobalSDFCascadeUV(data, cascade, stepPosition, cascadeUV, textureUV); - float distanceMip = mip.SampleLevel(GLOBAL_SDF_SAMPLER, textureUV, 0) * maxDistanceMip; - if (distanceMip < chunkSize) - { - stepDistance = distanceMip; - float distanceTex = tex.SampleLevel(GLOBAL_SDF_SAMPLER, textureUV, 0) * maxDistanceTex; - if (distanceTex < chunkMargin) - { - stepDistance = distanceTex; - voxelSizeScale = 1.0f; - stepScale *= 0.63f; // Perform smaller steps nearby geometry - } - } - else - { - // Assume no SDF nearby so perform a jump to the next chunk - stepDistance = chunkSize; - voxelSizeScale = 1.0f; - } + float stepDistance; + float3 cascadeUV, textureUV, textureMipUV; + GetGlobalSDFCascadeUV(data, cascade, stepPosition, cascadeUV, textureUV, textureMipUV); + stepDistance = min(mip.SampleLevel(GLOBAL_SDF_SAMPLER, textureMipUV, 0) * maxDistanceMip, chunkSize); + float distanceTex = tex.SampleLevel(GLOBAL_SDF_SAMPLER, textureUV, 0) * maxDistanceTex; + FLATTEN + if (distanceTex < chunkMargin) + stepDistance = distanceTex; // Detect surface hit - float minSurfaceThickness = 
voxelSizeScale * voxelExtent * saturate(stepTime / voxelSize); + float minSurfaceThickness = voxelExtent * saturate(stepTime / voxelSize); if (stepDistance < minSurfaceThickness) { // Surface hit hit.HitTime = max(stepTime + stepDistance - minSurfaceThickness, 0.0f); hit.HitCascade = cascade; hit.HitSDF = stepDistance; - if (trace.NeedsHitNormal) - { - // Calculate hit normal from SDF gradient - float texelOffset = 1.0f / data.Resolution; - ClampGlobalSDFTextureGradientUV(data, cascade, texelOffset, textureUV); - float xp = tex.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x; - float xn = tex.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x; - float yp = tex.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x; - float yn = tex.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x; - float zp = tex.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x; - float zn = tex.SampleLevel(GLOBAL_SDF_SAMPLER, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x; - hit.HitNormal = normalize(float3(xp - xn, yp - yn, zp - zn)); - } } // Move forward - stepTime += max(stepDistance * stepScale, voxelSize); + stepTime += max(stepDistance, voxelSize); } hit.StepsCount += step; } diff --git a/Source/Shaders/GlobalSignDistanceField.shader b/Source/Shaders/GlobalSignDistanceField.shader index 461dba08d..fe4bafda5 100644 --- a/Source/Shaders/GlobalSignDistanceField.shader +++ b/Source/Shaders/GlobalSignDistanceField.shader @@ -311,26 +311,39 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target float3 viewRay = lerp(lerp(ViewFrustumWorldRays[3], ViewFrustumWorldRays[0], input.TexCoord.x), lerp(ViewFrustumWorldRays[2], ViewFrustumWorldRays[1], input.TexCoord.x), 1 - input.TexCoord.y).xyz; viewRay = normalize(viewRay - ViewWorldPos); trace.Init(ViewWorldPos, viewRay, ViewNearPlane, ViewFarPlane); - trace.NeedsHitNormal = true; GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace); // Debug draw - float3 color = saturate(hit.StepsCount / 80.0f).xxx; - if (!hit.IsHit()) - color.rg *= 0.4f; -#if 0 - else - { + float3 color = saturate(hit.StepsCount / 50.0f).xxx; + if (hit.IsHit()) + { +#if 1 + float3 hitPosition = hit.GetHitPosition(trace); + float hitSDF; + float3 hitNormal = SampleGlobalSDFGradient(GlobalSDF, GlobalSDFTex, GlobalSDFMip, hitPosition, hitSDF, hit.HitCascade); +#if 1 + // Composite step count with SDF normals + //color.rgb *= saturate(normalize(hitNormal) * 0.5f + 0.7f) + 0.3f; + color = lerp(normalize(hitNormal) * 0.5f + 0.5f, 1 - color, saturate(hit.StepsCount / 80.0f)); +#else // Debug draw SDF normals - color.rgb = normalize(hit.HitNormal) * 0.5f + 0.5f; - } -#elif 1 + color = normalize(hitNormal) * 0.5f + 0.5f; +#endif +#else + // Heatmap with step count + if (hit.StepsCount > 40) + color = float3(saturate(hit.StepsCount / 80.0f), 0, 0); + else if (hit.StepsCount > 20) + color = float3(saturate(hit.StepsCount / 40.0f).xx, 0); + else + color = float3(0, saturate(hit.StepsCount / 20.0f), 0); +#endif + } else { - // Composite with SDF normals - color.rgb *= saturate(normalize(hit.HitNormal) * 0.5f + 0.7f) + 0.1f; + // Bluish sky + color.rg *= 0.4f; } -#endif return float4(color, 1); } diff --git a/Source/Shaders/Noise.hlsl b/Source/Shaders/Noise.hlsl index dc35f1efc..df5a041fa 100644 --- a/Source/Shaders/Noise.hlsl +++ 
b/Source/Shaders/Noise.hlsl @@ -54,6 +54,26 @@ float2 PerlinNoiseFade(float2 t) return t * t * t * (t * (t * 6.0 - 15.0) + 10.0); } +// "Next Generation Post Processing in Call of Duty: Advanced Warfare" +// http://advances.realtimerendering.com/s2014/index.html +float InterleavedGradientNoise(float2 uv, uint frameCount) +{ + const float2 magicFrameScale = float2(47, 17) * 0.695; + uv += frameCount * magicFrameScale; + const float3 magic = float3(0.06711056, 0.00583715, 52.9829189); + return frac(magic.z * frac(dot(uv, magic.xy))); +} + +// Removes error from the color to properly store it in lower precision formats (error = 2^(-mantissaBits)) +float3 QuantizeColor(float3 color, float noise, float3 error) +{ + float3 delta = color * error; + delta.x = asfloat(asuint(delta.x) & ~0x007fffff); + delta.y = asfloat(asuint(delta.y) & ~0x007fffff); + delta.z = asfloat(asuint(delta.z) & ~0x007fffff); + return color + delta * noise; +} + float rand2dTo1d(float2 value, float2 dotDir = float2(12.9898, 78.233)) { // https://www.ronja-tutorials.com/post/024-white-noise/ diff --git a/Source/ThirdParty/meshoptimizer/allocator.cpp b/Source/ThirdParty/meshoptimizer/allocator.cpp index 12eda3872..6b6083da2 100644 --- a/Source/ThirdParty/meshoptimizer/allocator.cpp +++ b/Source/ThirdParty/meshoptimizer/allocator.cpp @@ -1,8 +1,17 @@ // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details #include "meshoptimizer.h" -void meshopt_setAllocator(void*(MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t), void(MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*)) +#ifdef MESHOPTIMIZER_ALLOC_EXPORT +meshopt_Allocator::Storage& meshopt_Allocator::storage() { - meshopt_Allocator::Storage::allocate = allocate; - meshopt_Allocator::Storage::deallocate = deallocate; + static Storage s = {::operator new, ::operator delete }; + return s; +} +#endif + +void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*)) +{ + meshopt_Allocator::Storage& s = meshopt_Allocator::storage(); + s.allocate = allocate; + s.deallocate = deallocate; } diff --git a/Source/ThirdParty/meshoptimizer/clusterizer.cpp b/Source/ThirdParty/meshoptimizer/clusterizer.cpp index 52fe5a362..73cc0ab53 100644 --- a/Source/ThirdParty/meshoptimizer/clusterizer.cpp +++ b/Source/ThirdParty/meshoptimizer/clusterizer.cpp @@ -6,19 +6,39 @@ #include #include +// The block below auto-detects SIMD ISA that can be used on the target platform +#ifndef MESHOPTIMIZER_NO_SIMD +#if defined(__SSE2__) || (defined(_MSC_VER) && defined(_M_X64)) +#define SIMD_SSE +#include +#elif defined(__aarch64__) || (defined(_MSC_VER) && defined(_M_ARM64) && _MSC_VER >= 1922) +#define SIMD_NEON +#include +#endif +#endif // !MESHOPTIMIZER_NO_SIMD + // This work is based on: // Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016 // Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016 // Jack Ritter. An Efficient Bounding Sphere. 1990 +// Thomas Larsson. Fast and Tight Fitting Bounding Spheres. 2008 +// Ingo Wald, Vlastimil Havran. On building fast kd-Trees for Ray Tracing, and on doing that in O(N log N). 
2006 namespace meshopt { -// This must be <= 255 since index 0xff is used internally to indice a vertex that doesn't belong to a meshlet -const size_t kMeshletMaxVertices = 255; +// This must be <= 256 since meshlet indices are stored as bytes +const size_t kMeshletMaxVertices = 256; // A reasonable limit is around 2*max_vertices or less const size_t kMeshletMaxTriangles = 512; +// We keep a limited number of seed triangles and add a few triangles per finished meshlet +const size_t kMeshletMaxSeeds = 256; +const size_t kMeshletAddSeeds = 4; + +// To avoid excessive recursion for malformed inputs, we limit the maximum depth of the tree +const int kMeshletMaxTreeDepth = 50; + struct TriangleAdjacency2 { unsigned int* counts; @@ -70,72 +90,190 @@ static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned for (size_t i = 0; i < vertex_count; ++i) { assert(adjacency.offsets[i] >= adjacency.counts[i]); - adjacency.offsets[i] -= adjacency.counts[i]; } } -static void computeBoundingSphere(float result[4], const float points[][3], size_t count) +static void buildTriangleAdjacencySparse(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) { - assert(count > 0); + size_t face_count = index_count / 3; - // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates - size_t pmin[3] = {0, 0, 0}; - size_t pmax[3] = {0, 0, 0}; + // sparse mode can build adjacency more quickly by ignoring unused vertices, using a bit to mark visited vertices + const unsigned int sparse_seen = 1u << 31; + assert(index_count < sparse_seen); + + // allocate arrays + adjacency.counts = allocator.allocate(vertex_count); + adjacency.offsets = allocator.allocate(vertex_count); + adjacency.data = allocator.allocate(index_count); + + // fill triangle counts + for (size_t i = 0; i < index_count; ++i) + assert(indices[i] < vertex_count); + + for (size_t i = 0; i < index_count; ++i) + adjacency.counts[indices[i]] = 0; + + for (size_t i = 0; i < index_count; ++i) + adjacency.counts[indices[i]]++; + + // fill offset table; uses sparse_seen bit to tag visited vertices + unsigned int offset = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int v = indices[i]; + + if ((adjacency.counts[v] & sparse_seen) == 0) + { + adjacency.offsets[v] = offset; + offset += adjacency.counts[v]; + adjacency.counts[v] |= sparse_seen; + } + } + + assert(offset == index_count); + + // fill triangle data + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + + adjacency.data[adjacency.offsets[a]++] = unsigned(i); + adjacency.data[adjacency.offsets[b]++] = unsigned(i); + adjacency.data[adjacency.offsets[c]++] = unsigned(i); + } + + // fix offsets that have been disturbed by the previous pass + // also fix counts (that were marked with sparse_seen by the first pass) + for (size_t i = 0; i < index_count; ++i) + { + unsigned int v = indices[i]; + + if (adjacency.counts[v] & sparse_seen) + { + adjacency.counts[v] &= ~sparse_seen; + + assert(adjacency.offsets[v] >= adjacency.counts[v]); + adjacency.offsets[v] -= adjacency.counts[v]; + } + } +} + +static void clearUsed(short* used, size_t vertex_count, const unsigned int* indices, size_t index_count) +{ + // for sparse inputs, it's faster to only clear vertices referenced by the index buffer + if (vertex_count <= index_count) + memset(used, -1, vertex_count * sizeof(short)); + else + 
for (size_t i = 0; i < index_count; ++i) + { + assert(indices[i] < vertex_count); + used[indices[i]] = -1; + } +} + +static void computeBoundingSphere(float result[4], const float* points, size_t count, size_t points_stride, const float* radii, size_t radii_stride, size_t axis_count) +{ + static const float kAxes[7][3] = { + // X, Y, Z + {1, 0, 0}, + {0, 1, 0}, + {0, 0, 1}, + + // XYZ, -XYZ, X-YZ, XY-Z; normalized to unit length + {0.57735026f, 0.57735026f, 0.57735026f}, + {-0.57735026f, 0.57735026f, 0.57735026f}, + {0.57735026f, -0.57735026f, 0.57735026f}, + {0.57735026f, 0.57735026f, -0.57735026f}, + }; + + assert(count > 0); + assert(axis_count <= sizeof(kAxes) / sizeof(kAxes[0])); + + size_t points_stride_float = points_stride / sizeof(float); + size_t radii_stride_float = radii_stride / sizeof(float); + + // find extremum points along all axes; for each axis we get a pair of points with min/max coordinates + size_t pmin[7], pmax[7]; + float tmin[7], tmax[7]; + + for (size_t axis = 0; axis < axis_count; ++axis) + { + pmin[axis] = pmax[axis] = 0; + tmin[axis] = FLT_MAX; + tmax[axis] = -FLT_MAX; + } for (size_t i = 0; i < count; ++i) { - const float* p = points[i]; + const float* p = points + i * points_stride_float; + float r = radii[i * radii_stride_float]; - for (int axis = 0; axis < 3; ++axis) + for (size_t axis = 0; axis < axis_count; ++axis) { - pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis]; - pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis]; + const float* ax = kAxes[axis]; + + float tp = ax[0] * p[0] + ax[1] * p[1] + ax[2] * p[2]; + float tpmin = tp - r, tpmax = tp + r; + + pmin[axis] = (tpmin < tmin[axis]) ? i : pmin[axis]; + pmax[axis] = (tpmax > tmax[axis]) ? i : pmax[axis]; + tmin[axis] = (tpmin < tmin[axis]) ? tpmin : tmin[axis]; + tmax[axis] = (tpmax > tmax[axis]) ? tpmax : tmax[axis]; } } // find the pair of points with largest distance - float paxisd2 = 0; - int paxis = 0; + size_t paxis = 0; + float paxisdr = 0; - for (int axis = 0; axis < 3; ++axis) + for (size_t axis = 0; axis < axis_count; ++axis) { - const float* p1 = points[pmin[axis]]; - const float* p2 = points[pmax[axis]]; + const float* p1 = points + pmin[axis] * points_stride_float; + const float* p2 = points + pmax[axis] * points_stride_float; + float r1 = radii[pmin[axis] * radii_stride_float]; + float r2 = radii[pmax[axis] * radii_stride_float]; float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]); + float dr = sqrtf(d2) + r1 + r2; - if (d2 > paxisd2) + if (dr > paxisdr) { - paxisd2 = d2; + paxisdr = dr; paxis = axis; } } // use the longest segment as the initial sphere diameter - const float* p1 = points[pmin[paxis]]; - const float* p2 = points[pmax[paxis]]; + const float* p1 = points + pmin[paxis] * points_stride_float; + const float* p2 = points + pmax[paxis] * points_stride_float; + float r1 = radii[pmin[paxis] * radii_stride_float]; + float r2 = radii[pmax[paxis] * radii_stride_float]; - float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2}; - float radius = sqrtf(paxisd2) / 2; + float paxisd = sqrtf((p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2])); + float paxisk = paxisd > 0 ? 
(paxisd + r2 - r1) / (2 * paxisd) : 0.f; + + float center[3] = {p1[0] + (p2[0] - p1[0]) * paxisk, p1[1] + (p2[1] - p1[1]) * paxisk, p1[2] + (p2[2] - p1[2]) * paxisk}; + float radius = paxisdr / 2; // iteratively adjust the sphere up until all points fit for (size_t i = 0; i < count; ++i) { - const float* p = points[i]; + const float* p = points + i * points_stride_float; + float r = radii[i * radii_stride_float]; + float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]); + float d = sqrtf(d2); - if (d2 > radius * radius) + if (d + r > radius) { - float d = sqrtf(d2); - assert(d > 0); + float k = d > 0 ? (d + r - radius) / (2 * d) : 0.f; - float k = 0.5f + (radius / d) / 2; - - center[0] = center[0] * k + p[0] * (1 - k); - center[1] = center[1] * k + p[1] * (1 - k); - center[2] = center[2] * k + p[2] * (1 - k); - radius = (radius + d) / 2; + center[0] += k * (p[0] - center[0]); + center[1] += k * (p[1] - center[1]); + center[2] += k * (p[2] - center[2]); + radius = (radius + d + r) / 2; } } @@ -151,12 +289,12 @@ struct Cone float nx, ny, nz; }; -static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius) +static float getMeshletScore(float distance, float spread, float cone_weight, float expected_radius) { float cone = 1.f - spread * cone_weight; float cone_clamped = cone < 1e-3f ? 1e-3f : cone; - return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped; + return (1 + distance / expected_radius * (1 - cone_weight)) * cone_clamped; } static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count) @@ -221,72 +359,61 @@ static float computeTriangleCones(Cone* triangles, const unsigned int* indices, return mesh_area; } -static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles) +static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, short* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles, bool split = false) { - size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3; - - // fill 4b padding with 0 - while (offset & 3) - meshlet_triangles[offset++] = 0; -} - -static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles) -{ - unsigned char& av = used[a]; - unsigned char& bv = used[b]; - unsigned char& cv = used[c]; + short& av = used[a]; + short& bv = used[b]; + short& cv = used[c]; bool result = false; - unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); + int used_extra = (av < 0) + (bv < 0) + (cv < 0); - if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) + if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles || split) { meshlets[meshlet_offset] = meshlet; for (size_t j = 0; j < meshlet.vertex_count; ++j) - used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff; - - finishMeshlet(meshlet, meshlet_triangles); + used[meshlet_vertices[meshlet.vertex_offset + j]] = -1; meshlet.vertex_offset += meshlet.vertex_count; - meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding + meshlet.triangle_offset += 
meshlet.triangle_count * 3; meshlet.vertex_count = 0; meshlet.triangle_count = 0; result = true; } - if (av == 0xff) + if (av < 0) { - av = (unsigned char)meshlet.vertex_count; + av = short(meshlet.vertex_count); meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a; } - if (bv == 0xff) + if (bv < 0) { - bv = (unsigned char)meshlet.vertex_count; + bv = short(meshlet.vertex_count); meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b; } - if (cv == 0xff) + if (cv < 0) { - cv = (unsigned char)meshlet.vertex_count; + cv = short(meshlet.vertex_count); meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c; } - meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av; - meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv; - meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = (unsigned char)av; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = (unsigned char)bv; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = (unsigned char)cv; meshlet.triangle_count++; return result; } -static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Cone* meshlet_cone, unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, const unsigned char* used, float meshlet_expected_radius, float cone_weight, unsigned int* out_extra) +static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Cone& meshlet_cone, const unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, const short* used, float meshlet_expected_radius, float cone_weight) { unsigned int best_triangle = ~0u; - unsigned int best_extra = 5; + int best_priority = 5; float best_score = FLT_MAX; for (size_t i = 0; i < meshlet.vertex_count; ++i) @@ -301,61 +428,159 @@ static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Co unsigned int triangle = neighbors[j]; unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; - unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff); + int extra = (used[a] < 0) + (used[b] < 0) + (used[c] < 0); + assert(extra <= 2); + + int priority = -1; // triangles that don't add new vertices to meshlets are max. 
priority - if (extra != 0) - { - // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets - if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1) - extra = 0; - - extra++; - } + if (extra == 0) + priority = 0; + // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets + else if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1) + priority = 1; + // if two vertices have live count of 2, removing this triangle will make another triangle dangling which is good for overall flow + else if ((live_triangles[a] == 2) + (live_triangles[b] == 2) + (live_triangles[c] == 2) >= 2) + priority = 1 + extra; + // otherwise adjust priority to be after the above cases, 3 or 4 based on used[] count + else + priority = 2 + extra; // since topology-based priority is always more important than the score, we can skip scoring in some cases - if (extra > best_extra) + if (priority > best_priority) continue; - float score = 0; + const Cone& tri_cone = triangles[triangle]; - // caller selects one of two scoring functions: geometrical (based on meshlet cone) or topological (based on remaining triangles) - if (meshlet_cone) - { - const Cone& tri_cone = triangles[triangle]; + float dx = tri_cone.px - meshlet_cone.px, dy = tri_cone.py - meshlet_cone.py, dz = tri_cone.pz - meshlet_cone.pz; + float distance = sqrtf(dx * dx + dy * dy + dz * dz); + float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz; - float distance2 = - (tri_cone.px - meshlet_cone->px) * (tri_cone.px - meshlet_cone->px) + - (tri_cone.py - meshlet_cone->py) * (tri_cone.py - meshlet_cone->py) + - (tri_cone.pz - meshlet_cone->pz) * (tri_cone.pz - meshlet_cone->pz); - - float spread = tri_cone.nx * meshlet_cone->nx + tri_cone.ny * meshlet_cone->ny + tri_cone.nz * meshlet_cone->nz; - - score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius); - } - else - { - // each live_triangles entry is >= 1 since it includes the current triangle we're processing - score = float(live_triangles[a] + live_triangles[b] + live_triangles[c] - 3); - } + float score = getMeshletScore(distance, spread, cone_weight, meshlet_expected_radius); // note that topology-based priority is always more important than the score // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost - if (extra < best_extra || score < best_score) + if (priority < best_priority || score < best_score) { best_triangle = triangle; - best_extra = extra; + best_priority = priority; best_score = score; } } } - if (out_extra) - *out_extra = best_extra; - return best_triangle; } +static size_t appendSeedTriangles(unsigned int* seeds, const meshopt_Meshlet& meshlet, const unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, float cornerx, float cornery, float cornerz) +{ + unsigned int best_seeds[kMeshletAddSeeds]; + unsigned int best_live[kMeshletAddSeeds]; + float best_score[kMeshletAddSeeds]; + + for (size_t i = 0; i < kMeshletAddSeeds; ++i) + { + best_seeds[i] = ~0u; + best_live[i] = ~0u; + best_score[i] = FLT_MAX; + } + + for (size_t i = 0; i < meshlet.vertex_count; ++i) + { + unsigned int index = meshlet_vertices[meshlet.vertex_offset + i]; + + unsigned int best_neighbor = ~0u; + unsigned int best_neighbor_live = ~0u; + + // find the neighbor with the 
smallest live metric + unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbors_size = adjacency.counts[index]; + + for (size_t j = 0; j < neighbors_size; ++j) + { + unsigned int triangle = neighbors[j]; + unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; + + unsigned int live = live_triangles[a] + live_triangles[b] + live_triangles[c]; + + if (live < best_neighbor_live) + { + best_neighbor = triangle; + best_neighbor_live = live; + } + } + + // add the neighbor to the list of seeds; the list is unsorted and the replacement criteria is approximate + if (best_neighbor == ~0u) + continue; + + float dx = triangles[best_neighbor].px - cornerx, dy = triangles[best_neighbor].py - cornery, dz = triangles[best_neighbor].pz - cornerz; + float best_neighbor_score = sqrtf(dx * dx + dy * dy + dz * dz); + + for (size_t j = 0; j < kMeshletAddSeeds; ++j) + { + // non-strict comparison reduces the number of duplicate seeds (triangles adjacent to multiple vertices) + if (best_neighbor_live < best_live[j] || (best_neighbor_live == best_live[j] && best_neighbor_score <= best_score[j])) + { + best_seeds[j] = best_neighbor; + best_live[j] = best_neighbor_live; + best_score[j] = best_neighbor_score; + break; + } + } + } + + // add surviving seeds to the meshlet + size_t seed_count = 0; + + for (size_t i = 0; i < kMeshletAddSeeds; ++i) + if (best_seeds[i] != ~0u) + seeds[seed_count++] = best_seeds[i]; + + return seed_count; +} + +static size_t pruneSeedTriangles(unsigned int* seeds, size_t seed_count, const unsigned char* emitted_flags) +{ + size_t result = 0; + + for (size_t i = 0; i < seed_count; ++i) + { + unsigned int index = seeds[i]; + + seeds[result] = index; + result += emitted_flags[index] == 0; + } + + return result; +} + +static unsigned int selectSeedTriangle(const unsigned int* seeds, size_t seed_count, const unsigned int* indices, const Cone* triangles, const unsigned int* live_triangles, float cornerx, float cornery, float cornerz) +{ + unsigned int best_seed = ~0u; + unsigned int best_live = ~0u; + float best_score = FLT_MAX; + + for (size_t i = 0; i < seed_count; ++i) + { + unsigned int index = seeds[i]; + unsigned int a = indices[index * 3 + 0], b = indices[index * 3 + 1], c = indices[index * 3 + 2]; + + unsigned int live = live_triangles[a] + live_triangles[b] + live_triangles[c]; + float dx = triangles[index].px - cornerx, dy = triangles[index].py - cornery, dz = triangles[index].pz - cornerz; + float score = sqrtf(dx * dx + dy * dy + dz * dz); + + if (live < best_live || (live == best_live && score < best_score)) + { + best_seed = index; + best_live = live; + best_score = score; + } + } + + return best_seed; +} + struct KDNode { union @@ -364,13 +589,13 @@ struct KDNode unsigned int index; }; - // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point) + // leaves: axis = 3, children = number of points including this one // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children unsigned int axis : 2; unsigned int children : 30; }; -static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot) +static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, int axis, float pivot) { size_t m = 0; @@ -400,7 +625,7 @@ static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, u result.index = indices[0]; 
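// note: with this change a leaf's 'children' field stores the total number of points in the leaf (set just below),
// and the remaining points are written into the KDNode entries that immediately follow it; kdtreeNearest walks that
// run linearly and resets 'children' to 0 once every triangle in the leaf has been emitted, so later queries can
// skip exhausted leaves (and, via the branch check, whole exhausted subtrees) without revisiting them.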
result.axis = 3; - result.children = unsigned(count - 1); + result.children = unsigned(count); // all remaining points are stored in nodes immediately following the leaf for (size_t i = 1; i < count; ++i) @@ -415,7 +640,7 @@ static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, u return offset + count; } -static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size) +static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size, int depth) { assert(count > 0); assert(offset < node_count); @@ -441,13 +666,14 @@ static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const } // split axis is one where the variance is largest - unsigned int axis = (vars[0] >= vars[1] && vars[0] >= vars[2]) ? 0 : (vars[1] >= vars[2] ? 1 : 2); + int axis = (vars[0] >= vars[1] && vars[0] >= vars[2]) ? 0 : (vars[1] >= vars[2] ? 1 : 2); float split = mean[axis]; size_t middle = kdtreePartition(indices, count, points, stride, axis, split); // when the partition is degenerate simply consolidate the points into a single node - if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2) + // this also ensures recursion depth is bounded on pathological inputs + if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2 || depth >= kMeshletMaxTreeDepth) return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); KDNode& result = nodes[offset]; @@ -456,35 +682,40 @@ static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const result.axis = axis; // left subtree is right after our node - size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size); + size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size, depth + 1); // distance to the right subtree is represented explicitly + assert(next_offset - offset > 1); result.children = unsigned(next_offset - offset - 1); - return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size); + return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size, depth + 1); } static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit) { const KDNode& node = nodes[root]; + if (node.children == 0) + return; + if (node.axis == 3) { // leaf - for (unsigned int i = 0; i <= node.children; ++i) + bool inactive = true; + + for (unsigned int i = 0; i < node.children; ++i) { unsigned int index = nodes[root + i].index; if (emitted_flags[index]) continue; + inactive = false; + const float* point = points + index * stride; - float distance2 = - (point[0] - position[0]) * (point[0] - position[0]) + - (point[1] - position[1]) * (point[1] - position[1]) + - (point[2] - position[2]) * (point[2] - position[2]); - float distance = sqrtf(distance2); + float dx = point[0] - position[0], dy = point[1] - position[1], dz = point[2] - position[2]; + float distance = sqrtf(dx * dx + dy * dy + dz * dz); if (distance < limit) { @@ -492,6 +723,10 @@ static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, limit = distance; } } + + // deactivate leaves that no longer have items to emit + if 
(inactive) + nodes[root].children = 0; } else { @@ -500,6 +735,12 @@ static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, unsigned int first = (delta <= 0) ? 0 : node.children; unsigned int second = first ^ node.children; + // deactivate branches that no longer have items to emit to accelerate traversal + // note that we do this *before* recursing which delays deactivation but keeps tail calls + if ((nodes[root + 1 + first].children | nodes[root + 1 + second].children) == 0) + nodes[root].children = 0; + + // recursion depth is bounded by tree depth (which is limited by construction) kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit); // only process the other node if it can have a match based on closest distance so far @@ -508,6 +749,380 @@ static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, } } +struct BVHBoxT +{ + float min[4]; + float max[4]; +}; + +struct BVHBox +{ + float min[3]; + float max[3]; +}; + +#if defined(SIMD_SSE) +static float boxMerge(BVHBoxT& box, const BVHBox& other) +{ + __m128 min = _mm_loadu_ps(box.min); + __m128 max = _mm_loadu_ps(box.max); + + // note: over-read is safe because BVHBox array is allocated with padding + min = _mm_min_ps(min, _mm_loadu_ps(other.min)); + max = _mm_max_ps(max, _mm_loadu_ps(other.max)); + + _mm_storeu_ps(box.min, min); + _mm_storeu_ps(box.max, max); + + __m128 size = _mm_sub_ps(max, min); + __m128 size_yzx = _mm_shuffle_ps(size, size, _MM_SHUFFLE(0, 0, 2, 1)); + __m128 mul = _mm_mul_ps(size, size_yzx); + __m128 sum_xy = _mm_add_ss(mul, _mm_shuffle_ps(mul, mul, _MM_SHUFFLE(1, 1, 1, 1))); + __m128 sum_xyz = _mm_add_ss(sum_xy, _mm_shuffle_ps(mul, mul, _MM_SHUFFLE(2, 2, 2, 2))); + + return _mm_cvtss_f32(sum_xyz); +} +#elif defined(SIMD_NEON) +static float boxMerge(BVHBoxT& box, const BVHBox& other) +{ + float32x4_t min = vld1q_f32(box.min); + float32x4_t max = vld1q_f32(box.max); + + // note: over-read is safe because BVHBox array is allocated with padding + min = vminq_f32(min, vld1q_f32(other.min)); + max = vmaxq_f32(max, vld1q_f32(other.max)); + + vst1q_f32(box.min, min); + vst1q_f32(box.max, max); + + float32x4_t size = vsubq_f32(max, min); + float32x4_t size_yzx = vextq_f32(vextq_f32(size, size, 3), size, 2); + float32x4_t mul = vmulq_f32(size, size_yzx); + float sum_xy = vgetq_lane_f32(mul, 0) + vgetq_lane_f32(mul, 1); + float sum_xyz = sum_xy + vgetq_lane_f32(mul, 2); + + return sum_xyz; +} +#else +static float boxMerge(BVHBoxT& box, const BVHBox& other) +{ + for (int k = 0; k < 3; ++k) + { + box.min[k] = other.min[k] < box.min[k] ? other.min[k] : box.min[k]; + box.max[k] = other.max[k] > box.max[k] ? 
other.max[k] : box.max[k]; + } + + float sx = box.max[0] - box.min[0], sy = box.max[1] - box.min[1], sz = box.max[2] - box.min[2]; + return sx * sy + sx * sz + sy * sz; +} +#endif + +inline unsigned int radixFloat(unsigned int v) +{ + // if sign bit is 0, flip sign bit + // if sign bit is 1, flip everything + unsigned int mask = (int(v) >> 31) | 0x80000000; + return v ^ mask; +} + +static void computeHistogram(unsigned int (&hist)[1024][3], const float* data, size_t count) +{ + memset(hist, 0, sizeof(hist)); + + const unsigned int* bits = reinterpret_cast(data); + + // compute 3 10-bit histograms in parallel (dropping 2 LSB) + for (size_t i = 0; i < count; ++i) + { + unsigned int id = radixFloat(bits[i]); + + hist[(id >> 2) & 1023][0]++; + hist[(id >> 12) & 1023][1]++; + hist[(id >> 22) & 1023][2]++; + } + + unsigned int sum0 = 0, sum1 = 0, sum2 = 0; + + // replace histogram data with prefix histogram sums in-place + for (int i = 0; i < 1024; ++i) + { + unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2]; + + hist[i][0] = sum0; + hist[i][1] = sum1; + hist[i][2] = sum2; + + sum0 += hx; + sum1 += hy; + sum2 += hz; + } + + assert(sum0 == count && sum1 == count && sum2 == count); +} + +static void radixPass(unsigned int* destination, const unsigned int* source, const float* keys, size_t count, unsigned int (&hist)[1024][3], int pass) +{ + const unsigned int* bits = reinterpret_cast(keys); + int bitoff = pass * 10 + 2; // drop 2 LSB to be able to use 3 10-bit passes + + for (size_t i = 0; i < count; ++i) + { + unsigned int id = (radixFloat(bits[source[i]]) >> bitoff) & 1023; + + destination[hist[id][pass]++] = source[i]; + } +} + +static void bvhPrepare(BVHBox* boxes, float* centroids, const unsigned int* indices, size_t face_count, const float* vertex_positions, size_t vertex_count, size_t vertex_stride_float) +{ + (void)vertex_count; + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + const float* va = vertex_positions + vertex_stride_float * a; + const float* vb = vertex_positions + vertex_stride_float * b; + const float* vc = vertex_positions + vertex_stride_float * c; + + BVHBox& box = boxes[i]; + + for (int k = 0; k < 3; ++k) + { + box.min[k] = va[k] < vb[k] ? va[k] : vb[k]; + box.min[k] = vc[k] < box.min[k] ? vc[k] : box.min[k]; + + box.max[k] = va[k] > vb[k] ? va[k] : vb[k]; + box.max[k] = vc[k] > box.max[k] ? 
vc[k] : box.max[k]; + + centroids[i + face_count * k] = (box.min[k] + box.max[k]) / 2.f; + } + } +} + +static size_t bvhCountVertices(const unsigned int* order, size_t count, short* used, const unsigned int* indices, unsigned int* out = NULL) +{ + // count number of unique vertices + size_t used_vertices = 0; + for (size_t i = 0; i < count; ++i) + { + unsigned int index = order[i]; + unsigned int a = indices[index * 3 + 0], b = indices[index * 3 + 1], c = indices[index * 3 + 2]; + + used_vertices += (used[a] < 0) + (used[b] < 0) + (used[c] < 0); + used[a] = used[b] = used[c] = 1; + + if (out) + out[i] = unsigned(used_vertices); + } + + // reset used[] for future invocations + for (size_t i = 0; i < count; ++i) + { + unsigned int index = order[i]; + unsigned int a = indices[index * 3 + 0], b = indices[index * 3 + 1], c = indices[index * 3 + 2]; + + used[a] = used[b] = used[c] = -1; + } + + return used_vertices; +} + +static void bvhPackLeaf(unsigned char* boundary, size_t count) +{ + // mark meshlet boundary for future reassembly + assert(count > 0); + + boundary[0] = 1; + memset(boundary + 1, 0, count - 1); +} + +static void bvhPackTail(unsigned char* boundary, const unsigned int* order, size_t count, short* used, const unsigned int* indices, size_t max_vertices, size_t max_triangles) +{ + for (size_t i = 0; i < count;) + { + size_t chunk = i + max_triangles <= count ? max_triangles : count - i; + + if (bvhCountVertices(order + i, chunk, used, indices) <= max_vertices) + { + bvhPackLeaf(boundary + i, chunk); + i += chunk; + continue; + } + + // chunk is vertex bound, split it into smaller meshlets + assert(chunk > max_vertices / 3); + + bvhPackLeaf(boundary + i, max_vertices / 3); + i += max_vertices / 3; + } +} + +static bool bvhDivisible(size_t count, size_t min, size_t max) +{ + // count is representable as a sum of values in [min..max] if if it in range of [k*min..k*min+k*(max-min)] + // equivalent to ceil(count / max) <= floor(count / min), but the form below allows using idiv (see nv_cluster_builder) + // we avoid expensive integer divisions in the common case where min is <= max/2 + return min * 2 <= max ? count >= min : count % min <= (count / min) * (max - min); +} + +static void bvhComputeArea(float* areas, const BVHBox* boxes, const unsigned int* order, size_t count) +{ + BVHBoxT accuml = {{FLT_MAX, FLT_MAX, FLT_MAX, 0}, {-FLT_MAX, -FLT_MAX, -FLT_MAX, 0}}; + BVHBoxT accumr = accuml; + + for (size_t i = 0; i < count; ++i) + { + float larea = boxMerge(accuml, boxes[order[i]]); + float rarea = boxMerge(accumr, boxes[order[count - 1 - i]]); + + areas[i] = larea; + areas[i + count] = rarea; + } +} + +static size_t bvhPivot(const float* areas, const unsigned int* vertices, size_t count, size_t step, size_t min, size_t max, float fill, size_t maxfill, float* out_cost) +{ + bool aligned = count >= min * 2 && bvhDivisible(count, min, max); + size_t end = aligned ? 
count - min : count - 1; + + float rmaxfill = 1.f / float(int(maxfill)); + + // find best split that minimizes SAH + size_t bestsplit = 0; + float bestcost = FLT_MAX; + + for (size_t i = min - 1; i < end; i += step) + { + size_t lsplit = i + 1, rsplit = count - (i + 1); + + if (!bvhDivisible(lsplit, min, max)) + continue; + if (aligned && !bvhDivisible(rsplit, min, max)) + continue; + + // areas[x] = inclusive surface area of boxes[0..x] + // areas[count-1-x] = inclusive surface area of boxes[x..count-1] + float larea = areas[i], rarea = areas[(count - 1 - (i + 1)) + count]; + float cost = larea * float(int(lsplit)) + rarea * float(int(rsplit)); + + if (cost > bestcost) + continue; + + // use vertex fill when splitting vertex limited clusters; note that we use the same (left->right) vertex count + // using bidirectional vertex counts is a little more expensive to compute and produces slightly worse results in practice + size_t lfill = vertices ? vertices[i] : lsplit; + size_t rfill = vertices ? vertices[i] : rsplit; + + // fill cost; use floating point math to round up to maxfill to avoid expensive integer modulo + int lrest = int(float(int(lfill + maxfill - 1)) * rmaxfill) * int(maxfill) - int(lfill); + int rrest = int(float(int(rfill + maxfill - 1)) * rmaxfill) * int(maxfill) - int(rfill); + + cost += fill * (float(lrest) * larea + float(rrest) * rarea); + + if (cost < bestcost) + { + bestcost = cost; + bestsplit = i + 1; + } + } + + *out_cost = bestcost; + return bestsplit; +} + +static void bvhPartition(unsigned int* target, const unsigned int* order, const unsigned char* sides, size_t split, size_t count) +{ + size_t l = 0, r = split; + + for (size_t i = 0; i < count; ++i) + { + unsigned char side = sides[order[i]]; + target[side ? r : l] = order[i]; + l += 1; + l -= side; + r += side; + } + + assert(l == split && r == count); +} + +static void bvhSplit(const BVHBox* boxes, unsigned int* orderx, unsigned int* ordery, unsigned int* orderz, unsigned char* boundary, size_t count, int depth, void* scratch, short* used, const unsigned int* indices, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight) +{ + if (count <= max_triangles && bvhCountVertices(orderx, count, used, indices) <= max_vertices) + return bvhPackLeaf(boundary, count); + + unsigned int* axes[3] = {orderx, ordery, orderz}; + + // we can use step=1 unconditionally but to reduce the cost for min=max case we use step=max + size_t step = min_triangles == max_triangles && count > max_triangles ? max_triangles : 1; + + // if we could not pack the meshlet, we must be vertex bound + size_t mint = count <= max_triangles && max_vertices / 3 < min_triangles ? max_vertices / 3 : min_triangles; + size_t maxfill = count <= max_triangles ? 
max_vertices : max_triangles; + + // find best split that minimizes SAH + int bestk = -1; + size_t bestsplit = 0; + float bestcost = FLT_MAX; + + for (int k = 0; k < 3; ++k) + { + float* areas = static_cast(scratch); + unsigned int* vertices = NULL; + + bvhComputeArea(areas, boxes, axes[k], count); + + if (count <= max_triangles) + { + // for vertex bound clusters, count number of unique vertices for each split + vertices = reinterpret_cast(areas + 2 * count); + bvhCountVertices(axes[k], count, used, indices, vertices); + } + + float axiscost = FLT_MAX; + size_t axissplit = bvhPivot(areas, vertices, count, step, mint, max_triangles, fill_weight, maxfill, &axiscost); + + if (axissplit && axiscost < bestcost) + { + bestk = k; + bestcost = axiscost; + bestsplit = axissplit; + } + } + + // this may happen if SAH costs along the admissible splits are NaN, or due to imbalanced splits on pathological inputs + if (bestk < 0 || depth >= kMeshletMaxTreeDepth) + return bvhPackTail(boundary, orderx, count, used, indices, max_vertices, max_triangles); + + // mark sides of split for partitioning + unsigned char* sides = static_cast(scratch) + count * sizeof(unsigned int); + + for (size_t i = 0; i < bestsplit; ++i) + sides[axes[bestk][i]] = 0; + + for (size_t i = bestsplit; i < count; ++i) + sides[axes[bestk][i]] = 1; + + // partition all axes into two sides, maintaining order + unsigned int* temp = static_cast(scratch); + + for (int k = 0; k < 3; ++k) + { + if (k == bestk) + continue; + + unsigned int* axis = axes[k]; + memcpy(temp, axis, sizeof(unsigned int) * count); + bvhPartition(axis, temp, sides, bestsplit, count); + } + + // recursion depth is bounded due to max depth check above + bvhSplit(boxes, orderx, ordery, orderz, boundary, bestsplit, depth + 1, scratch, used, indices, max_vertices, min_triangles, max_triangles, fill_weight); + bvhSplit(boxes, orderx + bestsplit, ordery + bestsplit, orderz + bestsplit, boundary + bestsplit, count - bestsplit, depth + 1, scratch, used, indices, max_vertices, min_triangles, max_triangles, fill_weight); +} + } // namespace meshopt size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles) @@ -517,7 +1132,6 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_ assert(index_count % 3 == 0); assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); - assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned (void)kMeshletMaxVertices; (void)kMeshletMaxTriangles; @@ -532,7 +1146,7 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_ return meshlet_limit_vertices > meshlet_limit_triangles ? 
meshlet_limit_vertices : meshlet_limit_triangles; } -size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +size_t meshopt_buildMeshletsFlex(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor) { using namespace meshopt; @@ -541,18 +1155,24 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve assert(vertex_positions_stride % sizeof(float) == 0); assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); - assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); - assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned + assert(min_triangles >= 1 && min_triangles <= max_triangles && max_triangles <= kMeshletMaxTriangles); assert(cone_weight >= 0 && cone_weight <= 1); + assert(split_factor >= 0); + + if (index_count == 0) + return 0; meshopt_Allocator allocator; TriangleAdjacency2 adjacency = {}; - buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + if (vertex_count > index_count && index_count < (1u << 31)) + buildTriangleAdjacencySparse(adjacency, indices, index_count, vertex_count, allocator); + else + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); - unsigned int* live_triangles = allocator.allocate(vertex_count); - memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + // live triangle counts; note, we alias adjacency.counts as we remove triangles after emitting them so the counts always match + unsigned int* live_triangles = adjacency.counts; size_t face_count = index_count / 3; @@ -573,11 +1193,45 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve kdindices[i] = unsigned(i); KDNode* nodes = allocator.allocate(face_count * 2); - kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8); + kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8, 0); - // index of the vertex in the meshlet, 0xff if the vertex isn't used - unsigned char* used = allocator.allocate(vertex_count); - memset(used, -1, vertex_count); + // find a specific corner of the mesh to use as a starting point for meshlet flow + float cornerx = FLT_MAX, cornery = FLT_MAX, cornerz = FLT_MAX; + + for (size_t i = 0; i < face_count; ++i) + { + const Cone& tri = triangles[i]; + + cornerx = cornerx > tri.px ? tri.px : cornerx; + cornery = cornery > tri.py ? tri.py : cornery; + cornerz = cornerz > tri.pz ? 
tri.pz : cornerz; + } + + // index of the vertex in the meshlet, -1 if the vertex isn't used + short* used = allocator.allocate(vertex_count); + clearUsed(used, vertex_count, indices, index_count); + + // initial seed triangle is the one closest to the corner + unsigned int initial_seed = ~0u; + float initial_score = FLT_MAX; + + for (size_t i = 0; i < face_count; ++i) + { + const Cone& tri = triangles[i]; + + float dx = tri.px - cornerx, dy = tri.py - cornery, dz = tri.pz - cornerz; + float score = sqrtf(dx * dx + dy * dy + dz * dz); + + if (initial_seed == ~0u || score < initial_score) + { + initial_seed = unsigned(i); + initial_score = score; + } + } + + // seed triangles to continue meshlet flow + unsigned int seeds[kMeshletMaxSeeds] = {}; + size_t seed_count = 0; meshopt_Meshlet meshlet = {}; size_t meshlet_offset = 0; @@ -588,46 +1242,61 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve { Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count); - unsigned int best_extra = 0; - unsigned int best_triangle = getNeighborTriangle(meshlet, &meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight, &best_extra); + unsigned int best_triangle = ~0u; - // if the best triangle doesn't fit into current meshlet, the spatial scoring we've used is not very meaningful, so we re-select using topological scoring - if (best_triangle != ~0u && (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles)) - { - best_triangle = getNeighborTriangle(meshlet, NULL, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, 0.f, NULL); - } + // for the first triangle, we don't have a meshlet cone yet, so we use the initial seed + // to continue the meshlet, we select an adjacent triangle based on connectivity and spatial scoring + if (meshlet_offset == 0 && meshlet.triangle_count == 0) + best_triangle = initial_seed; + else + best_triangle = getNeighborTriangle(meshlet, meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight); - // when we run out of neighboring triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity + bool split = false; + + // when we run out of adjacent triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity if (best_triangle == ~0u) { float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz}; unsigned int index = ~0u; - float limit = FLT_MAX; + float distance = FLT_MAX; - kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit); + kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, distance); best_triangle = index; + split = meshlet.triangle_count >= min_triangles && split_factor > 0 && distance > meshlet_expected_radius * split_factor; } if (best_triangle == ~0u) break; + int best_extra = (used[indices[best_triangle * 3 + 0]] < 0) + (used[indices[best_triangle * 3 + 1]] < 0) + (used[indices[best_triangle * 3 + 2]] < 0); + + // if the best triangle doesn't fit into current meshlet, we re-select using seeds to maintain global flow + if (split || (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles)) + { + seed_count = pruneSeedTriangles(seeds, seed_count, 
emitted_flags); + seed_count = (seed_count + kMeshletAddSeeds <= kMeshletMaxSeeds) ? seed_count : kMeshletMaxSeeds - kMeshletAddSeeds; + seed_count += appendSeedTriangles(seeds + seed_count, meshlet, meshlet_vertices, indices, adjacency, triangles, live_triangles, cornerx, cornery, cornerz); + + unsigned int best_seed = selectSeedTriangle(seeds, seed_count, indices, triangles, live_triangles, cornerx, cornery, cornerz); + + // we may not find a valid seed triangle if the mesh is disconnected as seeds are based on adjacency + best_triangle = best_seed != ~0u ? best_seed : best_triangle; + } + unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2]; assert(a < vertex_count && b < vertex_count && c < vertex_count); // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds - if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles)) + if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles, split)) { meshlet_offset++; memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc)); } - live_triangles[a]--; - live_triangles[b]--; - live_triangles[c]--; - // remove emitted triangle from adjacency data // this makes sure that we spend less time traversing these lists on subsequent iterations + // live triangle counts are updated as a byproduct of these adjustments for (size_t k = 0; k < 3; ++k) { unsigned int index = indices[best_triangle * 3 + k]; @@ -656,20 +1325,23 @@ size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_ve meshlet_cone_acc.ny += triangles[best_triangle].ny; meshlet_cone_acc.nz += triangles[best_triangle].nz; + assert(!emitted_flags[best_triangle]); emitted_flags[best_triangle] = 1; } if (meshlet.triangle_count) - { - finishMeshlet(meshlet, meshlet_triangles); - meshlets[meshlet_offset++] = meshlet; - } - assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, min_triangles)); + assert(meshlet.triangle_offset + meshlet.triangle_count * 3 <= index_count && meshlet.vertex_offset + meshlet.vertex_count <= index_count); return meshlet_offset; } +size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +{ + return meshopt_buildMeshletsFlex(meshlets, meshlet_vertices, meshlet_triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, max_triangles, cone_weight, 0.0f); +} + size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) { using namespace meshopt; @@ -678,13 +1350,12 @@ size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshle assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); - assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned 
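// A minimal usage sketch for the flexible builder above, from the caller's side; the function name and the
// limits (64/64/96), cone_weight and split_factor below are placeholder values, not part of the patch. Buffer
// sizing follows the asserts in meshopt_buildMeshletsFlex: the meshlet count bound is computed with
// min_triangles, while the vertex/triangle arrays are sized with max_vertices/max_triangles.
#include <vector>
#include "meshoptimizer.h"

std::vector<meshopt_Meshlet> buildMeshletsForMesh(const std::vector<unsigned int>& indices, const std::vector<float>& positions /* tightly packed xyz per vertex */)
{
    const size_t max_vertices = 64, min_triangles = 64, max_triangles = 96;

    // worst-case meshlet count uses min_triangles, matching the assert at the end of meshopt_buildMeshletsFlex
    size_t max_meshlets = meshopt_buildMeshletsBound(indices.size(), max_vertices, min_triangles);
    std::vector<meshopt_Meshlet> meshlets(max_meshlets);
    std::vector<unsigned int> meshlet_vertices(max_meshlets * max_vertices);
    std::vector<unsigned char> meshlet_triangles(max_meshlets * max_triangles * 3);

    size_t count = meshopt_buildMeshletsFlex(meshlets.data(), meshlet_vertices.data(), meshlet_triangles.data(),
        indices.data(), indices.size(), positions.data(), positions.size() / 3, sizeof(float) * 3,
        max_vertices, min_triangles, max_triangles, /* cone_weight */ 0.5f, /* split_factor */ 2.0f);

    // with min_triangles == max_triangles and split_factor == 0 this reduces to meshopt_buildMeshlets,
    // which is exactly what the compatibility wrapper above forwards to
    meshlets.resize(count);
    return meshlets;
}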
meshopt_Allocator allocator; - // index of the vertex in the meshlet, 0xff if the vertex isn't used - unsigned char* used = allocator.allocate(vertex_count); - memset(used, -1, vertex_count); + // index of the vertex in the meshlet, -1 if the vertex isn't used + short* used = allocator.allocate(vertex_count); + clearUsed(used, vertex_count, indices, index_count); meshopt_Meshlet meshlet = {}; size_t meshlet_offset = 0; @@ -699,13 +1370,109 @@ size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshle } if (meshlet.triangle_count) - { - finishMeshlet(meshlet, meshlet_triangles); - meshlets[meshlet_offset++] = meshlet; - } assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + assert(meshlet.triangle_offset + meshlet.triangle_count * 3 <= index_count && meshlet.vertex_offset + meshlet.vertex_count <= index_count); + return meshlet_offset; +} + +size_t meshopt_buildMeshletsSpatial(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); + assert(min_triangles >= 1 && min_triangles <= max_triangles && max_triangles <= kMeshletMaxTriangles); + + if (index_count == 0) + return 0; + + size_t face_count = index_count / 3; + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + meshopt_Allocator allocator; + + // 3 floats plus 1 uint for sorting, or + // 2 floats plus 1 uint for pivoting, or + // 1 uint plus 1 byte for partitioning + float* scratch = allocator.allocate(face_count * 4); + + // compute bounding boxes and centroids for sorting + BVHBox* boxes = allocator.allocate(face_count + 1); // padding for SIMD + bvhPrepare(boxes, scratch, indices, face_count, vertex_positions, vertex_count, vertex_stride_float); + memset(boxes + face_count, 0, sizeof(BVHBox)); + + unsigned int* axes = allocator.allocate(face_count * 3); + unsigned int* temp = reinterpret_cast(scratch) + face_count * 3; + + for (int k = 0; k < 3; ++k) + { + unsigned int* order = axes + k * face_count; + const float* keys = scratch + k * face_count; + + unsigned int hist[1024][3]; + computeHistogram(hist, keys, face_count); + + // 3-pass radix sort computes the resulting order into axes + for (size_t i = 0; i < face_count; ++i) + temp[i] = unsigned(i); + + radixPass(order, temp, keys, face_count, hist, 0); + radixPass(temp, order, keys, face_count, hist, 1); + radixPass(order, temp, keys, face_count, hist, 2); + } + + // index of the vertex in the meshlet, -1 if the vertex isn't used + short* used = allocator.allocate(vertex_count); + clearUsed(used, vertex_count, indices, index_count); + + unsigned char* boundary = allocator.allocate(face_count); + + bvhSplit(boxes, &axes[0], &axes[face_count], &axes[face_count * 2], boundary, face_count, 0, scratch, used, indices, max_vertices, min_triangles, max_triangles, fill_weight); + + // compute the desired number of meshlets; note that on some meshes with a lot of vertex bound clusters this might go over the bound + size_t meshlet_count = 0; + for (size_t i = 0; i < face_count; ++i) + { + 
assert(boundary[i] <= 1); + meshlet_count += boundary[i]; + } + + size_t meshlet_bound = meshopt_buildMeshletsBound(index_count, max_vertices, min_triangles); + + // pack triangles into meshlets according to the order and boundaries marked by bvhSplit + meshopt_Meshlet meshlet = {}; + size_t meshlet_offset = 0; + size_t meshlet_pending = meshlet_count; + + for (size_t i = 0; i < face_count; ++i) + { + assert(boundary[i] <= 1); + bool split = i > 0 && boundary[i] == 1; + + // while we are over the limit, we ignore boundary[] data and disable splits until we free up enough space + if (split && meshlet_count > meshlet_bound && meshlet_offset + meshlet_pending >= meshlet_bound) + split = false; + + unsigned int index = axes[i]; + assert(index < face_count); + + unsigned int a = indices[index * 3 + 0], b = indices[index * 3 + 1], c = indices[index * 3 + 2]; + + // appends triangle to the meshlet and writes previous meshlet to the output if full + meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles, split); + meshlet_pending -= boundary[i]; + } + + if (meshlet.triangle_count) + meshlets[meshlet_offset++] = meshlet; + + assert(meshlet_offset <= meshlet_bound); + assert(meshlet.triangle_offset + meshlet.triangle_count * 3 <= index_count && meshlet.vertex_offset + meshlet.vertex_count <= index_count); return meshlet_offset; } @@ -765,15 +1532,17 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t if (triangles == 0) return bounds; + const float rzero = 0.f; + // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well float psphere[4] = {}; - computeBoundingSphere(psphere, corners[0], triangles * 3); + computeBoundingSphere(psphere, corners[0][0], triangles * 3, sizeof(float) * 3, &rzero, 0, 7); float center[3] = {psphere[0], psphere[1], psphere[2]}; // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis float nsphere[4] = {}; - computeBoundingSphere(nsphere, normals, triangles); + computeBoundingSphere(nsphere, normals[0], triangles, sizeof(float) * 3, &rzero, 0, 3); float axis[3] = {nsphere[0], nsphere[1], nsphere[2]}; float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); @@ -883,6 +1652,33 @@ meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); } +meshopt_Bounds meshopt_computeSphereBounds(const float* positions, size_t count, size_t positions_stride, const float* radii, size_t radii_stride) +{ + using namespace meshopt; + + assert(positions_stride >= 12 && positions_stride <= 256); + assert(positions_stride % sizeof(float) == 0); + assert((radii_stride >= 4 && radii_stride <= 256) || radii == NULL); + assert(radii_stride % sizeof(float) == 0); + + meshopt_Bounds bounds = {}; + + if (count == 0) + return bounds; + + const float rzero = 0.f; + + float psphere[4] = {}; + computeBoundingSphere(psphere, positions, count, positions_stride, radii ? radii : &rzero, radii ? 
radii_stride : 0, 7); + + bounds.center[0] = psphere[0]; + bounds.center[1] = psphere[1]; + bounds.center[2] = psphere[2]; + bounds.radius = psphere[3]; + + return bounds; +} + void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count) { using namespace meshopt; @@ -950,25 +1746,28 @@ void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* mesh // reorder meshlet vertices for access locality assuming index buffer is scanned sequentially unsigned int order[kMeshletMaxVertices]; - unsigned char remap[kMeshletMaxVertices]; - memset(remap, -1, vertex_count); + short remap[kMeshletMaxVertices]; + memset(remap, -1, vertex_count * sizeof(short)); size_t vertex_offset = 0; for (size_t i = 0; i < triangle_count * 3; ++i) { - unsigned char& r = remap[indices[i]]; + short& r = remap[indices[i]]; - if (r == 0xff) + if (r < 0) { - r = (unsigned char)(vertex_offset); + r = short(vertex_offset); order[vertex_offset] = vertices[indices[i]]; vertex_offset++; } - indices[i] = r; + indices[i] = (unsigned char)r; } assert(vertex_offset <= vertex_count); memcpy(vertices, order, vertex_offset * sizeof(unsigned int)); } + +#undef SIMD_SSE +#undef SIMD_NEON diff --git a/Source/ThirdParty/meshoptimizer/vcacheanalyzer.cpp b/Source/ThirdParty/meshoptimizer/indexanalyzer.cpp similarity index 58% rename from Source/ThirdParty/meshoptimizer/vcacheanalyzer.cpp rename to Source/ThirdParty/meshoptimizer/indexanalyzer.cpp index 368274382..87ceeae66 100644 --- a/Source/ThirdParty/meshoptimizer/vcacheanalyzer.cpp +++ b/Source/ThirdParty/meshoptimizer/indexanalyzer.cpp @@ -71,3 +71,56 @@ meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* ind return result; } + +meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +{ + assert(index_count % 3 == 0); + assert(vertex_size > 0 && vertex_size <= 256); + + meshopt_Allocator allocator; + + meshopt_VertexFetchStatistics result = {}; + + unsigned char* vertex_visited = allocator.allocate(vertex_count); + memset(vertex_visited, 0, vertex_count); + + const size_t kCacheLine = 64; + const size_t kCacheSize = 128 * 1024; + + // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway + size_t cache[kCacheSize / kCacheLine] = {}; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + vertex_visited[index] = 1; + + size_t start_address = index * vertex_size; + size_t end_address = start_address + vertex_size; + + size_t start_tag = start_address / kCacheLine; + size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine; + + assert(start_tag < end_tag); + + for (size_t tag = start_tag; tag < end_tag; ++tag) + { + size_t line = tag % (sizeof(cache) / sizeof(cache[0])); + + // we store +1 since cache is filled with 0 by default + result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine; + cache[line] = tag + 1; + } + } + + size_t unique_vertex_count = 0; + + for (size_t i = 0; i < vertex_count; ++i) + unique_vertex_count += vertex_visited[i]; + + result.overfetch = unique_vertex_count == 0 ? 
0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size); + + return result; +} diff --git a/Source/ThirdParty/meshoptimizer/indexcodec.cpp b/Source/ThirdParty/meshoptimizer/indexcodec.cpp index b30046005..7a8fd6867 100644 --- a/Source/ThirdParty/meshoptimizer/indexcodec.cpp +++ b/Source/ThirdParty/meshoptimizer/indexcodec.cpp @@ -14,6 +14,7 @@ const unsigned char kIndexHeader = 0xe0; const unsigned char kSequenceHeader = 0xd0; static int gEncodeIndexVersion = 1; +const int kDecodeIndexVersion = 1; typedef unsigned int VertexFifo[16]; typedef unsigned int EdgeFifo[16][2]; @@ -209,6 +210,7 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons if (fer >= 0 && (fer >> 2) < 15) { + // note: getEdgeFifo implicitly rotates triangles by matching a/b to existing edge const unsigned int* order = kTriangleIndexOrder[fer & 3]; unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; @@ -266,6 +268,7 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a + // note: decoder implicitly assumes that if feb=fec=0, then fea=0 (reset code); this is enforced by rotation int fea = (a == next) ? (next++, 0) : 15; int feb = (fb >= 0 && fb < 14) ? fb + 1 : (b == next ? (next++, 0) : 15); int fec = (fc >= 0 && fc < 14) ? fc + 1 : (c == next ? (next++, 0) : 15); @@ -354,11 +357,28 @@ size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count) void meshopt_encodeIndexVersion(int version) { - assert(unsigned(version) <= 1); + assert(unsigned(version) <= unsigned(meshopt::kDecodeIndexVersion)); meshopt::gEncodeIndexVersion = version; } +int meshopt_decodeIndexVersion(const unsigned char* buffer, size_t buffer_size) +{ + if (buffer_size < 1) + return -1; + + unsigned char header = buffer[0]; + + if ((header & 0xf0) != meshopt::kIndexHeader && (header & 0xf0) != meshopt::kSequenceHeader) + return -1; + + int version = header & 0x0f; + if (version > meshopt::kDecodeIndexVersion) + return -1; + + return version; +} + int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) { using namespace meshopt; @@ -374,7 +394,7 @@ int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t inde return -1; int version = buffer[0] & 0x0f; - if (version > 1) + if (version > kDecodeIndexVersion) return -1; EdgeFifo edgefifo; @@ -415,6 +435,7 @@ int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t inde // fifo reads are wrapped around 16 entry buffer unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0]; unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1]; + unsigned int c = 0; int fec = codetri & 15; @@ -424,37 +445,30 @@ int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t inde { // fifo reads are wrapped around 16 entry buffer unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15]; - unsigned int c = (fec == 0) ? next : cf; + c = (fec == 0) ? 
next : cf; int fec0 = fec == 0; next += fec0; - // output triangle - writeTriangle(destination, i, index_size, a, b, c); - - // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + // push vertex fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); - - pushEdgeFifo(edgefifo, c, b, edgefifooffset); - pushEdgeFifo(edgefifo, a, c, edgefifooffset); } else { - unsigned int c = 0; - // fec - (fec ^ 3) decodes 13, 14 into -1, 1 // note that we need to update the last index since free indices are delta-encoded last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last); - // output triangle - writeTriangle(destination, i, index_size, a, b, c); - // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly pushVertexFifo(vertexfifo, c, vertexfifooffset); - - pushEdgeFifo(edgefifo, c, b, edgefifooffset); - pushEdgeFifo(edgefifo, a, c, edgefifooffset); } + + // push edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); } else { @@ -627,7 +641,7 @@ int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t in return -1; int version = buffer[0] & 0x0f; - if (version > 1) + if (version > kDecodeIndexVersion) return -1; const unsigned char* data = buffer + 1; diff --git a/Source/ThirdParty/meshoptimizer/indexgenerator.cpp b/Source/ThirdParty/meshoptimizer/indexgenerator.cpp index f6728345a..4bf9fccad 100644 --- a/Source/ThirdParty/meshoptimizer/indexgenerator.cpp +++ b/Source/ThirdParty/meshoptimizer/indexgenerator.cpp @@ -5,7 +5,9 @@ #include // This work is based on: +// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003 // John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010 +// John Hable. Variable Rate Shading with Visibility Buffer Rendering. 2024 namespace meshopt { @@ -85,6 +87,46 @@ struct VertexStreamHasher } }; +struct VertexCustomHasher +{ + const float* vertex_positions; + size_t vertex_stride_float; + + int (*callback)(void*, unsigned int, unsigned int); + void* context; + + size_t hash(unsigned int index) const + { + const unsigned int* key = reinterpret_cast(vertex_positions + index * vertex_stride_float); + + unsigned int x = key[0], y = key[1], z = key[2]; + + // replace negative zero with zero + x = (x == 0x80000000) ? 0 : x; + y = (y == 0x80000000) ? 0 : y; + z = (z == 0x80000000) ? 0 : z; + + // scramble bits to make sure that integer coordinates have entropy in lower bits + x ^= x >> 17; + y ^= y >> 17; + z ^= z >> 17; + + // Optimized Spatial Hashing for Collision Detection of Deformable Objects + return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791); + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + const float* lp = vertex_positions + lhs * vertex_stride_float; + const float* rp = vertex_positions + rhs * vertex_stride_float; + + if (lp[0] != rp[0] || lp[1] != rp[1] || lp[2] != rp[2]) + return false; + + return callback ? 
callback(context, lhs, rhs) : true; + } +}; + struct EdgeHasher { const unsigned int* remap; @@ -182,6 +224,43 @@ static void buildPositionRemap(unsigned int* remap, const float* vertex_position allocator.deallocate(vertex_table); } +template +static size_t generateVertexRemap(unsigned int* remap, const unsigned int* indices, size_t index_count, size_t vertex_count, const Hash& hash, meshopt_Allocator& allocator) +{ + memset(remap, -1, vertex_count * sizeof(unsigned int)); + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices ? indices[i] : unsigned(i); + assert(index < vertex_count); + + if (remap[index] != ~0u) + continue; + + unsigned int* entry = hashLookup(table, table_size, hash, index, ~0u); + + if (*entry == ~0u) + { + *entry = index; + remap[index] = next_vertex++; + } + else + { + assert(remap[*entry] != ~0u); + remap[index] = remap[*entry]; + } + } + + assert(next_vertex <= vertex_count); + return next_vertex; +} + template static void remapVertices(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) { @@ -196,6 +275,35 @@ static void remapVertices(void* destination, const void* vertices, size_t vertex } } +template +static void generateShadowBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const Hash& hash, meshopt_Allocator& allocator) +{ + unsigned int* remap = allocator.allocate(vertex_count); + memset(remap, -1, vertex_count * sizeof(unsigned int)); + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + if (remap[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hash, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } + + destination[i] = remap[index]; + } +} + } // namespace meshopt size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) @@ -207,44 +315,9 @@ size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int assert(vertex_size > 0 && vertex_size <= 256); meshopt_Allocator allocator; - - memset(destination, -1, vertex_count * sizeof(unsigned int)); - VertexHasher hasher = {static_cast(vertices), vertex_size, vertex_size}; - size_t table_size = hashBuckets(vertex_count); - unsigned int* table = allocator.allocate(table_size); - memset(table, -1, table_size * sizeof(unsigned int)); - - unsigned int next_vertex = 0; - - for (size_t i = 0; i < index_count; ++i) - { - unsigned int index = indices ? 
indices[i] : unsigned(i); - assert(index < vertex_count); - - if (destination[index] == ~0u) - { - unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); - - if (*entry == ~0u) - { - *entry = index; - - destination[index] = next_vertex++; - } - else - { - assert(destination[*entry] != ~0u); - - destination[index] = destination[*entry]; - } - } - } - - assert(next_vertex <= vertex_count); - - return next_vertex; + return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator); } size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count) @@ -262,44 +335,24 @@ size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigne } meshopt_Allocator allocator; - - memset(destination, -1, vertex_count * sizeof(unsigned int)); - VertexStreamHasher hasher = {streams, stream_count}; - size_t table_size = hashBuckets(vertex_count); - unsigned int* table = allocator.allocate(table_size); - memset(table, -1, table_size * sizeof(unsigned int)); + return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator); +} - unsigned int next_vertex = 0; +size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, int (*callback)(void*, unsigned int, unsigned int), void* context) +{ + using namespace meshopt; - for (size_t i = 0; i < index_count; ++i) - { - unsigned int index = indices ? indices[i] : unsigned(i); - assert(index < vertex_count); + assert(indices || index_count == vertex_count); + assert(!indices || index_count % 3 == 0); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); - if (destination[index] == ~0u) - { - unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + meshopt_Allocator allocator; + VertexCustomHasher hasher = {vertex_positions, vertex_positions_stride / sizeof(float), callback, context}; - if (*entry == ~0u) - { - *entry = index; - - destination[index] = next_vertex++; - } - else - { - assert(destination[*entry] != ~0u); - - destination[index] = destination[*entry]; - } - } - } - - assert(next_vertex <= vertex_count); - - return next_vertex; + return generateVertexRemap(destination, indices, index_count, vertex_count, hasher, allocator); } void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) @@ -361,33 +414,9 @@ void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned assert(vertex_size <= vertex_stride); meshopt_Allocator allocator; - - unsigned int* remap = allocator.allocate(vertex_count); - memset(remap, -1, vertex_count * sizeof(unsigned int)); - VertexHasher hasher = {static_cast(vertices), vertex_size, vertex_stride}; - size_t table_size = hashBuckets(vertex_count); - unsigned int* table = allocator.allocate(table_size); - memset(table, -1, table_size * sizeof(unsigned int)); - - for (size_t i = 0; i < index_count; ++i) - { - unsigned int index = indices[i]; - assert(index < vertex_count); - - if (remap[index] == ~0u) - { - unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); - - if (*entry == ~0u) - *entry = index; - - remap[index] = *entry; - } - - destination[i] = remap[index]; - } + 
generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator); } void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count) @@ -405,32 +434,33 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const uns } meshopt_Allocator allocator; - - unsigned int* remap = allocator.allocate(vertex_count); - memset(remap, -1, vertex_count * sizeof(unsigned int)); - VertexStreamHasher hasher = {streams, stream_count}; + generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator); +} + +void meshopt_generatePositionRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + VertexCustomHasher hasher = {vertex_positions, vertex_positions_stride / sizeof(float), NULL, NULL}; + size_t table_size = hashBuckets(vertex_count); unsigned int* table = allocator.allocate(table_size); memset(table, -1, table_size * sizeof(unsigned int)); - for (size_t i = 0; i < index_count; ++i) + for (size_t i = 0; i < vertex_count; ++i) { - unsigned int index = indices[i]; - assert(index < vertex_count); + unsigned int* entry = hashLookup(table, table_size, hasher, unsigned(i), ~0u); - if (remap[index] == ~0u) - { - unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + if (*entry == ~0u) + *entry = unsigned(i); - if (*entry == ~0u) - *entry = index; - - remap[index] = *entry; - } - - destination[i] = remap[index]; + destination[i] = *entry; } } @@ -576,3 +606,99 @@ void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const un memcpy(destination + i * 4, patch, sizeof(patch)); } } + +size_t meshopt_generateProvokingIndexBuffer(unsigned int* destination, unsigned int* reorder, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + assert(index_count % 3 == 0); + + meshopt_Allocator allocator; + + unsigned int* remap = allocator.allocate(vertex_count); + memset(remap, -1, vertex_count * sizeof(unsigned int)); + + // compute vertex valence; this is used to prioritize least used corner + // note: we use 8-bit counters for performance; for outlier vertices the valence is incorrect but that just affects the heuristic + unsigned char* valence = allocator.allocate(vertex_count); + memset(valence, 0, vertex_count); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + valence[index]++; + } + + unsigned int reorder_offset = 0; + + // assign provoking vertices; leave the rest for the next pass + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + // try to rotate triangle such that provoking vertex hasn't been seen before + // if multiple vertices are new, prioritize the one with least valence + // this reduces the risk that a future triangle will have all three vertices seen + unsigned int va = remap[a] == ~0u ? valence[a] : ~0u; + unsigned int vb = remap[b] == ~0u ? valence[b] : ~0u; + unsigned int vc = remap[c] == ~0u ? 
valence[c] : ~0u; + + if (vb != ~0u && vb <= va && vb <= vc) + { + // abc -> bca + unsigned int t = a; + a = b, b = c, c = t; + } + else if (vc != ~0u && vc <= va && vc <= vb) + { + // abc -> cab + unsigned int t = c; + c = b, b = a, a = t; + } + + unsigned int newidx = reorder_offset; + + // now remap[a] = ~0u or all three vertices are old + // recording remap[a] makes it possible to remap future references to the same index, conserving space + if (remap[a] == ~0u) + remap[a] = newidx; + + // we need to clone the provoking vertex to get a unique index + // if all three are used the choice is arbitrary since no future triangle will be able to reuse any of these + reorder[reorder_offset++] = a; + + // note: first vertex is final, the other two will be fixed up in next pass + destination[i + 0] = newidx; + destination[i + 1] = b; + destination[i + 2] = c; + + // update vertex valences for corner heuristic + valence[a]--; + valence[b]--; + valence[c]--; + } + + // remap or clone non-provoking vertices (iterating to skip provoking vertices) + int step = 1; + + for (size_t i = 1; i < index_count; i += step, step ^= 3) + { + unsigned int index = destination[i]; + + if (remap[index] == ~0u) + { + // we haven't seen the vertex before as a provoking vertex + // to maintain the reference to the original vertex we need to clone it + unsigned int newidx = reorder_offset; + + remap[index] = newidx; + reorder[reorder_offset++] = index; + } + + destination[i] = remap[index]; + } + + assert(reorder_offset <= vertex_count + index_count / 3); + return reorder_offset; +} diff --git a/Source/ThirdParty/meshoptimizer/meshoptimizer.h b/Source/ThirdParty/meshoptimizer/meshoptimizer.h index 6c8dcd7e8..c9239bc30 100644 --- a/Source/ThirdParty/meshoptimizer/meshoptimizer.h +++ b/Source/ThirdParty/meshoptimizer/meshoptimizer.h @@ -1,7 +1,7 @@ /** - * meshoptimizer - version 0.21 + * meshoptimizer - version 1.0 * - * Copyright (C) 2016-2024, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2016-2025, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://github.com/zeux/meshoptimizer * * This library is distributed under the MIT License. See notice at the end of this file. @@ -12,7 +12,7 @@ #include /* Version macro; major * 1000 + minor * 10 + patch */ -#define MESHOPTIMIZER_VERSION 210 /* 0.21 */ +#define MESHOPTIMIZER_VERSION 1000 /* 1.0 */ /* If no API is defined, assume default */ #ifndef MESHOPTIMIZER_API @@ -29,11 +29,14 @@ #endif /* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */ +#ifndef MESHOPTIMIZER_EXPERIMENTAL #define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API +#endif /* C interface */ #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif /** @@ -71,6 +74,19 @@ MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, */ MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); +/** + * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices + * As a result, all vertices that are equivalent map to the same (new) location, with no gaps in the resulting sequence. + * Equivalence is checked in two steps: vertex positions are compared for equality, and then the user-specified equality function is called (if provided). 
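[Editor's note] Illustrative sketch, not part of the upstream patch: one way the two-step equivalence check described above (exact position match, then the user callback) can be driven from application code via meshopt_generateVertexRemapCustom, declared just below. The ExampleVertexPN layout and the 0.99 normal-similarity threshold are assumptions made up for this example.

#include <vector>
#include "meshoptimizer.h"

struct ExampleVertexPN { float px, py, pz, nx, ny, nz; }; // hypothetical interleaved layout

// Called only when two vertices already have identical positions; return 1 to weld them.
static int exampleNormalsClose(void* ctx, unsigned int a, unsigned int b)
{
    const ExampleVertexPN* v = static_cast<const ExampleVertexPN*>(ctx);
    float d = v[a].nx * v[b].nx + v[a].ny * v[b].ny + v[a].nz * v[b].nz;
    return d > 0.99f; // illustrative tolerance; tune per asset
}

// Returns the number of unique vertices; remap can then be fed to meshopt_remapIndexBuffer/meshopt_remapVertexBuffer.
static size_t exampleBuildRemap(const std::vector<unsigned int>& indices, std::vector<ExampleVertexPN>& vertices, std::vector<unsigned int>& remap)
{
    remap.resize(vertices.size());
    return meshopt_generateVertexRemapCustom(remap.data(), indices.data(), indices.size(),
        &vertices[0].px, vertices.size(), sizeof(ExampleVertexPN),
        exampleNormalsClose, vertices.data());
}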
+ * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + * indices can be NULL if the input is unindexed + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * callback can be NULL if no additional equality check is needed; otherwise, it should return 1 if vertices with specified indices are equivalent and 0 if they are not + */ +MESHOPTIMIZER_API size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, int (*callback)(void*, unsigned int, unsigned int), void* context); + /** * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap * @@ -108,6 +124,16 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destinati */ MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); +/** + * Generates a remap table that maps all vertices with the same position to the same (existing) index. + * Similarly to meshopt_generateShadowIndexBuffer, this can be helpful to pre-process meshes for position-only rendering. + * This can also be used to implement algorithms that require positional-only connectivity, such as hierarchical simplification. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex + */ +MESHOPTIMIZER_API void meshopt_generatePositionRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + /** * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology * Each triangle is converted into a 6-vertex patch with the following layout: @@ -137,10 +163,23 @@ MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destin */ MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +/** + * Generate index buffer that can be used for visibility buffer rendering and returns the size of the reorder table + * Each triangle's provoking vertex index is equal to primitive id; this allows passing it to the fragment shader using flat/nointerpolation attribute. + * This is important for performance on hardware where primitive id can't be accessed efficiently in fragment shader. + * The reorder table stores the original vertex id for each vertex in the new index buffer, and should be used in the vertex shader to load vertex data. + * The provoking vertex is assumed to be the first vertex in the triangle; if this is not the case (OpenGL), rotate each triangle (abc -> bca) before rendering. + * For maximum efficiency the input index buffer should be optimized for vertex cache first. 
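[Editor's note] Illustrative sketch, not part of the upstream patch: the new meshopt_generatePositionRemap declared above maps every vertex to an existing representative with the same position, which is handy for position-only connectivity queries. The ExampleVertexPNT layout is an assumption for this example, and the uniqueness count relies on representatives mapping to themselves as in the implementation above.

#include <vector>
#include "meshoptimizer.h"

struct ExampleVertexPNT { float px, py, pz, nx, ny, nz, u, v; }; // hypothetical layout

// Returns how many distinct positions the vertex buffer contains; remap[i] is an
// existing vertex index shared by all vertices located at the same position as vertex i.
static size_t exampleCountPositions(const std::vector<ExampleVertexPNT>& vertices, std::vector<unsigned int>& remap)
{
    remap.resize(vertices.size());
    meshopt_generatePositionRemap(remap.data(), &vertices[0].px, vertices.size(), sizeof(ExampleVertexPNT));

    size_t unique = 0;
    for (size_t i = 0; i < vertices.size(); ++i)
        unique += (remap[i] == i); // a representative maps to itself
    return unique;
}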
+ * + * destination must contain enough space for the resulting index buffer (index_count elements) + * reorder must contain enough space for the worst case reorder table (vertex_count + index_count/3 elements) + */ +MESHOPTIMIZER_API size_t meshopt_generateProvokingIndexBuffer(unsigned int* destination, unsigned int* reorder, const unsigned int* indices, size_t index_count, size_t vertex_count); + /** * Vertex transform cache optimizer * Reorders indices to reduce the number of GPU vertex shader invocations - * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually. * * destination must contain enough space for the resulting index buffer (index_count elements) */ @@ -159,7 +198,7 @@ MESHOPTIMIZER_API void meshopt_optimizeVertexCacheStrip(unsigned int* destinatio * Vertex transform cache optimizer for FIFO caches * Reorders indices to reduce the number of GPU vertex shader invocations * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache - * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually. * * destination must contain enough space for the resulting index buffer (index_count elements) * cache_size should be less than the actual GPU cache size to avoid cache thrashing @@ -169,7 +208,7 @@ MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination /** * Overdraw optimizer * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw - * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually. * * destination must contain enough space for the resulting index buffer (index_count elements) * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!) @@ -182,7 +221,7 @@ MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const * Vertex fetch cache optimizer * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused - * This functions works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream. + * This function works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream. 
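[Editor's note] Illustrative sketch, not part of the upstream patch: sizing and calling meshopt_generateProvokingIndexBuffer, declared above, for a visibility-buffer style renderer. Buffer names are placeholders; the worst-case sizes follow the header comments.

#include <vector>
#include "meshoptimizer.h"

// indices: triangle list (ideally vertex-cache optimized first, per the header comment)
static void exampleProvoking(const std::vector<unsigned int>& indices, size_t vertex_count,
    std::vector<unsigned int>& provoking_indices, std::vector<unsigned int>& reorder)
{
    provoking_indices.resize(indices.size());
    reorder.resize(vertex_count + indices.size() / 3); // worst case per the header comment

    size_t reorder_size = meshopt_generateProvokingIndexBuffer(
        provoking_indices.data(), reorder.data(), indices.data(), indices.size(), vertex_count);

    reorder.resize(reorder_size);
    // provoking_indices[3 * t + 0] == t for every triangle t, so a flat/nointerpolation
    // attribute on the provoking vertex hands the fragment shader the primitive id;
    // the vertex shader uses reorder[vertex index] to fetch the original vertex data.
}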
* * destination must contain enough space for the resulting vertex buffer (vertex_count elements) * indices is used both as an input and as an output index buffer @@ -212,7 +251,8 @@ MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count); /** - * Set index encoder format version + * Set index encoder format version (defaults to 1) + * * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+) */ MESHOPTIMIZER_API void meshopt_encodeIndexVersion(int version); @@ -227,6 +267,13 @@ MESHOPTIMIZER_API void meshopt_encodeIndexVersion(int version); */ MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); +/** + * Get encoded index format version + * Returns format version of the encoded index buffer/sequence, or -1 if the buffer header is invalid + * Note that a non-negative value doesn't guarantee that the buffer will be decoded correctly if the input is malformed. + */ +MESHOPTIMIZER_API int meshopt_decodeIndexVersion(const unsigned char* buffer, size_t buffer_size); + /** * Index sequence encoder * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original. @@ -254,15 +301,31 @@ MESHOPTIMIZER_API int meshopt_decodeIndexSequence(void* destination, size_t inde * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream. * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized. + * For maximum efficiency the vertex buffer being encoded has to be quantized and optimized for locality of reference (cache/fetch) first. * * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size) + * vertex_size must be a multiple of 4 (and <= 256) */ MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size); MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size); /** - * Set vertex encoder format version - * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) + * Vertex buffer encoder + * Encodes vertex data just like meshopt_encodeVertexBuffer, but allows to override compression level. + * For compression level to take effect, the vertex encoding version must be set to 1. + * The default compression level implied by meshopt_encodeVertexBuffer is 2. + * + * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size) + * vertex_size must be a multiple of 4 (and <= 256) + * level should be in the range [0, 3] with 0 being the fastest and 3 being the slowest and producing the best compression ratio. + * version should be -1 to use the default version (specified via meshopt_encodeVertexVersion), or 0/1 to override the version; per above, level won't take effect if version is 0. 
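[Editor's note] Illustrative sketch, not part of the upstream patch: using the new meshopt_decodeIndexVersion to validate a header before decoding, so a loader can reject data written by a newer encoder instead of failing inside meshopt_decodeIndexBuffer. Names are placeholders; decodeIndexBuffer returning 0 on success follows the library convention.

#include <vector>
#include "meshoptimizer.h"

static bool exampleDecodeIndices(std::vector<unsigned int>& out, size_t index_count,
    const unsigned char* data, size_t data_size)
{
    int version = meshopt_decodeIndexVersion(data, data_size);
    if (version < 0)
        return false; // header is not a recognized index buffer/sequence, or the version is too new

    out.resize(index_count);
    return meshopt_decodeIndexBuffer(out.data(), index_count, sizeof(unsigned int), data, data_size) == 0;
}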
+ */ +MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level, int version); + +/** + * Set vertex encoder format version (defaults to 1) + * + * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.23+) */ MESHOPTIMIZER_API void meshopt_encodeVertexVersion(int version); @@ -273,32 +336,44 @@ MESHOPTIMIZER_API void meshopt_encodeVertexVersion(int version); * The decoder is safe to use for untrusted input, but it may produce garbage data. * * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes) + * vertex_size must be a multiple of 4 (and <= 256) */ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size); +/** + * Get encoded vertex format version + * Returns format version of the encoded vertex buffer, or -1 if the buffer header is invalid + * Note that a non-negative value doesn't guarantee that the buffer will be decoded correctly if the input is malformed. + */ +MESHOPTIMIZER_API int meshopt_decodeVertexVersion(const unsigned char* buffer, size_t buffer_size); + /** * Vertex buffer filters * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place. * - * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f. + * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit signed X/Y as an input; Z must store 1.0f. * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. * - * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct. + * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit component encoding and a 2-bit component index indicating which component to reconstruct. * Each component is stored as an 16-bit integer; stride must be equal to 8. * * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M. * Each 32-bit component is decoded in isolation; stride must be divisible by 4. + * + * meshopt_decodeFilterColor decodes RGBA colors from YCoCg (+A) color encoding where RGB is converted to YCoCg space with K-bit component encoding, and A is stored using K-1 bits. + * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. 
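[Editor's note] Illustrative sketch, not part of the upstream patch: encoding a vertex stream with the new meshopt_encodeVertexBufferLevel and reading the format version back with meshopt_decodeVertexVersion. Level 3 and version -1 follow the parameter comments above; the helper name is a placeholder.

#include <vector>
#include "meshoptimizer.h"

// vertices: vertex_count items of vertex_size bytes each; vertex_size must be a multiple of 4
static std::vector<unsigned char> exampleEncodeVertices(const void* vertices, size_t vertex_count, size_t vertex_size)
{
    std::vector<unsigned char> buffer(meshopt_encodeVertexBufferBound(vertex_count, vertex_size));

    // level 3: best ratio, slowest; version -1: use the default set via meshopt_encodeVertexVersion
    size_t size = meshopt_encodeVertexBufferLevel(buffer.data(), buffer.size(), vertices, vertex_count, vertex_size, 3, -1);
    buffer.resize(size); // sized from the bound above, so the encoder cannot run out of space

    // the encoded stream carries its format version; -1 would indicate an invalid header
    int version = meshopt_decodeVertexVersion(buffer.data(), buffer.size());
    (void)version;
    return buffer;
}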
*/ -MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride); -MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride); -MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride); +MESHOPTIMIZER_API void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride); +MESHOPTIMIZER_API void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride); +MESHOPTIMIZER_API void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride); +MESHOPTIMIZER_API void meshopt_decodeFilterColor(void* buffer, size_t count, size_t stride); /** * Vertex buffer filter encoders * These functions can be used to encode data in a format that meshopt_decodeFilter can decode * - * meshopt_encodeFilterOct encodes unit vectors with K-bit (K <= 16) signed X/Y as an output. - * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. + * meshopt_encodeFilterOct encodes unit vectors with K-bit (2 <= K <= 16) signed X/Y as an output. + * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. Z will store 1.0f, W is preserved as is. * Input data must contain 4 floats for every vector (count*4 total). * * meshopt_encodeFilterQuat encodes unit quaternions with K-bit (4 <= K <= 16) component encoding. @@ -308,6 +383,10 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t cou * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24). * Exponent can be shared between all components of a given vector as defined by stride or all values of a given component; stride must be divisible by 4. * Input data must contain stride/4 floats for every vector (count*stride/4 total). + * + * meshopt_encodeFilterColor encodes RGBA color data by converting RGB to YCoCg color space with K-bit (2 <= K <= 16) component encoding; A is stored using K-1 bits. + * Each component is stored as an 8-bit or 16-bit integer; stride must be equal to 4 or 8. + * Input data must contain 4 floats for every color (count*4 total). 
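[Editor's note] Illustrative sketch, not part of the upstream patch: packing normals with the octahedral filter and vertex colors with the new color filter into 4-byte outputs prior to meshopt_encodeVertexBuffer; after meshopt_decodeVertexBuffer, the matching meshopt_decodeFilterOct/meshopt_decodeFilterColor calls with the same count and stride restore the data. Array names are placeholders.

#include <vector>
#include "meshoptimizer.h"

// normals: count*4 floats (unit xyz, spare w); colors: count*4 floats (rgba in [0..1])
static void exampleFilterPack(const float* normals, const float* colors, size_t count,
    std::vector<unsigned char>& packed_normals, std::vector<unsigned char>& packed_colors)
{
    packed_normals.resize(count * 4); // stride 4: 8-bit normalized X/Y/Z plus preserved W
    meshopt_encodeFilterOct(packed_normals.data(), count, 4, 8, normals);

    packed_colors.resize(count * 4); // stride 4: 8-bit YCoCg components, alpha keeps 7 bits
    meshopt_encodeFilterColor(packed_colors.data(), count, 4, 8, colors);
}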
*/ enum meshopt_EncodeExpMode { @@ -317,11 +396,14 @@ enum meshopt_EncodeExpMode meshopt_EncodeExpSharedVector, /* When encoding exponents, use shared value for each component of all vectors (best compression) */ meshopt_EncodeExpSharedComponent, + /* When encoding exponents, use separate values for each component, but clamp to 0 (good quality if very small values are not important) */ + meshopt_EncodeExpClamped, }; -MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data); -MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data); -MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode); +MESHOPTIMIZER_API void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data); +MESHOPTIMIZER_API void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data); +MESHOPTIMIZER_API void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode); +MESHOPTIMIZER_API void meshopt_encodeFilterColor(void* destination, size_t count, size_t stride, int bits, const float* data); /** * Simplification options @@ -334,16 +416,34 @@ enum meshopt_SimplifySparse = 1 << 1, /* Treat error limit and resulting error as absolute instead of relative to mesh extents. */ meshopt_SimplifyErrorAbsolute = 1 << 2, + /* Remove disconnected parts of the mesh during simplification incrementally, regardless of the topological restrictions inside components. */ + meshopt_SimplifyPrune = 1 << 3, + /* Produce more regular triangle sizes and shapes during simplification, at some cost to geometric and attribute quality. */ + meshopt_SimplifyRegularize = 1 << 4, + /* Experimental: Allow collapses across attribute discontinuities, except for vertices that are tagged with meshopt_SimplifyVertex_Protect in vertex_lock. */ + meshopt_SimplifyPermissive = 1 << 5, +}; + +/** + * Experimental: Simplification vertex flags/locks, for use in `vertex_lock` arrays in simplification APIs + */ +enum +{ + /* Do not move this vertex. */ + meshopt_SimplifyVertex_Lock = 1 << 0, + /* Protect attribute discontinuity at this vertex; must be used together with meshopt_SimplifyPermissive option. */ + meshopt_SimplifyVertex_Protect = 1 << 1, }; /** * Mesh simplifier * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error. - * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification. + * If not all attributes from the input mesh are needed, it's recommended to reindex the mesh without them prior to simplification. * Returns the number of indices after simplification, with destination containing new index data + * * The resulting index buffer references vertices from the original vertex buffer. - * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. 
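[Editor's note] Illustrative sketch, not part of the upstream patch: passing the new simplification options to meshopt_simplify (declared just below). The 25% triangle target and 1% error budget are arbitrary example values; meshopt_SimplifyPrune additionally removes small disconnected pieces as the mesh is simplified.

#include <vector>
#include "meshoptimizer.h"

// positions: tightly packed float3 per vertex (stride 12)
static std::vector<unsigned int> exampleSimplify(const std::vector<unsigned int>& indices, const std::vector<float>& positions)
{
    size_t target_index_count = (indices.size() / 3 / 4) * 3; // ~25% of the original triangles
    float target_error = 0.01f;                               // 1% of mesh extents
    float result_error = 0.f;

    std::vector<unsigned int> lod(indices.size()); // worst case is index_count elements
    size_t lod_size = meshopt_simplify(lod.data(), indices.data(), indices.size(),
        positions.data(), positions.size() / 3, sizeof(float) * 3,
        target_index_count, target_error, meshopt_SimplifyPrune, &result_error);

    lod.resize(lod_size);
    return lod;
}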
* * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! * vertex_positions should have float3 position in the first 12 bytes of each vertex @@ -354,45 +454,94 @@ enum MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error); /** - * Experimental: Mesh simplifier with attribute metric - * The algorithm ehnahces meshopt_simplify by incorporating attribute values into the error metric used to prioritize simplification order; see meshopt_simplify documentation for details. - * Note that the number of attributes affects memory requirements and running time; this algorithm requires ~1.5x more memory and time compared to meshopt_simplify when using 4 scalar attributes. + * Mesh simplifier with attribute metric + * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible. + * Similar to meshopt_simplify, but incorporates attribute values into the error metric used to prioritize simplification order. + * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error. + * If not all attributes from the input mesh are needed, it's recommended to reindex the mesh without them prior to simplification. + * Returns the number of indices after simplification, with destination containing new index data * + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * Note that the number of attributes with non-zero weights affects memory requirements and running time. + * + * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! + * vertex_positions should have float3 position in the first 12 bytes of each vertex * vertex_attributes should have attribute_count floats for each vertex - * attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position. The recommended weight range is [1e-3..1e-1], assuming attribute data is in [0..1] range. - * attribute_count must be <= 16 + * attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position + * attribute_count must be <= 32 * vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; 1 denotes vertices that can't be moved - * TODO target_error/result_error currently use combined distance+attribute error; this may change in the future + * target_error represents the error relative to mesh extents that can be tolerated, e.g. 
0.01 = 1% deformation; value range [0..1] + * options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default + * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error); +MESHOPTIMIZER_API size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error); /** - * Experimental: Mesh simplifier (sloppy) + * Mesh simplifier with position/attribute update + * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible. + * Similar to meshopt_simplifyWithAttributes, but destructively updates positions and attribute values for optimal appearance. + * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error. + * If not all attributes from the input mesh are needed, it's recommended to reindex the mesh without them prior to simplification. + * Returns the number of indices after simplification, indices are destructively updated with new index data + * + * The updated index buffer references vertices from the original vertex buffer, however the vertex positions and attributes are updated in-place. + * Creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended; if the original vertex data is needed, it should be copied before simplification. + * Note that the number of attributes with non-zero weights affects memory requirements and running time. Attributes with zero weights are not updated. + * + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * vertex_attributes should have attribute_count floats for each vertex + * attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position + * attribute_count must be <= 32 + * vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; 1 denotes vertices that can't be moved + * target_error represents the error relative to mesh extents that can be tolerated, e.g. 
0.01 = 1% deformation; value range [0..1] + * options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default + * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification + */ +MESHOPTIMIZER_API size_t meshopt_simplifyWithUpdate(unsigned int* indices, size_t index_count, float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error); + +/** + * Mesh simplifier (sloppy) * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error. * Returns the number of indices after simplification, with destination containing new index data * The resulting index buffer references vertices from the original vertex buffer. - * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. * * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)! * vertex_positions should have float3 position in the first 12 bytes of each vertex + * vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; vertices that can't be moved should set 1 consistently for all indices with the same position * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1] * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); +MESHOPTIMIZER_API size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const unsigned char* vertex_lock, size_t target_index_count, float target_error, float* result_error); /** - * Experimental: Point cloud simplifier + * Mesh simplifier (pruner) + * Reduces the number of triangles in the mesh by removing small isolated parts of the mesh + * Returns the number of indices after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the target index buffer, worst case is index_count elements + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * target_error represents the error relative to mesh extents that can be tolerated, e.g. 
0.01 = 1% deformation; value range [0..1] + */ +MESHOPTIMIZER_API size_t meshopt_simplifyPrune(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error); + +/** + * Point cloud simplifier * Reduces the number of points in the cloud to reach the given target * Returns the number of points after simplification, with destination containing new index data * The resulting index buffer references vertices from the original vertex buffer. - * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. * * destination must contain enough space for the target index buffer (target_vertex_count elements) * vertex_positions should have float3 position in the first 12 bytes of each vertex - * vertex_colors should can be NULL; when it's not NULL, it should have float3 color in the first 12 bytes of each vertex + * vertex_colors can be NULL; when it's not NULL, it should have float3 color in the first 12 bytes of each vertex + * color_weight determines relative priority of color wrt position; 1.0 is a safe default */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count); +MESHOPTIMIZER_API size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count); /** * Returns the error scaling factor used by the simplifier to convert between absolute and relative extents @@ -440,6 +589,19 @@ struct meshopt_VertexCacheStatistics */ MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size); +struct meshopt_VertexFetchStatistics +{ + unsigned int bytes_fetched; + float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */ +}; + +/** + * Vertex fetch cache analyzer + * Returns cache hit statistics using a simplified direct mapped model + * Results may not match actual GPU performance + */ +MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size); + struct meshopt_OverdrawStatistics { unsigned int pixels_covered; @@ -456,26 +618,34 @@ struct meshopt_OverdrawStatistics */ MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); -struct meshopt_VertexFetchStatistics +struct meshopt_CoverageStatistics { - unsigned int bytes_fetched; - float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */ + float coverage[3]; + float extent; /* viewport size in mesh coordinates */ }; /** - * Vertex fetch cache analyzer - * Returns cache hit statistics using a simplified direct mapped model - * Results may not match 
actual GPU performance + * Coverage analyzer + * Returns coverage statistics (ratio of viewport pixels covered from each axis) using a software rasterizer + * + * vertex_positions should have float3 position in the first 12 bytes of each vertex */ -MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_API struct meshopt_CoverageStatistics meshopt_analyzeCoverage(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +/** + * Meshlet is a small mesh cluster (subset) that consists of: + * - triangles, an 8-bit micro triangle (index) buffer, that for each triangle specifies three local vertices to use; + * - vertices, a 32-bit vertex indirection buffer, that for each local vertex specifies which mesh vertex to fetch vertex attributes from. + * + * For efficiency, meshlet triangles and vertices are packed into two large arrays; this structure contains offsets and counts to access the data. + */ struct meshopt_Meshlet { /* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */ unsigned int vertex_offset; unsigned int triangle_offset; - /* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */ + /* number of vertices and triangles used in the meshlet; data is stored in consecutive range [offset..offset+count) for vertices and [offset..offset+count*3) for triangles */ unsigned int vertex_count; unsigned int triangle_count; }; @@ -484,14 +654,15 @@ struct meshopt_Meshlet * Meshlet builder * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers. + * When targeting mesh shading hardware, for maximum efficiency meshlets should be further optimized using meshopt_optimizeMeshlet. * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters. * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first. * * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound - * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices - * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 + * meshlet_vertices must contain enough space for all meshlets, worst case is index_count elements (*not* vertex_count!) 
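[Editor's note] Illustrative sketch, not part of the upstream patch: reporting vertex fetch efficiency and the new per-axis coverage statistics for a mesh. Output formatting and names are placeholders.

#include <cstdio>
#include <vector>
#include "meshoptimizer.h"

// positions: tightly packed float3 per vertex; vertex_size: size of the render vertex in bytes
static void exampleAnalyze(const std::vector<unsigned int>& indices, const std::vector<float>& positions, size_t vertex_size)
{
    size_t vertex_count = positions.size() / 3;

    meshopt_VertexFetchStatistics vfs = meshopt_analyzeVertexFetch(indices.data(), indices.size(), vertex_count, vertex_size);
    printf("fetched %u bytes, overfetch %.2f (1.0 = every byte fetched once)\n", vfs.bytes_fetched, vfs.overfetch);

    meshopt_CoverageStatistics cs = meshopt_analyzeCoverage(indices.data(), indices.size(), positions.data(), vertex_count, sizeof(float) * 3);
    printf("coverage x/y/z: %.3f %.3f %.3f, viewport extent %.1f\n", cs.coverage[0], cs.coverage[1], cs.coverage[2], cs.extent);
}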
+ * meshlet_triangles must contain enough space for all meshlets, worst case is index_count elements * vertex_positions should have float3 position in the first 12 bytes of each vertex - * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512; max_triangles must be divisible by 4) + * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 256, max_triangles <= 512) * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency */ MESHOPTIMIZER_API size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); @@ -499,14 +670,41 @@ MESHOPTIMIZER_API size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshl MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); /** - * Experimental: Meshlet optimizer - * Reorders meshlet vertices and triangles to maximize locality to improve rasterizer throughput + * Meshlet builder with flexible cluster sizes + * Splits the mesh into a set of meshlets, similarly to meshopt_buildMeshlets, but allows to specify minimum and maximum number of triangles per meshlet. + * Clusters between min and max triangle counts are split when the cluster size would have exceeded the expected cluster size by more than split_factor. * - * meshlet_triangles and meshlet_vertices must refer to meshlet triangle and vertex index data; when buildMeshlets* is used, these - * need to be computed from meshlet's vertex_offset and triangle_offset - * triangle_count and vertex_count must not exceed implementation limits (vertex_count <= 255 - not 256!, triangle_count <= 512) + * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound using min_triangles (*not* max!) + * meshlet_vertices must contain enough space for all meshlets, worst case is index_count elements (*not* vertex_count!) 
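// --- Illustrative sketch (not from the patch): worst-case allocation for the meshlet builder ---
// This follows the sizing rules quoted above: meshopt_buildMeshletsBound for the meshlet array,
// and index_count elements for both the vertex indirection and micro index buffers.
// `indices` and `positions` (tightly packed float3, non-empty mesh) are assumed inputs; the
// limits chosen (64 vertices, 96 triangles) are just conventional values under the documented maximums.

#include "meshoptimizer.h"
#include <vector>

size_t buildMeshletsExample(const std::vector<unsigned int>& indices, const std::vector<float>& positions,
    std::vector<meshopt_Meshlet>& meshlets, std::vector<unsigned int>& meshlet_vertices,
    std::vector<unsigned char>& meshlet_triangles)
{
    const size_t max_vertices = 64;
    const size_t max_triangles = 96;
    const float cone_weight = 0.25f; // >0 trades some cluster size for cone culling efficiency

    size_t index_count = indices.size();
    size_t vertex_count = positions.size() / 3;

    meshlets.resize(meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
    meshlet_vertices.resize(index_count);  // worst case, per the comment above
    meshlet_triangles.resize(index_count); // worst case, per the comment above

    size_t count = meshopt_buildMeshlets(meshlets.data(), meshlet_vertices.data(), meshlet_triangles.data(),
        indices.data(), index_count, positions.data(), vertex_count, sizeof(float) * 3,
        max_vertices, max_triangles, cone_weight);

    // trim the packed arrays down to the data actually referenced by the last meshlet
    const meshopt_Meshlet& last = meshlets[count - 1];
    meshlet_vertices.resize(last.vertex_offset + last.vertex_count);
    meshlet_triangles.resize(last.triangle_offset + ((last.triangle_count * 3 + 3) & ~3u));
    meshlets.resize(count);

    return count;
}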
+ * meshlet_triangles must contain enough space for all meshlets, worst case is index_count elements + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * max_vertices, min_triangles and max_triangles must not exceed implementation limits (max_vertices <= 256, max_triangles <= 512; min_triangles <= max_triangles) + * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency + * split_factor should be set to a non-negative value; when greater than 0, clusters that have large bounds may be split unless they are under the min_triangles threshold */ -MESHOPTIMIZER_EXPERIMENTAL void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count); +MESHOPTIMIZER_API size_t meshopt_buildMeshletsFlex(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor); + +/** + * Meshlet builder that produces clusters optimized for raytracing + * Splits the mesh into a set of meshlets, similarly to meshopt_buildMeshlets, but optimizes cluster subdivision for raytracing and allows to specify minimum and maximum number of triangles per meshlet. + * + * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound using min_triangles (*not* max!) + * meshlet_vertices must contain enough space for all meshlets, worst case is index_count elements (*not* vertex_count!) + * meshlet_triangles must contain enough space for all meshlets, worst case is index_count elements + * vertex_positions should have float3 position in the first 12 bytes of each vertex + * max_vertices, min_triangles and max_triangles must not exceed implementation limits (max_vertices <= 256, max_triangles <= 512; min_triangles <= max_triangles) + * fill_weight allows to prioritize clusters that are closer to maximum size at some cost to SAH quality; 0.5 is a safe default + */ +MESHOPTIMIZER_API size_t meshopt_buildMeshletsSpatial(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight); + +/** + * Meshlet optimizer + * Reorders meshlet vertices and triangles to maximize locality which can improve rasterizer throughput or ray tracing performance when using fast-build modes. + * + * meshlet_triangles and meshlet_vertices must refer to meshlet data; when buildMeshlets* is used, these need to be computed from meshlet's vertex_offset and triangle_offset + * triangle_count and vertex_count must not exceed implementation limits (vertex_count <= 256, triangle_count <= 512) + */ +MESHOPTIMIZER_API void meshopt_optimizeMeshlet(unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t triangle_count, size_t vertex_count); struct meshopt_Bounds { @@ -544,11 +742,35 @@ struct meshopt_Bounds * Real-Time Rendering 4th Edition, section 19.3). 
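// --- Illustrative sketch (not from the patch): per-meshlet post-processing ---
// As suggested above, each meshlet produced by the builder can be passed to
// meshopt_optimizeMeshlet, which reorders that meshlet's slice of the packed arrays in place.
// Inputs are assumed to come from the buildMeshletsExample sketch above.

#include "meshoptimizer.h"
#include <vector>

void optimizeAllMeshlets(const std::vector<meshopt_Meshlet>& meshlets,
    std::vector<unsigned int>& meshlet_vertices, std::vector<unsigned char>& meshlet_triangles)
{
    for (size_t i = 0; i < meshlets.size(); ++i)
    {
        const meshopt_Meshlet& m = meshlets[i];

        // operates on one meshlet's vertex/triangle range, within the documented size limits
        meshopt_optimizeMeshlet(&meshlet_vertices[m.vertex_offset], &meshlet_triangles[m.triangle_offset],
            m.triangle_count, m.vertex_count);
    }
}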
* * vertex_positions should have float3 position in the first 12 bytes of each vertex - * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size) + * vertex_count should specify the number of vertices in the entire mesh, not cluster or meshlet + * index_count/3 and triangle_count must not exceed implementation limits (<= 512) */ MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +/** + * Sphere bounds generator + * Creates bounding sphere around a set of points or a set of spheres; returns the center and radius of the sphere, with other fields of the result set to 0. + * + * positions should have float3 position in the first 12 bytes of each element + * radii can be NULL; when it's not NULL, it should have a non-negative float radius in the first 4 bytes of each element + */ +MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeSphereBounds(const float* positions, size_t count, size_t positions_stride, const float* radii, size_t radii_stride); + +/** + * Cluster partitioner + * Partitions clusters into groups of similar size, prioritizing grouping clusters that share vertices or are close to each other. + * When vertex positions are not provided, only clusters that share vertices will be grouped together, which may result in small partitions for some inputs. + * + * destination must contain enough space for the resulting partition data (cluster_count elements) + * destination[i] will contain the partition id for cluster i, with the total number of partitions returned by the function + * cluster_indices should have the vertex indices referenced by each cluster, stored sequentially + * cluster_index_counts should have the number of indices in each cluster; sum of all cluster_index_counts must be equal to total_index_count + * vertex_positions can be NULL; when it's not NULL, it should have float3 position in the first 12 bytes of each vertex + * target_partition_size is a target size for each partition, in clusters; the resulting partitions may be smaller or larger (up to target + target/3) + */ +MESHOPTIMIZER_API size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size); + /** * Spatial sorter * Generates a remap table that can be used to reorder points for spatial locality. @@ -560,13 +782,44 @@ MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsig MESHOPTIMIZER_API void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); /** - * Experimental: Spatial sorter + * Spatial sorter * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache. 
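// --- Illustrative sketch (not from the patch): grouping clusters with meshopt_partitionClusters ---
// Each meshlet's vertex indirection list doubles as the "vertex indices referenced by each
// cluster" that the partitioner expects; positions are optional but improve the grouping.
// The names and the target size of 8 clusters per partition are assumptions for the example.

#include "meshoptimizer.h"
#include <vector>

std::vector<unsigned int> partitionMeshlets(const std::vector<meshopt_Meshlet>& meshlets,
    const std::vector<unsigned int>& meshlet_vertices, const std::vector<float>& positions)
{
    std::vector<unsigned int> cluster_indices;
    std::vector<unsigned int> cluster_index_counts;

    for (size_t i = 0; i < meshlets.size(); ++i)
    {
        const meshopt_Meshlet& m = meshlets[i];

        // per-cluster index lists are stored sequentially, with a count per cluster
        cluster_indices.insert(cluster_indices.end(),
            meshlet_vertices.begin() + m.vertex_offset,
            meshlet_vertices.begin() + m.vertex_offset + m.vertex_count);
        cluster_index_counts.push_back(m.vertex_count);
    }

    std::vector<unsigned int> partition(meshlets.size());

    meshopt_partitionClusters(partition.data(),
        cluster_indices.data(), cluster_indices.size(),
        cluster_index_counts.data(), cluster_index_counts.size(),
        positions.data(), positions.size() / 3, sizeof(float) * 3,
        /* target_partition_size= */ 8);

    return partition; // partition[i] is the group id assigned to meshlet i
}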
* * destination must contain enough space for the resulting index buffer (index_count elements) * vertex_positions should have float3 position in the first 12 bytes of each vertex */ -MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +MESHOPTIMIZER_API void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Spatial clusterizer + * Reorders points into clusters optimized for spatial locality, and generates a new index buffer. + * Ensures the output can be split into cluster_size chunks where each chunk has good positional locality. Only the last chunk will be smaller than cluster_size. + * + * destination must contain enough space for the resulting index buffer (vertex_count elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex + */ +MESHOPTIMIZER_API void meshopt_spatialClusterPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t cluster_size); + +/** + * Quantize a float into half-precision (as defined by IEEE-754 fp16) floating point value + * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest + * Representable magnitude range: [6e-5; 65504] + * Maximum relative reconstruction error: 5e-4 + */ +MESHOPTIMIZER_API unsigned short meshopt_quantizeHalf(float v); + +/** + * Quantize a float into a floating point value with a limited number of significant mantissa bits, preserving the IEEE-754 fp32 binary representation + * Preserves infinities/NaN, flushes denormals to zero, rounds to nearest + * Assumes N is in a valid mantissa precision range, which is 1..23 + */ +MESHOPTIMIZER_API float meshopt_quantizeFloat(float v, int N); + +/** + * Reverse quantization of a half-precision (as defined by IEEE-754 fp16) floating point value + * Preserves Inf/NaN, flushes denormals to zero + */ +MESHOPTIMIZER_API float meshopt_dequantizeHalf(unsigned short h); /** * Set allocation callbacks @@ -574,13 +827,13 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* desti * Note that all algorithms only allocate memory for temporary use. * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first. 
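// --- Illustrative sketch (not from the patch): using the scalar quantization helpers ---
// meshopt_quantizeHalf/meshopt_dequantizeHalf round-trip a position stream through fp16 storage,
// while meshopt_quantizeFloat keeps fp32 storage but drops low mantissa bits (10 bits here is an
// assumed choice) so the data compresses better downstream.

#include "meshoptimizer.h"
#include <vector>

std::vector<unsigned short> quantizePositionsHalf(const std::vector<float>& positions)
{
    std::vector<unsigned short> packed(positions.size());

    for (size_t i = 0; i < positions.size(); ++i)
        packed[i] = meshopt_quantizeHalf(positions[i]); // rounds to nearest, +-inf on overflow

    return packed;
}

float halfRoundTripError(float v)
{
    return v - meshopt_dequantizeHalf(meshopt_quantizeHalf(v));
}

void quantizeNormalsInPlace(std::vector<float>& normals)
{
    for (size_t i = 0; i < normals.size(); ++i)
        normals[i] = meshopt_quantizeFloat(normals[i], 10); // keep 10 significant mantissa bits
}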
*/ -MESHOPTIMIZER_API void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*)); +MESHOPTIMIZER_API void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*)); #ifdef __cplusplus } /* extern "C" */ #endif -/* Quantization into commonly supported data formats */ +/* Quantization into fixed point normalized formats; these are only available as inline C++ functions */ #ifdef __cplusplus /** * Quantize a float in [0..1] range into an N-bit fixed point unorm value @@ -595,27 +848,6 @@ inline int meshopt_quantizeUnorm(float v, int N); * Maximum reconstruction error: 1/2^N */ inline int meshopt_quantizeSnorm(float v, int N); - -/** - * Quantize a float into half-precision (as defined by IEEE-754 fp16) floating point value - * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest - * Representable magnitude range: [6e-5; 65504] - * Maximum relative reconstruction error: 5e-4 - */ -MESHOPTIMIZER_API unsigned short meshopt_quantizeHalf(float v); - -/** - * Quantize a float into a floating point value with a limited number of significant mantissa bits, preserving the IEEE-754 fp32 binary representation - * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest - * Assumes N is in a valid mantissa precision range, which is 1..23 - */ -MESHOPTIMIZER_API float meshopt_quantizeFloat(float v, int N); - -/** - * Reverse quantization of a half-precision (as defined by IEEE-754 fp16) floating point value - * Preserves Inf/NaN, flushes denormals to zero - */ -MESHOPTIMIZER_API float meshopt_dequantizeHalf(unsigned short h); #endif /** @@ -631,6 +863,10 @@ template inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); template inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +template +inline size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, F callback); +template +inline size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, F callback); template inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap); template @@ -642,6 +878,8 @@ inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indice template inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); template +inline size_t meshopt_generateProvokingIndexBuffer(T* destination, unsigned int* reorder, const T* indices, size_t index_count, size_t vertex_count); +template inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count); template inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count); @@ -661,29 +899,44 @@ template inline size_t 
meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); template inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +inline size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level); template inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL); template inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL); template +inline size_t meshopt_simplifyWithUpdate(T* indices, size_t index_count, float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL); +template inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = NULL); template +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const unsigned char* vertex_lock, size_t target_index_count, float target_error, float* result_error = NULL); +template +inline size_t meshopt_simplifyPrune(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error); +template inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index); template inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index); template -inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size); +inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size); +template +inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size); template inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); template -inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size); +inline meshopt_CoverageStatistics meshopt_analyzeCoverage(const T* indices, size_t 
index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); template inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); template inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); template +inline size_t meshopt_buildMeshletsFlex(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor); +template +inline size_t meshopt_buildMeshletsSpatial(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight); +template inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); template +inline size_t meshopt_partitionClusters(unsigned int* destination, const T* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size); +template inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); #endif @@ -717,31 +970,39 @@ inline int meshopt_quantizeSnorm(float v, int N) class meshopt_Allocator { public: - template - struct StorageT + struct Storage { - static void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t); - static void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*); + void* (MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t); + void (MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*); }; - typedef StorageT Storage; +#ifdef MESHOPTIMIZER_ALLOC_EXPORT + MESHOPTIMIZER_API static Storage& storage(); +#else + static Storage& storage() + { + static Storage s = {::operator new, ::operator delete }; + return s; + } +#endif meshopt_Allocator() - : blocks() - , count(0) + : blocks() + , count(0) { } ~meshopt_Allocator() { for (size_t i = count; i > 0; --i) - Storage::deallocate(blocks[i - 1]); + storage().deallocate(blocks[i - 1]); } - template T* allocate(size_t size) + template + T* allocate(size_t size) { assert(count < sizeof(blocks) / sizeof(blocks[0])); - T* result = static_cast(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T))); + T* result = static_cast(storage().allocate(size > size_t(-1) / sizeof(T) ? 
size_t(-1) : size * sizeof(T))); blocks[count++] = result; return result; } @@ -749,7 +1010,7 @@ public: void deallocate(void* ptr) { assert(count > 0 && blocks[count - 1] == ptr); - Storage::deallocate(ptr); + storage().deallocate(ptr); count--; } @@ -757,10 +1018,6 @@ private: void* blocks[24]; size_t count; }; - -// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker -template void* (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT::allocate)(size_t) = operator new; -template void (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT::deallocate)(void*) = operator delete; #endif /* Inline implementation for C++ templated wrappers */ @@ -782,7 +1039,7 @@ struct meshopt_IndexAdapter { size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int); - data = static_cast(meshopt_Allocator::Storage::allocate(size)); + data = static_cast(meshopt_Allocator::storage().allocate(size)); if (input) { @@ -799,7 +1056,7 @@ struct meshopt_IndexAdapter result[i] = T(data[i]); } - meshopt_Allocator::Storage::deallocate(data); + meshopt_Allocator::storage().deallocate(data); } }; @@ -830,6 +1087,30 @@ inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const return meshopt_generateVertexRemapMulti(destination, indices ? in.data : NULL, index_count, vertex_count, streams, stream_count); } +template +inline size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, F callback) +{ + struct Call + { + static int compare(void* context, unsigned int lhs, unsigned int rhs) { return (*static_cast(context))(lhs, rhs) ? 1 : 0; } + }; + + return meshopt_generateVertexRemapCustom(destination, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride, &Call::compare, &callback); +} + +template +inline size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, F callback) +{ + struct Call + { + static int compare(void* context, unsigned int lhs, unsigned int rhs) { return (*static_cast(context))(lhs, rhs) ? 1 : 0; } + }; + + meshopt_IndexAdapter in(NULL, indices, indices ? index_count : 0); + + return meshopt_generateVertexRemapCustom(destination, indices ? 
in.data : NULL, index_count, vertex_positions, vertex_count, vertex_positions_stride, &Call::compare, &callback); +} + template inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap) { @@ -875,6 +1156,19 @@ inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* ind meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } +template +inline size_t meshopt_generateProvokingIndexBuffer(T* destination, unsigned int* reorder, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); + + size_t bound = vertex_count + (index_count / 3); + assert(size_t(T(bound - 1)) == bound - 1); // bound - 1 must fit in T + (void)bound; + + return meshopt_generateProvokingIndexBuffer(out.data, reorder, in.data, index_count, vertex_count); +} + template inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) { @@ -961,6 +1255,11 @@ inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size); } +inline size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level) +{ + return meshopt_encodeVertexBufferLevel(buffer, buffer_size, vertices, vertex_count, vertex_size, level, -1); +} + template inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error) { @@ -979,13 +1278,39 @@ inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, s return meshopt_simplifyWithAttributes(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, result_error); } +template +inline size_t meshopt_simplifyWithUpdate(T* indices, size_t index_count, float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error) +{ + meshopt_IndexAdapter inout(indices, indices, index_count); + + return meshopt_simplifyWithUpdate(inout.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, result_error); +} + template inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error) { meshopt_IndexAdapter in(NULL, indices, index_count); meshopt_IndexAdapter out(destination, NULL, index_count); - return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error); + 
return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, NULL, target_index_count, target_error, result_error); +} + +template +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const unsigned char* vertex_lock, size_t target_index_count, float target_error, float* result_error) +{ + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); + + return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_lock, target_index_count, target_error, result_error); +} + +template +inline size_t meshopt_simplifyPrune(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error) +{ + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); + + return meshopt_simplifyPrune(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_error); } template @@ -1007,11 +1332,19 @@ inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_ } template -inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size) +inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size) { meshopt_IndexAdapter in(NULL, indices, index_count); - return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size); + return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, primgroup_size); +} + +template +inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter in(NULL, indices, index_count); + + return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); } template @@ -1023,11 +1356,11 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size } template -inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +inline meshopt_CoverageStatistics meshopt_analyzeCoverage(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { meshopt_IndexAdapter in(NULL, indices, index_count); - return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); + return meshopt_analyzeCoverage(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } template @@ -1046,6 +1379,22 @@ inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles); } +template +inline size_t meshopt_buildMeshletsFlex(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t 
vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float cone_weight, float split_factor) +{ + meshopt_IndexAdapter in(NULL, indices, index_count); + + return meshopt_buildMeshletsFlex(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, min_triangles, max_triangles, cone_weight, split_factor); +} + +template +inline size_t meshopt_buildMeshletsSpatial(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t min_triangles, size_t max_triangles, float fill_weight) +{ + meshopt_IndexAdapter in(NULL, indices, index_count); + + return meshopt_buildMeshletsSpatial(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, min_triangles, max_triangles, fill_weight); +} + template inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { @@ -1054,6 +1403,14 @@ inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t inde return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } +template +inline size_t meshopt_partitionClusters(unsigned int* destination, const T* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size) +{ + meshopt_IndexAdapter in(NULL, cluster_indices, total_index_count); + + return meshopt_partitionClusters(destination, in.data, total_index_count, cluster_index_counts, cluster_count, vertex_positions, vertex_count, vertex_positions_stride, target_partition_size); +} + template inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { @@ -1065,7 +1422,7 @@ inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_ #endif /** - * Copyright (c) 2016-2024 Arseny Kapoulkine + * Copyright (c) 2016-2025 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/Source/ThirdParty/meshoptimizer/overdrawoptimizer.cpp b/Source/ThirdParty/meshoptimizer/overdrawoptimizer.cpp index cc22dbcff..682b924a9 100644 --- a/Source/ThirdParty/meshoptimizer/overdrawoptimizer.cpp +++ b/Source/ThirdParty/meshoptimizer/overdrawoptimizer.cpp @@ -10,24 +10,24 @@ namespace meshopt { -static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count) +static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count) { size_t vertex_stride_float = vertex_positions_stride / sizeof(float); float mesh_centroid[3] = {}; - for (size_t i = 0; i < index_count; ++i) + for (size_t i = 0; i < vertex_count; ++i) { - const float* p = 
vertex_positions + vertex_stride_float * indices[i]; + const float* p = vertex_positions + vertex_stride_float * i; mesh_centroid[0] += p[0]; mesh_centroid[1] += p[1]; mesh_centroid[2] += p[2]; } - mesh_centroid[0] /= index_count; - mesh_centroid[1] /= index_count; - mesh_centroid[2] /= index_count; + mesh_centroid[0] /= float(vertex_count); + mesh_centroid[1] /= float(vertex_count); + mesh_centroid[2] /= float(vertex_count); for (size_t cluster = 0; cluster < cluster_count; ++cluster) { @@ -306,7 +306,7 @@ void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* ind // fill sort data float* sort_data = allocator.allocate(cluster_count); - calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count); + calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride, clusters, cluster_count); // sort clusters using sort data unsigned short* sort_keys = allocator.allocate(cluster_count); diff --git a/Source/ThirdParty/meshoptimizer/partition.cpp b/Source/ThirdParty/meshoptimizer/partition.cpp new file mode 100644 index 000000000..4119a53ed --- /dev/null +++ b/Source/ThirdParty/meshoptimizer/partition.cpp @@ -0,0 +1,624 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include +#include +#include + +// This work is based on: +// Takio Kurita. An efficient agglomerative clustering algorithm using a heap. 1991 +namespace meshopt +{ + +// To avoid excessive recursion for malformed inputs, we switch to bisection after some depth +const int kMergeDepthCutoff = 40; + +struct ClusterAdjacency +{ + unsigned int* offsets; + unsigned int* clusters; + unsigned int* shared; +}; + +static void filterClusterIndices(unsigned int* data, unsigned int* offsets, const unsigned int* cluster_indices, const unsigned int* cluster_index_counts, size_t cluster_count, unsigned char* used, size_t vertex_count, size_t total_index_count) +{ + (void)vertex_count; + (void)total_index_count; + + size_t cluster_start = 0; + size_t cluster_write = 0; + + for (size_t i = 0; i < cluster_count; ++i) + { + offsets[i] = unsigned(cluster_write); + + // copy cluster indices, skipping duplicates + for (size_t j = 0; j < cluster_index_counts[i]; ++j) + { + unsigned int v = cluster_indices[cluster_start + j]; + assert(v < vertex_count); + + data[cluster_write] = v; + cluster_write += 1 - used[v]; + used[v] = 1; + } + + // reset used flags for the next cluster + for (size_t j = offsets[i]; j < cluster_write; ++j) + used[data[j]] = 0; + + cluster_start += cluster_index_counts[i]; + } + + assert(cluster_start == total_index_count); + assert(cluster_write <= total_index_count); + offsets[cluster_count] = unsigned(cluster_write); +} + +static float computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, float* out_center) +{ + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + float center[3] = {0, 0, 0}; + + // approximate center of the cluster by averaging all vertex positions + for (size_t j = 0; j < index_count; ++j) + { + const float* p = vertex_positions + indices[j] * vertex_stride_float; + + center[0] += p[0]; + center[1] += p[1]; + center[2] += p[2]; + } + + // note: technically clusters can't be empty per meshopt_partitionCluster but we check for a division by zero in case that changes + if (index_count) + { + center[0] /= 
float(index_count); + center[1] /= float(index_count); + center[2] /= float(index_count); + } + + // compute radius of the bounding sphere for each cluster + float radiussq = 0; + + for (size_t j = 0; j < index_count; ++j) + { + const float* p = vertex_positions + indices[j] * vertex_stride_float; + + float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]); + + radiussq = radiussq < d2 ? d2 : radiussq; + } + + memcpy(out_center, center, sizeof(center)); + return sqrtf(radiussq); +} + +static void buildClusterAdjacency(ClusterAdjacency& adjacency, const unsigned int* cluster_indices, const unsigned int* cluster_offsets, size_t cluster_count, size_t vertex_count, meshopt_Allocator& allocator) +{ + unsigned int* ref_offsets = allocator.allocate(vertex_count + 1); + + // compute number of clusters referenced by each vertex + memset(ref_offsets, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < cluster_count; ++i) + { + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + ref_offsets[cluster_indices[j]]++; + } + + // compute (worst-case) number of adjacent clusters for each cluster + size_t total_adjacency = 0; + + for (size_t i = 0; i < cluster_count; ++i) + { + size_t count = 0; + + // worst case is every vertex has a disjoint cluster list + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + count += ref_offsets[cluster_indices[j]] - 1; + + // ... but only every other cluster can be adjacent in the end + total_adjacency += count < cluster_count - 1 ? count : cluster_count - 1; + } + + // we can now allocate adjacency buffers + adjacency.offsets = allocator.allocate(cluster_count + 1); + adjacency.clusters = allocator.allocate(total_adjacency); + adjacency.shared = allocator.allocate(total_adjacency); + + // convert ref counts to offsets + size_t total_refs = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + size_t count = ref_offsets[i]; + ref_offsets[i] = unsigned(total_refs); + total_refs += count; + } + + unsigned int* ref_data = allocator.allocate(total_refs); + + // fill cluster refs for each vertex + for (size_t i = 0; i < cluster_count; ++i) + { + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + ref_data[ref_offsets[cluster_indices[j]]++] = unsigned(i); + } + + // after the previous pass, ref_offsets contain the end of the data for each vertex; shift it forward to get the start + memmove(ref_offsets + 1, ref_offsets, vertex_count * sizeof(unsigned int)); + ref_offsets[0] = 0; + + // fill cluster adjacency for each cluster... + adjacency.offsets[0] = 0; + + for (size_t i = 0; i < cluster_count; ++i) + { + unsigned int* adj = adjacency.clusters + adjacency.offsets[i]; + unsigned int* shd = adjacency.shared + adjacency.offsets[i]; + size_t count = 0; + + for (size_t j = cluster_offsets[i]; j < cluster_offsets[i + 1]; ++j) + { + unsigned int v = cluster_indices[j]; + + // merge the entire cluster list of each vertex into current list + for (size_t k = ref_offsets[v]; k < ref_offsets[v + 1]; ++k) + { + unsigned int c = ref_data[k]; + assert(c < cluster_count); + + if (c == unsigned(i)) + continue; + + // if the cluster is already in the list, increment the shared count + bool found = false; + for (size_t l = 0; l < count; ++l) + if (adj[l] == c) + { + found = true; + shd[l]++; + break; + } + + // .. 
or append a new cluster + if (!found) + { + adj[count] = c; + shd[count] = 1; + count++; + } + } + } + + // mark the end of the adjacency list; the next cluster will start there as well + adjacency.offsets[i + 1] = adjacency.offsets[i] + unsigned(count); + } + + assert(adjacency.offsets[cluster_count] <= total_adjacency); + + // ref_offsets can't be deallocated as it was allocated before adjacency + allocator.deallocate(ref_data); +} + +struct ClusterGroup +{ + int group; + int next; + unsigned int size; // 0 unless root + unsigned int vertices; + + float center[3]; + float radius; +}; + +struct GroupOrder +{ + unsigned int id; + int order; +}; + +static void heapPush(GroupOrder* heap, size_t size, GroupOrder item) +{ + // insert a new element at the end (breaks heap invariant) + heap[size++] = item; + + // bubble up the new element to its correct position + size_t i = size - 1; + while (i > 0 && heap[i].order < heap[(i - 1) / 2].order) + { + size_t p = (i - 1) / 2; + + GroupOrder temp = heap[i]; + heap[i] = heap[p]; + heap[p] = temp; + i = p; + } +} + +static GroupOrder heapPop(GroupOrder* heap, size_t size) +{ + assert(size > 0); + GroupOrder top = heap[0]; + + // move the last element to the top (breaks heap invariant) + heap[0] = heap[--size]; + + // bubble down the new top element to its correct position + size_t i = 0; + while (i * 2 + 1 < size) + { + // find the smallest child + size_t j = i * 2 + 1; + j += (j + 1 < size && heap[j + 1].order < heap[j].order); + + // if the parent is already smaller than both children, we're done + if (heap[j].order >= heap[i].order) + break; + + // otherwise, swap the parent and child and continue + GroupOrder temp = heap[i]; + heap[i] = heap[j]; + heap[j] = temp; + i = j; + } + + return top; +} + +static unsigned int countShared(const ClusterGroup* groups, int group1, int group2, const ClusterAdjacency& adjacency) +{ + unsigned int total = 0; + + for (int i1 = group1; i1 >= 0; i1 = groups[i1].next) + for (int i2 = group2; i2 >= 0; i2 = groups[i2].next) + { + for (unsigned int adj = adjacency.offsets[i1]; adj < adjacency.offsets[i1 + 1]; ++adj) + if (adjacency.clusters[adj] == unsigned(i2)) + { + total += adjacency.shared[adj]; + break; + } + } + + return total; +} + +static void mergeBounds(ClusterGroup& target, const ClusterGroup& source) +{ + float r1 = target.radius, r2 = source.radius; + float dx = source.center[0] - target.center[0], dy = source.center[1] - target.center[1], dz = source.center[2] - target.center[2]; + float d = sqrtf(dx * dx + dy * dy + dz * dz); + + if (d + r1 < r2) + { + target.center[0] = source.center[0]; + target.center[1] = source.center[1]; + target.center[2] = source.center[2]; + target.radius = source.radius; + return; + } + + if (d + r2 > r1) + { + float k = d > 0 ? (d + r2 - r1) / (2 * d) : 0.f; + + target.center[0] += dx * k; + target.center[1] += dy * k; + target.center[2] += dz * k; + target.radius = (d + r2 + r1) / 2; + } +} + +static float boundsScore(const ClusterGroup& target, const ClusterGroup& source) +{ + float r1 = target.radius, r2 = source.radius; + float dx = source.center[0] - target.center[0], dy = source.center[1] - target.center[1], dz = source.center[2] - target.center[2]; + float d = sqrtf(dx * dx + dy * dy + dz * dz); + + float mr = d + r1 < r2 ? r2 : (d + r2 < r1 ? r1 : (d + r2 + r1) / 2); + + return mr > 0 ? 
r1 / mr : 0.f; +} + +static int pickGroupToMerge(const ClusterGroup* groups, int id, const ClusterAdjacency& adjacency, size_t max_partition_size, bool use_bounds) +{ + assert(groups[id].size > 0); + + float group_rsqrt = 1.f / sqrtf(float(int(groups[id].vertices))); + + int best_group = -1; + float best_score = 0; + + for (int ci = id; ci >= 0; ci = groups[ci].next) + { + for (unsigned int adj = adjacency.offsets[ci]; adj != adjacency.offsets[ci + 1]; ++adj) + { + int other = groups[adjacency.clusters[adj]].group; + if (other < 0) + continue; + + assert(groups[other].size > 0); + if (groups[id].size + groups[other].size > max_partition_size) + continue; + + unsigned int shared = countShared(groups, id, other, adjacency); + float other_rsqrt = 1.f / sqrtf(float(int(groups[other].vertices))); + + // normalize shared count by the expected boundary of each group (+ keeps scoring symmetric) + float score = float(int(shared)) * (group_rsqrt + other_rsqrt); + + // incorporate spatial score to favor merging nearby groups + if (use_bounds) + score *= 1.f + 0.4f * boundsScore(groups[id], groups[other]); + + if (score > best_score) + { + best_group = other; + best_score = score; + } + } + } + + return best_group; +} + +static void mergeLeaf(ClusterGroup* groups, unsigned int* order, size_t count, size_t target_partition_size, size_t max_partition_size) +{ + for (size_t i = 0; i < count; ++i) + { + unsigned int id = order[i]; + if (groups[id].size == 0 || groups[id].size >= target_partition_size) + continue; + + float best_score = -1.f; + int best_group = -1; + + for (size_t j = 0; j < count; ++j) + { + unsigned int other = order[j]; + if (id == other || groups[other].size == 0) + continue; + + if (groups[id].size + groups[other].size > max_partition_size) + continue; + + // favor merging nearby groups + float score = boundsScore(groups[id], groups[other]); + + if (score > best_score) + { + best_score = score; + best_group = other; + } + } + + // merge id *into* best_group; that way, we may merge more groups into the same best_group, maximizing the chance of reaching target + if (best_group != -1) + { + // combine groups by linking them together + unsigned int tail = best_group; + while (groups[tail].next >= 0) + tail = groups[tail].next; + + groups[tail].next = id; + + // update group sizes; note, we omit vertices update for simplicity as it's not used for spatial merge + groups[best_group].size += groups[id].size; + groups[id].size = 0; + + // merge bounding spheres + mergeBounds(groups[best_group], groups[id]); + groups[id].radius = 0.f; + } + } +} + +static size_t mergePartition(unsigned int* order, size_t count, const ClusterGroup* groups, int axis, float pivot) +{ + size_t m = 0; + + // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot + for (size_t i = 0; i < count; ++i) + { + float v = groups[order[i]].center[axis]; + + // swap(m, i) unconditionally + unsigned int t = order[m]; + order[m] = order[i]; + order[i] = t; + + // when v >= pivot, we swap i with m without advancing it, preserving invariants + m += v < pivot; + } + + return m; +} + +static void mergeSpatial(ClusterGroup* groups, unsigned int* order, size_t count, size_t target_partition_size, size_t max_partition_size, size_t leaf_size, int depth) +{ + size_t total = 0; + for (size_t i = 0; i < count; ++i) + total += groups[order[i]].size; + + if (total <= max_partition_size || count <= leaf_size) + return mergeLeaf(groups, order, count, target_partition_size, max_partition_size); + + float mean[3] 
= {}; + float vars[3] = {}; + float runc = 1, runs = 1; + + // gather statistics on the points in the subtree using Welford's algorithm + for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc) + { + const float* point = groups[order[i]].center; + + for (int k = 0; k < 3; ++k) + { + float delta = point[k] - mean[k]; + mean[k] += delta * runs; + vars[k] += delta * (point[k] - mean[k]); + } + } + + // split axis is one where the variance is largest + int axis = (vars[0] >= vars[1] && vars[0] >= vars[2]) ? 0 : (vars[1] >= vars[2] ? 1 : 2); + + float split = mean[axis]; + size_t middle = mergePartition(order, count, groups, axis, split); + + // enforce balance for degenerate partitions + // this also ensures recursion depth is bounded on pathological inputs + if (middle <= leaf_size / 2 || count - middle <= leaf_size / 2 || depth >= kMergeDepthCutoff) + middle = count / 2; + + // recursion depth is logarithmic and bounded due to max depth check above + mergeSpatial(groups, order, middle, target_partition_size, max_partition_size, leaf_size, depth + 1); + mergeSpatial(groups, order + middle, count - middle, target_partition_size, max_partition_size, leaf_size, depth + 1); +} + +} // namespace meshopt + +size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size) +{ + using namespace meshopt; + + assert((vertex_positions == NULL || vertex_positions_stride >= 12) && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(target_partition_size > 0); + + size_t max_partition_size = target_partition_size + target_partition_size / 3; + + meshopt_Allocator allocator; + + unsigned char* used = allocator.allocate(vertex_count); + memset(used, 0, vertex_count); + + unsigned int* cluster_newindices = allocator.allocate(total_index_count); + unsigned int* cluster_offsets = allocator.allocate(cluster_count + 1); + + // make new cluster index list that filters out duplicate indices + filterClusterIndices(cluster_newindices, cluster_offsets, cluster_indices, cluster_index_counts, cluster_count, used, vertex_count, total_index_count); + cluster_indices = cluster_newindices; + + // build cluster adjacency along with edge weights (shared vertex count) + ClusterAdjacency adjacency = {}; + buildClusterAdjacency(adjacency, cluster_indices, cluster_offsets, cluster_count, vertex_count, allocator); + + ClusterGroup* groups = allocator.allocate(cluster_count); + memset(groups, 0, sizeof(ClusterGroup) * cluster_count); + + GroupOrder* order = allocator.allocate(cluster_count); + size_t pending = 0; + + // create a singleton group for each cluster and order them by priority + for (size_t i = 0; i < cluster_count; ++i) + { + groups[i].group = int(i); + groups[i].next = -1; + groups[i].size = 1; + groups[i].vertices = cluster_offsets[i + 1] - cluster_offsets[i]; + assert(groups[i].vertices > 0); + + // compute bounding sphere for each cluster if positions are provided + if (vertex_positions) + groups[i].radius = computeClusterBounds(cluster_indices + cluster_offsets[i], cluster_offsets[i + 1] - cluster_offsets[i], vertex_positions, vertex_positions_stride, groups[i].center); + + GroupOrder item = {}; + item.id = unsigned(i); + item.order = groups[i].vertices; + + heapPush(order, pending++, item); + } + + // iteratively merge the 
smallest group with the best group + while (pending) + { + GroupOrder top = heapPop(order, pending--); + + // this group was merged into another group earlier + if (groups[top.id].size == 0) + continue; + + // disassociate clusters from the group to prevent them from being merged again; we will re-associate them if the group is reinserted + for (int i = top.id; i >= 0; i = groups[i].next) + { + assert(groups[i].group == int(top.id)); + groups[i].group = -1; + } + + // the group is large enough, emit as is + if (groups[top.id].size >= target_partition_size) + continue; + + int best_group = pickGroupToMerge(groups, top.id, adjacency, max_partition_size, /* use_bounds= */ vertex_positions); + + // we can't grow the group any more, emit as is + if (best_group == -1) + continue; + + // compute shared vertices to adjust the total vertices estimate after merging + unsigned int shared = countShared(groups, top.id, best_group, adjacency); + + // combine groups by linking them together + unsigned int tail = top.id; + while (groups[tail].next >= 0) + tail = groups[tail].next; + + groups[tail].next = best_group; + + // update group sizes; note, the vertex update is a O(1) approximation which avoids recomputing the true size + groups[top.id].size += groups[best_group].size; + groups[top.id].vertices += groups[best_group].vertices; + groups[top.id].vertices = (groups[top.id].vertices > shared) ? groups[top.id].vertices - shared : 1; + + groups[best_group].size = 0; + groups[best_group].vertices = 0; + + // merge bounding spheres if bounds are available + if (vertex_positions) + { + mergeBounds(groups[top.id], groups[best_group]); + groups[best_group].radius = 0; + } + + // re-associate all clusters back to the merged group + for (int i = top.id; i >= 0; i = groups[i].next) + groups[i].group = int(top.id); + + top.order = groups[top.id].vertices; + heapPush(order, pending++, top); + } + + // if vertex positions are provided, we do a final pass to see if we can merge small groups based on spatial locality alone + if (vertex_positions) + { + unsigned int* merge_order = reinterpret_cast(order); + size_t merge_offset = 0; + + for (size_t i = 0; i < cluster_count; ++i) + if (groups[i].size) + merge_order[merge_offset++] = unsigned(i); + + mergeSpatial(groups, merge_order, merge_offset, target_partition_size, max_partition_size, /* leaf_size= */ 8, 0); + } + + // output each remaining group + size_t next_group = 0; + + for (size_t i = 0; i < cluster_count; ++i) + { + if (groups[i].size == 0) + continue; + + for (int j = int(i); j >= 0; j = groups[j].next) + destination[j] = unsigned(next_group); + + next_group++; + } + + assert(next_group <= cluster_count); + return next_group; +} diff --git a/Source/ThirdParty/meshoptimizer/overdrawanalyzer.cpp b/Source/ThirdParty/meshoptimizer/rasterizer.cpp similarity index 62% rename from Source/ThirdParty/meshoptimizer/overdrawanalyzer.cpp rename to Source/ThirdParty/meshoptimizer/rasterizer.cpp index 31cf6f146..bd788ffdb 100644 --- a/Source/ThirdParty/meshoptimizer/overdrawanalyzer.cpp +++ b/Source/ThirdParty/meshoptimizer/rasterizer.cpp @@ -18,14 +18,6 @@ struct OverdrawBuffer unsigned int overdraw[kViewport][kViewport][2]; }; -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif - -#ifndef max -#define max(a, b) ((a) > (b) ? 
(a) : (b)) -#endif - static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3) { // z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1) @@ -36,8 +28,8 @@ static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1); float invdet = (det == 0) ? 0 : 1 / det; - dzdx = (z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1) * invdet; - dzdy = (x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1) * invdet; + dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet; + dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet; return det; } @@ -76,11 +68,26 @@ static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, f // bounding rectangle, clipped against viewport // since we rasterize pixels with covered centers, min >0.5 should round up // as for max, due to top-left filling convention we will never rasterize right/bottom edges - // so max >= 0.5 should round down - int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0); - int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport); - int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0); - int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport); + // so max >= 0.5 should round down for inclusive bounds, and up for exclusive (in our case) + int minx = X1 < X2 ? X1 : X2; + minx = minx < X3 ? minx : X3; + minx = (minx + 7) >> 4; + minx = minx < 0 ? 0 : minx; + + int miny = Y1 < Y2 ? Y1 : Y2; + miny = miny < Y3 ? miny : Y3; + miny = (miny + 7) >> 4; + miny = miny < 0 ? 0 : miny; + + int maxx = X1 > X2 ? X1 : X2; + maxx = maxx > X3 ? maxx : X3; + maxx = (maxx + 7) >> 4; + maxx = maxx > kViewport ? kViewport : maxx; + + int maxy = Y1 > Y2 ? Y1 : Y2; + maxy = maxy > Y3 ? maxy : Y3; + maxy = (maxy + 7) >> 4; + maxy = maxy > kViewport ? kViewport : maxy; // deltas, 28.4 fixed point int DX12 = X1 - X2; @@ -139,22 +146,10 @@ static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, f } } -} // namespace meshopt - -meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +static float transformTriangles(float* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { - using namespace meshopt; - - assert(index_count % 3 == 0); - assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); - assert(vertex_positions_stride % sizeof(float) == 0); - - meshopt_Allocator allocator; - size_t vertex_stride_float = vertex_positions_stride / sizeof(float); - meshopt_OverdrawStatistics result = {}; - float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; @@ -164,15 +159,20 @@ meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, for (int j = 0; j < 3; ++j) { - minv[j] = min(minv[j], v[j]); - maxv[j] = max(maxv[j], v[j]); + float vj = v[j]; + + minv[j] = minv[j] > vj ? vj : minv[j]; + maxv[j] = maxv[j] < vj ? vj : maxv[j]; } } - float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2])); - float scale = kViewport / extent; + float extent = 0.f; - float* triangles = allocator.allocate(index_count * 3); + extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]); + extent = (maxv[1] - minv[1]) < extent ? 
extent : (maxv[1] - minv[1]); + extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); + + float scale = kViewport / extent; for (size_t i = 0; i < index_count; ++i) { @@ -186,31 +186,55 @@ meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, triangles[i * 3 + 2] = (v[2] - minv[2]) * scale; } + return extent; +} + +static void rasterizeTriangles(OverdrawBuffer* buffer, const float* triangles, size_t index_count, int axis) +{ + for (size_t i = 0; i < index_count; i += 3) + { + const float* vn0 = &triangles[3 * (i + 0)]; + const float* vn1 = &triangles[3 * (i + 1)]; + const float* vn2 = &triangles[3 * (i + 2)]; + + switch (axis) + { + case 0: + rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]); + break; + case 1: + rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]); + break; + case 2: + rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]); + break; + } + } +} + +} // namespace meshopt + +meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + meshopt_OverdrawStatistics result = {}; + + float* triangles = allocator.allocate(index_count * 3); + transformTriangles(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride); + OverdrawBuffer* buffer = allocator.allocate(1); for (int axis = 0; axis < 3; ++axis) { memset(buffer, 0, sizeof(OverdrawBuffer)); - - for (size_t i = 0; i < index_count; i += 3) - { - const float* vn0 = &triangles[3 * (i + 0)]; - const float* vn1 = &triangles[3 * (i + 1)]; - const float* vn2 = &triangles[3 * (i + 2)]; - - switch (axis) - { - case 0: - rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]); - break; - case 1: - rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]); - break; - case 2: - rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]); - break; - } - } + rasterizeTriangles(buffer, triangles, index_count, axis); for (int y = 0; y < kViewport; ++y) for (int x = 0; x < kViewport; ++x) @@ -227,3 +251,39 @@ meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, return result; } + +meshopt_CoverageStatistics meshopt_analyzeCoverage(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + meshopt_CoverageStatistics result = {}; + + float* triangles = allocator.allocate(index_count * 3); + float extent = transformTriangles(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride); + + OverdrawBuffer* buffer = allocator.allocate(1); + + for (int axis = 0; axis < 3; ++axis) + { + memset(buffer, 0, sizeof(OverdrawBuffer)); + rasterizeTriangles(buffer, triangles, index_count, axis); + + unsigned int covered = 0; + + for (int y = 0; y < kViewport; ++y) + for (int x 
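As a usage sketch for the coverage analysis introduced here, assuming the accompanying meshoptimizer.h declares meshopt_CoverageStatistics with the coverage[3] and extent fields written by this function, a unit quad can be analyzed like this:

#include <stdio.h>

#include "meshoptimizer.h"

int main()
{
    // Unit quad in the XY plane (two triangles); the projection along Z sees the
    // full quad, while the X/Y projections see it edge-on.
    const float positions[] = {
        0.f, 0.f, 0.f,
        1.f, 0.f, 0.f,
        1.f, 1.f, 0.f,
        0.f, 1.f, 0.f,
    };
    const unsigned int indices[] = {0, 1, 2, 0, 2, 3};

    meshopt_CoverageStatistics stats =
        meshopt_analyzeCoverage(indices, 6, positions, 4, sizeof(float) * 3);

    printf("coverage x/y/z: %f %f %f, extent: %f\n",
        stats.coverage[0], stats.coverage[1], stats.coverage[2], stats.extent);
    return 0;
}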
= 0; x < kViewport; ++x) + covered += (buffer->overdraw[y][x][0] | buffer->overdraw[y][x][1]) > 0; + + result.coverage[axis] = float(covered) / float(kViewport * kViewport); + } + + result.extent = extent; + + return result; +} diff --git a/Source/ThirdParty/meshoptimizer/simplifier.cpp b/Source/ThirdParty/meshoptimizer/simplifier.cpp index e59b4afcd..14d4d42fe 100644 --- a/Source/ThirdParty/meshoptimizer/simplifier.cpp +++ b/Source/ThirdParty/meshoptimizer/simplifier.cpp @@ -27,6 +27,7 @@ // Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003 // Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019 // Hugues Hoppe. New Quadric Metric for Simplifying Meshes with Appearance Attributes. 1999 +// Hugues Hoppe, Steve Marschner. Efficient Minimization of New Quadric Metric for Simplifying Meshes with Appearance Attributes. 2000 namespace meshopt { @@ -118,10 +119,17 @@ struct PositionHasher unsigned int ri = sparse_remap ? sparse_remap[index] : index; const unsigned int* key = reinterpret_cast(vertex_positions + ri * vertex_stride_float); + unsigned int x = key[0], y = key[1], z = key[2]; + + // replace negative zero with zero + x = (x == 0x80000000) ? 0 : x; + y = (y == 0x80000000) ? 0 : y; + z = (z == 0x80000000) ? 0 : z; + // scramble bits to make sure that integer coordinates have entropy in lower bits - unsigned int x = key[0] ^ (key[0] >> 17); - unsigned int y = key[1] ^ (key[1] >> 17); - unsigned int z = key[2] ^ (key[2] >> 17); + x ^= x >> 17; + y ^= y >> 17; + z ^= z >> 17; // Optimized Spatial Hashing for Collision Detection of Deformable Objects return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791); @@ -132,7 +140,10 @@ struct PositionHasher unsigned int li = sparse_remap ? sparse_remap[lhs] : lhs; unsigned int ri = sparse_remap ? sparse_remap[rhs] : rhs; - return memcmp(vertex_positions + li * vertex_stride_float, vertex_positions + ri * vertex_stride_float, sizeof(float) * 3) == 0; + const float* lv = vertex_positions + li * vertex_stride_float; + const float* rv = vertex_positions + ri * vertex_stride_float; + + return lv[0] == rv[0] && lv[1] == rv[1] && lv[2] == rv[2]; } }; @@ -208,6 +219,11 @@ static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const f remap[index] = *entry; } + allocator.deallocate(table); + + if (!wedge) + return; + // build wedge table: for each vertex, which other vertex is the next wedge that also maps to the same vertex? 
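The position hash above (including the negative-zero canonicalization) can be reproduced standalone; a minimal sketch with a hypothetical hashPosition helper, reading float bits via memcpy:

#include <assert.h>
#include <string.h>

// Hash a 3D position by treating the float bits as integers, as in PositionHasher:
// canonicalize -0.0f, scramble the low bits, then mix with the primes from
// "Optimized Spatial Hashing for Collision Detection of Deformable Objects".
inline unsigned int hashPosition(const float* v)
{
    unsigned int key[3];
    memcpy(key, v, sizeof(key));

    for (int i = 0; i < 3; ++i)
    {
        key[i] = (key[i] == 0x80000000u) ? 0 : key[i]; // -0.0f and +0.0f must hash alike
        key[i] ^= key[i] >> 17;                        // push entropy into the low bits
    }

    return (key[0] * 73856093u) ^ (key[1] * 19349663u) ^ (key[2] * 83492791u);
}

int main()
{
    float a[3] = {0.f, 1.f, 2.f};
    float b[3] = {-0.f, 1.f, 2.f};
    assert(hashPosition(a) == hashPosition(b)); // positive and negative zero collide by design
    return 0;
}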
// entries in table form a (cyclic) wedge loop per vertex; for manifold vertices, wedge[i] == remap[i] == i for (size_t i = 0; i < vertex_count; ++i) @@ -221,22 +237,24 @@ static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const f wedge[i] = wedge[r]; wedge[r] = unsigned(i); } - - allocator.deallocate(table); } static unsigned int* buildSparseRemap(unsigned int* indices, size_t index_count, size_t vertex_count, size_t* out_vertex_count, meshopt_Allocator& allocator) { // use a bit set to compute the precise number of unique vertices unsigned char* filter = allocator.allocate((vertex_count + 7) / 8); - memset(filter, 0, (vertex_count + 7) / 8); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + filter[index / 8] = 0; + } size_t unique = 0; for (size_t i = 0; i < index_count; ++i) { unsigned int index = indices[i]; - assert(index < vertex_count); - unique += (filter[index / 8] & (1 << (index % 8))) == 0; filter[index / 8] |= 1 << (index % 8); } @@ -255,7 +273,6 @@ static unsigned int* buildSparseRemap(unsigned int* indices, size_t index_count, for (size_t i = 0; i < index_count; ++i) { unsigned int index = indices[i]; - unsigned int* entry = hashLookup2(revremap, revremap_size, hasher, index, ~0u); if (*entry == ~0u) @@ -288,14 +305,14 @@ enum VertexKind }; // manifold vertices can collapse onto anything -// border/seam vertices can only be collapsed onto border/seam respectively +// border/seam vertices can collapse onto border/seam respectively, or locked // complex vertices can collapse onto complex/locked // a rule of thumb is that collapsing kind A into kind B preserves the kind B in the target vertex // for example, while we could collapse Complex into Manifold, this would mean the target vertex isn't Manifold anymore const unsigned char kCanCollapse[Kind_Count][Kind_Count] = { {1, 1, 1, 1, 1}, - {0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0}, + {0, 1, 0, 0, 1}, + {0, 0, 1, 0, 1}, {0, 0, 0, 1, 1}, {0, 0, 0, 0, 0}, }; @@ -303,11 +320,13 @@ const unsigned char kCanCollapse[Kind_Count][Kind_Count] = { // if a vertex is manifold or seam, adjoining edges are guaranteed to have an opposite edge // note that for seam edges, the opposite edge isn't present in the attribute-based topology // but is present if you consider a position-only mesh variant +// while many complex collapses have the opposite edge, since complex vertices collapse to the +// same wedge, keeping opposite edges separate improves the quality by considering both targets const unsigned char kHasOpposite[Kind_Count][Kind_Count] = { - {1, 1, 1, 0, 1}, + {1, 1, 1, 1, 1}, {1, 0, 1, 0, 0}, {1, 1, 1, 0, 1}, - {0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0}, {1, 0, 1, 0, 0}, }; @@ -323,14 +342,33 @@ static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int return false; } +static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b, const unsigned int* remap, const unsigned int* wedge) +{ + unsigned int v = a; + + do + { + unsigned int count = adjacency.offsets[v + 1] - adjacency.offsets[v]; + const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[v]; + + for (size_t i = 0; i < count; ++i) + if (remap[edges[i].next] == remap[b]) + return true; + + v = wedge[v]; + } while (v != a); + + return false; +} + static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge, const 
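The sparse-remap path above counts unique referenced vertices with a one-bit-per-vertex filter; the same idea in isolation (hypothetical countUniqueVertices, using a zero-initialized bit set rather than the on-demand clearing above):

#include <assert.h>
#include <stddef.h>
#include <vector>

// Count how many distinct vertex indices appear in an index buffer using a bit set,
// mirroring the filter logic in buildSparseRemap (one byte covers 8 vertices).
inline size_t countUniqueVertices(const unsigned int* indices, size_t index_count, size_t vertex_count)
{
    std::vector<unsigned char> filter((vertex_count + 7) / 8, 0);

    size_t unique = 0;

    for (size_t i = 0; i < index_count; ++i)
    {
        unsigned int index = indices[i];
        assert(index < vertex_count);

        unique += (filter[index / 8] & (1 << (index % 8))) == 0;
        filter[index / 8] |= 1 << (index % 8);
    }

    return unique;
}

int main()
{
    unsigned int indices[] = {0, 1, 2, 0, 2, 3};
    assert(countUniqueVertices(indices, 6, 100) == 4);
    return 0;
}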
unsigned char* vertex_lock, const unsigned int* sparse_remap, unsigned int options) { memset(loop, -1, vertex_count * sizeof(unsigned int)); memset(loopback, -1, vertex_count * sizeof(unsigned int)); // incoming & outgoing open edges: ~0u if no open edges, i if there are more than 1 - // note that this is the same data as required in loop[] arrays; loop[] data is only valid for border/seam - // but here it's okay to fill the data out for other types of vertices as well + // note that this is the same data as required in loop[] arrays; loop[] data is only used for border/seam by default + // in permissive mode we also use it to guide complex-complex collapses, so we fill it for all vertices unsigned int* openinc = loopback; unsigned int* openout = loop; @@ -369,12 +407,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned { if (remap[i] == i) { - if (vertex_lock && vertex_lock[sparse_remap ? sparse_remap[i] : i]) - { - // vertex is explicitly locked - result[i] = Kind_Locked; - } - else if (wedge[i] == i) + if (wedge[i] == i) { // no attribute seam, need to check if it's manifold unsigned int openi = openinc[i], openo = openout[i]; @@ -386,6 +419,13 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned { result[i] = Kind_Manifold; } + else if (openi != ~0u && openo != ~0u && remap[openi] == remap[openo] && openi != i) + { + // classify half-seams as seams (the branch below would mis-classify them as borders) + // half-seam is a single vertex that connects to both vertices of a potential seam + // treating these as seams allows collapsing the "full" seam vertex onto them + result[i] = Kind_Seam; + } else if (openi != i && openo != i) { result[i] = Kind_Border; @@ -407,7 +447,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned if (openiv != ~0u && openiv != i && openov != ~0u && openov != i && openiw != ~0u && openiw != w && openow != ~0u && openow != w) { - if (remap[openiv] == remap[openow] && remap[openov] == remap[openiw]) + if (remap[openiv] == remap[openow] && remap[openov] == remap[openiw] && remap[openiv] != remap[openov]) { result[i] = Kind_Seam; } @@ -438,6 +478,58 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned } } + if (options & meshopt_SimplifyPermissive) + for (size_t i = 0; i < vertex_count; ++i) + if (result[i] == Kind_Seam || result[i] == Kind_Locked) + { + if (remap[i] != i) + { + // only process primary vertices; wedges will be updated to match the primary vertex + result[i] = result[remap[i]]; + continue; + } + + bool protect = false; + + // vertex_lock may protect any wedge, not just the primary vertex, so we switch to complex only if no wedges are protected + unsigned int v = unsigned(i); + do + { + unsigned int rv = sparse_remap ? sparse_remap[v] : v; + protect |= vertex_lock && (vertex_lock[rv] & meshopt_SimplifyVertex_Protect) != 0; + v = wedge[v]; + } while (v != i); + + // protect if any adjoining edge doesn't have an opposite edge (indicating vertex is on the border) + do + { + const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[v]]; + size_t count = adjacency.offsets[v + 1] - adjacency.offsets[v]; + + for (size_t j = 0; j < count; ++j) + protect |= !hasEdge(adjacency, edges[j].next, v, remap, wedge); + v = wedge[v]; + } while (v != i); + + result[i] = protect ? 
result[i] : int(Kind_Complex); + } + + if (vertex_lock) + { + // vertex_lock may lock any wedge, not just the primary vertex, so we need to lock the primary vertex and relock any wedges + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int ri = sparse_remap ? sparse_remap[i] : unsigned(i); + + if (vertex_lock[ri] & meshopt_SimplifyVertex_Lock) + result[remap[i]] = Kind_Locked; + } + + for (size_t i = 0; i < vertex_count; ++i) + if (result[remap[i]] == Kind_Locked) + result[i] = Kind_Locked; + } + if (options & meshopt_SimplifyLockBorder) for (size_t i = 0; i < vertex_count; ++i) if (result[i] == Kind_Border) @@ -454,7 +546,7 @@ struct Vector3 float x, y, z; }; -static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* sparse_remap = NULL) +static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* sparse_remap = NULL, float* out_offset = NULL) { size_t vertex_stride_float = vertex_positions_stride / sizeof(float); @@ -500,10 +592,17 @@ static float rescalePositions(Vector3* result, const float* vertex_positions_dat } } + if (out_offset) + { + out_offset[0] = minv[0]; + out_offset[1] = minv[1]; + out_offset[2] = minv[2]; + } + return extent; } -static void rescaleAttributes(float* result, const float* vertex_attributes_data, size_t vertex_count, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned int* sparse_remap) +static void rescaleAttributes(float* result, const float* vertex_attributes_data, size_t vertex_count, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned int* attribute_remap, const unsigned int* sparse_remap) { size_t vertex_attributes_stride_float = vertex_attributes_stride / sizeof(float); @@ -513,18 +612,61 @@ static void rescaleAttributes(float* result, const float* vertex_attributes_data for (size_t k = 0; k < attribute_count; ++k) { - float a = vertex_attributes_data[ri * vertex_attributes_stride_float + k]; + unsigned int rk = attribute_remap[k]; + float a = vertex_attributes_data[ri * vertex_attributes_stride_float + rk]; - result[i * attribute_count + k] = a * attribute_weights[k]; + result[i * attribute_count + k] = a * attribute_weights[rk]; } } } -static const size_t kMaxAttributes = 16; +static void finalizeVertices(float* vertex_positions_data, size_t vertex_positions_stride, float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t vertex_count, const Vector3* vertex_positions, const float* vertex_attributes, const unsigned int* sparse_remap, const unsigned int* attribute_remap, float vertex_scale, const float* vertex_offset, const unsigned char* vertex_kind, const unsigned char* vertex_update, const unsigned char* vertex_lock) +{ + size_t vertex_positions_stride_float = vertex_positions_stride / sizeof(float); + size_t vertex_attributes_stride_float = vertex_attributes_stride / sizeof(float); + + for (size_t i = 0; i < vertex_count; ++i) + { + if (!vertex_update[i]) + continue; + + unsigned int ri = sparse_remap ? 
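The write-back in finalizeVertices inverts the earlier rescaling; a small sketch of that round trip, assuming positions are normalized as (v - offset) / extent and attributes are multiplied by their user-provided weights on input:

#include <assert.h>
#include <math.h>

int main()
{
    // Forward transform: normalize a coordinate into the unit cube before simplification.
    float v = 12.5f, offset = 10.f, extent = 5.f;
    float normalized = (v - offset) / extent;

    // Inverse transform, as finalizeVertices does when writing vertices back out.
    float restored = normalized * extent + offset;
    assert(fabsf(restored - v) < 1e-6f);

    // Attributes are scaled by their weight on input and divided back on output;
    // a weighted-attribute delta of w * da therefore trades off against a positional
    // delta of the same magnitude in the error metric.
    float attribute = 0.75f, weight = 4.f;
    float scaled = attribute * weight;
    float output = scaled / weight;
    assert(fabsf(output - attribute) < 1e-6f);

    return 0;
}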
sparse_remap[i] : unsigned(i); + + // updating externally locked vertices is not allowed + if (vertex_lock && (vertex_lock[ri] & meshopt_SimplifyVertex_Lock) != 0) + continue; + + // moving locked vertices may result in floating point drift + if (vertex_kind[i] != Kind_Locked) + { + const Vector3& p = vertex_positions[i]; + float* v = vertex_positions_data + ri * vertex_positions_stride_float; + + v[0] = p.x * vertex_scale + vertex_offset[0]; + v[1] = p.y * vertex_scale + vertex_offset[1]; + v[2] = p.z * vertex_scale + vertex_offset[2]; + } + + if (attribute_count) + { + const float* sa = vertex_attributes + i * attribute_count; + float* va = vertex_attributes_data + ri * vertex_attributes_stride_float; + + for (size_t k = 0; k < attribute_count; ++k) + { + unsigned int rk = attribute_remap[k]; + + va[rk] = sa[k] / attribute_weights[rk]; + } + } + } +} + +static const size_t kMaxAttributes = 32; struct Quadric { - // a00*x^2 + a11*y^2 + a22*z^2 + 2*(a10*xy + a20*xz + a21*yz) + b0*x + b1*y + b2*z + c + // a00*x^2 + a11*y^2 + a22*z^2 + 2*a10*xy + 2*a20*xz + 2*a21*yz + 2*b0*x + 2*b1*y + 2*b2*z + c float a00, a11, a22; float a10, a20, a21; float b0, b1, b2, c; @@ -586,6 +728,14 @@ static void quadricAdd(Quadric& Q, const Quadric& R) Q.w += R.w; } +static void quadricAdd(QuadricGrad& G, const QuadricGrad& R) +{ + G.gx += R.gx; + G.gy += R.gy; + G.gz += R.gz; + G.gw += R.gw; +} + static void quadricAdd(QuadricGrad* G, const QuadricGrad* R, size_t attribute_count) { for (size_t k = 0; k < attribute_count; ++k) @@ -597,7 +747,7 @@ static void quadricAdd(QuadricGrad* G, const QuadricGrad* R, size_t attribute_co } } -static float quadricError(const Quadric& Q, const Vector3& v) +static float quadricEval(const Quadric& Q, const Vector3& v) { float rx = Q.b0; float ry = Q.b1; @@ -620,6 +770,12 @@ static float quadricError(const Quadric& Q, const Vector3& v) r += ry * v.y; r += rz * v.z; + return r; +} + +static float quadricError(const Quadric& Q, const Vector3& v) +{ + float r = quadricEval(Q, v); float s = Q.w == 0.f ? 0.f : 1.f / Q.w; return fabsf(r) * s; @@ -627,26 +783,7 @@ static float quadricError(const Quadric& Q, const Vector3& v) static float quadricError(const Quadric& Q, const QuadricGrad* G, size_t attribute_count, const Vector3& v, const float* va) { - float rx = Q.b0; - float ry = Q.b1; - float rz = Q.b2; - - rx += Q.a10 * v.y; - ry += Q.a21 * v.z; - rz += Q.a20 * v.x; - - rx *= 2; - ry *= 2; - rz *= 2; - - rx += Q.a00 * v.x; - ry += Q.a11 * v.y; - rz += Q.a22 * v.z; - - float r = Q.c; - r += rx * v.x; - r += ry * v.y; - r += rz * v.z; + float r = quadricEval(Q, v); // see quadricFromAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr for (size_t k = 0; k < attribute_count; ++k) @@ -654,14 +791,11 @@ static float quadricError(const Quadric& Q, const QuadricGrad* G, size_t attribu float a = va[k]; float g = v.x * G[k].gx + v.y * G[k].gy + v.z * G[k].gz + G[k].gw; - r += a * a * Q.w; - r -= 2 * a * g; + r += a * (a * Q.w - 2 * g); } - // TODO: weight normalization is breaking attribute error somehow - float s = 1; // Q.w == 0.f ? 
0.f : 1.f / Q.w; - - return fabsf(r) * s; + // note: unlike position error, we do not normalize by Q.w to retain edge scaling as described in quadricFromAttributes + return fabsf(r); } static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w) @@ -684,6 +818,17 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo Q.w = w; } +static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) +{ + Q.a00 = Q.a11 = Q.a22 = w; + Q.a10 = Q.a20 = Q.a21 = 0; + Q.b0 = -x * w; + Q.b1 = -y * w; + Q.b2 = -z * w; + Q.c = (x * x + y * y + z * z) * w; + Q.w = w; +} + static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) { Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; @@ -702,20 +847,24 @@ static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1 static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) { Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; - float length = normalize(p10); - // p20p = length of projection of p2-p0 onto normalize(p1 - p0) + // edge length; keep squared length around for projection correction + float lengthsq = p10.x * p10.x + p10.y * p10.y + p10.z * p10.z; + float length = sqrtf(lengthsq); + + // p20p = length of projection of p2-p0 onto p1-p0; note that p10 is unnormalized so we need to correct it later Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; float p20p = p20.x * p10.x + p20.y * p10.y + p20.z * p10.z; - // normal = altitude of triangle from point p2 onto edge p1-p0 - Vector3 normal = {p20.x - p10.x * p20p, p20.y - p10.y * p20p, p20.z - p10.z * p20p}; - normalize(normal); + // perp = perpendicular vector from p2 to line segment p1-p0 + // note: since p10 is unnormalized we need to correct the projection; we scale p20 instead to take advantage of normalize below + Vector3 perp = {p20.x * lengthsq - p10.x * p20p, p20.y * lengthsq - p10.y * p20p, p20.z * lengthsq - p10.z * p20p}; + normalize(perp); - float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; + float distance = perp.x * p0.x + perp.y * p0.y + perp.z * p0.z; // note: the weight is scaled linearly with edge length; this has to match the triangle weight - quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); + quadricFromPlane(Q, perp.x, perp.y, perp.z, -distance, length * weight); } static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0, const Vector3& p1, const Vector3& p2, const float* va0, const float* va1, const float* va2, size_t attribute_count) @@ -728,16 +877,21 @@ static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0, Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; - // weight is scaled linearly with edge length + // normal = cross(p1 - p0, p2 - p0) Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x}; - float area = sqrtf(normal.x * normal.x + normal.y * normal.y + normal.z * normal.z); - float w = sqrtf(area); // TODO this needs more experimentation + float area = sqrtf(normal.x * normal.x + normal.y * normal.y + normal.z * normal.z) * 0.5f; + + // quadric is weighted with the square of edge length (= area) + // this equalizes the units with the positional error (which, after normalization, is a square of distance) + // as a result, a change in weighted attribute 
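The quadric form documented above (a00*x^2 + ... + 2*b0*x + ... + c) can be sanity-checked against the point quadric: for quadricFromPoint the evaluated error at v should equal w * |v - p|^2. A standalone sketch with hypothetical names, re-deriving the evaluation rather than calling internal functions:

#include <assert.h>
#include <math.h>

struct Q10 // mirrors the 10 scalar quadric coefficients plus the weight
{
    float a00, a11, a22, a10, a20, a21, b0, b1, b2, c, w;
};

// Point quadric: distance-to-point error scaled by w (same coefficients as quadricFromPoint).
static Q10 fromPoint(float x, float y, float z, float w)
{
    Q10 q = {w, w, w, 0, 0, 0, -x * w, -y * w, -z * w, (x * x + y * y + z * z) * w, w};
    return q;
}

// Evaluate v^T*A*v + 2*b^T*v + c.
static float eval(const Q10& q, float x, float y, float z)
{
    return q.a00 * x * x + q.a11 * y * y + q.a22 * z * z +
           2 * (q.a10 * x * y + q.a20 * x * z + q.a21 * y * z) +
           2 * (q.b0 * x + q.b1 * y + q.b2 * z) + q.c;
}

int main()
{
    Q10 q = fromPoint(1.f, 2.f, 3.f, 0.5f);
    float e = eval(q, 4.f, 2.f, 3.f); // distance to (1,2,3) is 3, so error = 0.5 * 9
    assert(fabsf(e - 4.5f) < 1e-5f);
    return 0;
}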
of 1 along distance d is approximately equivalent to a change in position of d + float w = area; // we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows: // v = (d11 * d20 - d01 * d21) / denom // w = (d00 * d21 - d01 * d20) / denom // u = 1 - v - w // here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj) + // note: v2 and d20/d21 can not be evaluated here as v2 is effectively an unknown variable; we need these only as variables for derivation of gradients const Vector3& v0 = p10; const Vector3& v1 = p20; float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z; @@ -747,7 +901,7 @@ static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0, float denomr = denom == 0 ? 0.f : 1.f / denom; // precompute gradient factors - // these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes + // these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out expressions that are shared between attributes float gx1 = (d11 * v0.x - d01 * v1.x) * denomr; float gx2 = (d00 * v1.x - d01 * v0.x) * denomr; float gy1 = (d11 * v0.y - d01 * v1.y) * denomr; @@ -772,6 +926,7 @@ static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0, // quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K // since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields + // note: for simplicity we scale all factors by weight here instead of outside the loop Q.a00 += w * (gx * gx); Q.a11 += w * (gy * gy); Q.a22 += w * (gz * gz); @@ -794,7 +949,112 @@ static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0, } } -static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) +static void quadricVolumeGradient(QuadricGrad& G, const Vector3& p0, const Vector3& p1, const Vector3& p2) +{ + Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; + Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; + + // normal = cross(p1 - p0, p2 - p0) + Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x}; + float area = normalize(normal) * 0.5f; + + G.gx = normal.x * area; + G.gy = normal.y * area; + G.gz = normal.z * area; + G.gw = (-p0.x * normal.x - p0.y * normal.y - p0.z * normal.z) * area; +} + +static bool quadricSolve(Vector3& p, const Quadric& Q, const QuadricGrad& GV) +{ + // solve A*p = -b where A is the quadric matrix and b is the linear term + float a00 = Q.a00, a11 = Q.a11, a22 = Q.a22; + float a10 = Q.a10, a20 = Q.a20, a21 = Q.a21; + float x0 = -Q.b0, x1 = -Q.b1, x2 = -Q.b2; + + float eps = 1e-6f * Q.w; + + // LDL decomposition: A = LDL^T + float d0 = a00; + float l10 = a10 / d0; + float l20 = a20 / d0; + + float d1 = a11 - a10 * l10; + float dl21 = a21 - a20 * l10; + float l21 = dl21 / d1; + + float d2 = a22 - a20 * l20 - dl21 * l21; + + // solve L*y = x + float y0 = x0; + float y1 = x1 - l10 * y0; + float y2 = x2 - l20 * y0 - l21 * y1; + + // solve D*z = y + float z0 = y0 / d0; + float z1 = y1 / d1; + float z2 = y2 / d2; + + // augment system with linear constraint GV using Lagrange multiplier + float a30 = GV.gx, a31 = GV.gy, a32 = GV.gz; + float 
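The gradient factors above follow from the closed-form barycentric coordinates quoted in the comment; the same formulas evaluated directly, as a standalone sketch with a hypothetical barycentric helper:

#include <assert.h>
#include <math.h>

struct V3 { float x, y, z; };

static V3 sub(V3 a, V3 b) { V3 r = {a.x - b.x, a.y - b.y, a.z - b.z}; return r; }
static float dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

// Barycentric coordinates (u, v, w) of p with respect to triangle (a, b, c):
// v = (d11*d20 - d01*d21) / denom, w = (d00*d21 - d01*d20) / denom, u = 1 - v - w
static void barycentric(V3 p, V3 a, V3 b, V3 c, float& u, float& v, float& w)
{
    V3 v0 = sub(b, a), v1 = sub(c, a), v2 = sub(p, a);
    float d00 = dot(v0, v0), d01 = dot(v0, v1), d11 = dot(v1, v1);
    float d20 = dot(v2, v0), d21 = dot(v2, v1);
    float denom = d00 * d11 - d01 * d01;

    v = (d11 * d20 - d01 * d21) / denom;
    w = (d00 * d21 - d01 * d20) / denom;
    u = 1.f - v - w;
}

int main()
{
    V3 a = {0, 0, 0}, b = {1, 0, 0}, c = {0, 1, 0};
    float u, v, w;
    barycentric(V3{0.25f, 0.25f, 0.f}, a, b, c, u, v, w);
    // p = 0.5*a + 0.25*b + 0.25*c
    assert(fabsf(u - 0.5f) < 1e-5f && fabsf(v - 0.25f) < 1e-5f && fabsf(w - 0.25f) < 1e-5f);
    return 0;
}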
x3 = -GV.gw; + + float l30 = a30 / d0; + float dl31 = a31 - a30 * l10; + float l31 = dl31 / d1; + float dl32 = a32 - a30 * l20 - dl31 * l21; + float l32 = dl32 / d2; + float d3 = 0.f - a30 * l30 - dl31 * l31 - dl32 * l32; + + float y3 = x3 - l30 * y0 - l31 * y1 - l32 * y2; + float z3 = fabsf(d3) > eps ? y3 / d3 : 0.f; // if d3 is zero, we can ignore the constraint + + // substitute L^T*p = z + float lambda = z3; + float pz = z2 - l32 * lambda; + float py = z1 - l21 * pz - l31 * lambda; + float px = z0 - l10 * py - l20 * pz - l30 * lambda; + + p.x = px; + p.y = py; + p.z = pz; + + return fabsf(d0) > eps && fabsf(d1) > eps && fabsf(d2) > eps; +} + +static void quadricReduceAttributes(Quadric& Q, const Quadric& A, const QuadricGrad* G, size_t attribute_count) +{ + // update vertex quadric with attribute quadric; multiply by vertex weight to minimize normalized error + Q.a00 += A.a00 * Q.w; + Q.a11 += A.a11 * Q.w; + Q.a22 += A.a22 * Q.w; + Q.a10 += A.a10 * Q.w; + Q.a20 += A.a20 * Q.w; + Q.a21 += A.a21 * Q.w; + Q.b0 += A.b0 * Q.w; + Q.b1 += A.b1 * Q.w; + Q.b2 += A.b2 * Q.w; + + float iaw = A.w == 0 ? 0.f : Q.w / A.w; + + // update linear system based on attribute gradients (BB^T/a) + for (size_t k = 0; k < attribute_count; ++k) + { + const QuadricGrad& g = G[k]; + + Q.a00 -= (g.gx * g.gx) * iaw; + Q.a11 -= (g.gy * g.gy) * iaw; + Q.a22 -= (g.gz * g.gz) * iaw; + Q.a10 -= (g.gx * g.gy) * iaw; + Q.a20 -= (g.gx * g.gz) * iaw; + Q.a21 -= (g.gy * g.gz) * iaw; + + Q.b0 -= (g.gx * g.gw) * iaw; + Q.b1 -= (g.gy * g.gw) * iaw; + Q.b2 -= (g.gz * g.gw) * iaw; + } +} + +static void fillFaceQuadrics(Quadric* vertex_quadrics, QuadricGrad* volume_gradients, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) { for (size_t i = 0; i < index_count; i += 3) { @@ -808,6 +1068,36 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic quadricAdd(vertex_quadrics[remap[i0]], Q); quadricAdd(vertex_quadrics[remap[i1]], Q); quadricAdd(vertex_quadrics[remap[i2]], Q); + + if (volume_gradients) + { + QuadricGrad GV; + quadricVolumeGradient(GV, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2]); + + quadricAdd(volume_gradients[remap[i0]], GV); + quadricAdd(volume_gradients[remap[i1]], GV); + quadricAdd(volume_gradients[remap[i2]], GV); + } + } +} + +static void fillVertexQuadrics(Quadric* vertex_quadrics, const Vector3* vertex_positions, size_t vertex_count, const unsigned int* remap, unsigned int options) +{ + // by default, we use a very small weight to improve triangulation and numerical stability without affecting the shape or error + float factor = (options & meshopt_SimplifyRegularize) ? 
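quadricSolve above extends an LDL^T factorization with one Lagrange-multiplier row for the volume constraint; the unconstrained 3x3 case is easier to follow in isolation. A sketch (hypothetical solveLDL3, not the library code):

#include <assert.h>
#include <math.h>

// Solve the symmetric system A*x = r via A = L*D*L^T, where
// A = [[a00, a10, a20], [a10, a11, a21], [a20, a21, a22]].
// Returns false when a pivot is (near) zero and the solution is unreliable.
static bool solveLDL3(float a00, float a11, float a22, float a10, float a20, float a21,
                      float r0, float r1, float r2, float& x0, float& x1, float& x2)
{
    float eps = 1e-12f;

    float d0 = a00;
    if (fabsf(d0) < eps) return false;
    float l10 = a10 / d0, l20 = a20 / d0;

    float d1 = a11 - a10 * l10;
    if (fabsf(d1) < eps) return false;
    float l21 = (a21 - a20 * l10) / d1;

    float d2 = a22 - a20 * l20 - (a21 - a20 * l10) * l21;
    if (fabsf(d2) < eps) return false;

    // forward substitution (L*y = r), diagonal solve (D*z = y), back substitution (L^T*x = z)
    float y0 = r0, y1 = r1 - l10 * y0, y2 = r2 - l20 * y0 - l21 * y1;
    float z0 = y0 / d0, z1 = y1 / d1, z2 = y2 / d2;

    x2 = z2;
    x1 = z1 - l21 * x2;
    x0 = z0 - l10 * x1 - l20 * x2;
    return true;
}

int main()
{
    // A = [[2,1,0],[1,3,0],[0,0,4]]; the solution of A*x = (3, 5, 8) is x = (0.8, 1.4, 2).
    float x0, x1, x2;
    bool ok = solveLDL3(2, 3, 4, 1, 0, 0, 3, 5, 8, x0, x1, x2);
    assert(ok && fabsf(x0 - 0.8f) < 1e-5f && fabsf(x1 - 1.4f) < 1e-5f && fabsf(x2 - 2.f) < 1e-5f);
    return 0;
}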
1e-1f : 1e-7f; + + for (size_t i = 0; i < vertex_count; ++i) + { + if (remap[i] != i) + continue; + + const Vector3& p = vertex_positions[i]; + float w = vertex_quadrics[i].w * factor; + + Quadric Q; + quadricFromPoint(Q, p.x, p.y, p.z, w); + + quadricAdd(vertex_quadrics[i], Q); } } @@ -837,15 +1127,11 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0) continue; - // seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges - if (kHasOpposite[k0][k1] && remap[i1] > remap[i0]) - continue; - unsigned int i2 = indices[i + next[e + 1]]; // we try hard to maintain border edge geometry; seam edges can move more freely // due to topological restrictions on collapses, seam quadrics slightly improves collapse structure but aren't critical - const float kEdgeWeightSeam = 1.f; + const float kEdgeWeightSeam = 0.5f; // applied twice due to opposite edges const float kEdgeWeightBorder = 10.f; float edgeWeight = (k0 == Kind_Border || k1 == Kind_Border) ? kEdgeWeightBorder : kEdgeWeightSeam; @@ -853,13 +1139,20 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic Quadric Q; quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight); + Quadric QT; + quadricFromTriangle(QT, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight); + + // mix edge quadric with triangle quadric to stabilize collapses in both directions; both quadrics inherit edge weight so that their error is added + QT.w = 0; + quadricAdd(Q, QT); + quadricAdd(vertex_quadrics[remap[i0]], Q); quadricAdd(vertex_quadrics[remap[i1]], Q); } } } -static void fillAttributeQuadrics(Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const float* vertex_attributes, size_t attribute_count, const unsigned int* remap) +static void fillAttributeQuadrics(Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const float* vertex_attributes, size_t attribute_count) { for (size_t i = 0; i < index_count; i += 3) { @@ -871,14 +1164,13 @@ static void fillAttributeQuadrics(Quadric* attribute_quadrics, QuadricGrad* attr QuadricGrad G[kMaxAttributes]; quadricFromAttributes(QA, G, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], &vertex_attributes[i0 * attribute_count], &vertex_attributes[i1 * attribute_count], &vertex_attributes[i2 * attribute_count], attribute_count); - // TODO: This blends together attribute weights across attribute discontinuities, which is probably not a great idea - quadricAdd(attribute_quadrics[remap[i0]], QA); - quadricAdd(attribute_quadrics[remap[i1]], QA); - quadricAdd(attribute_quadrics[remap[i2]], QA); + quadricAdd(attribute_quadrics[i0], QA); + quadricAdd(attribute_quadrics[i1], QA); + quadricAdd(attribute_quadrics[i2], QA); - quadricAdd(&attribute_gradients[remap[i0] * attribute_count], G, attribute_count); - quadricAdd(&attribute_gradients[remap[i1] * attribute_count], G, attribute_count); - quadricAdd(&attribute_gradients[remap[i2] * attribute_count], G, attribute_count); + quadricAdd(&attribute_gradients[i0 * attribute_count], G, attribute_count); + quadricAdd(&attribute_gradients[i1 * attribute_count], G, attribute_count); + quadricAdd(&attribute_gradients[i2 * attribute_count], G, attribute_count); } } @@ -922,6 +1214,30 @@ 
static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vert continue; // early-out when at least one triangle flips due to a collapse + if (hasTriangleFlip(vertex_positions[a], vertex_positions[b], v0, v1)) + { +#if TRACE >= 2 + printf("edge block %d -> %d: flip welded %d %d %d\n", i0, i1, a, i0, b); +#endif + + return true; + } + } + + return false; +} + +static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, unsigned int i0, const Vector3& v1) +{ + const Vector3& v0 = vertex_positions[i0]; + + const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]]; + size_t count = adjacency.offsets[i0 + 1] - adjacency.offsets[i0]; + + for (size_t i = 0; i < count; ++i) + { + unsigned int a = edges[i].next, b = edges[i].prev; + if (hasTriangleFlip(vertex_positions[a], vertex_positions[b], v0, v1)) return true; } @@ -929,6 +1245,46 @@ static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vert return false; } +static float getNeighborhoodRadius(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, unsigned int i0) +{ + const Vector3& v0 = vertex_positions[i0]; + + const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]]; + size_t count = adjacency.offsets[i0 + 1] - adjacency.offsets[i0]; + + float result = 0.f; + + for (size_t i = 0; i < count; ++i) + { + unsigned int a = edges[i].next, b = edges[i].prev; + + const Vector3& va = vertex_positions[a]; + const Vector3& vb = vertex_positions[b]; + + float da = (va.x - v0.x) * (va.x - v0.x) + (va.y - v0.y) * (va.y - v0.y) + (va.z - v0.z) * (va.z - v0.z); + float db = (vb.x - v0.x) * (vb.x - v0.x) + (vb.y - v0.y) * (vb.y - v0.y) + (vb.z - v0.z) * (vb.z - v0.z); + + result = result < da ? da : result; + result = result < db ? 
db : result; + } + + return sqrtf(result); +} + +static unsigned int getComplexTarget(unsigned int v, unsigned int target, const unsigned int* remap, const unsigned int* loop, const unsigned int* loopback) +{ + unsigned int r = remap[target]; + + // use loop metadata to guide complex collapses towards the correct wedge + // this works for edges on attribute discontinuities because loop/loopback track the single half-edge without a pair, similar to seams + if (loop[v] != ~0u && remap[loop[v]] == r) + return loop[v]; + else if (loopback[v] != ~0u && remap[loopback[v]] == r) + return loopback[v]; + else + return target; +} + static size_t boundEdgeCollapses(const EdgeAdjacency& adjacency, size_t vertex_count, size_t index_count, unsigned char* vertex_kind) { size_t dual_count = 0; @@ -947,7 +1303,7 @@ static size_t boundEdgeCollapses(const EdgeAdjacency& adjacency, size_t vertex_c return (index_count - dual_count / 2) + 3; } -static size_t pickEdgeCollapses(Collapse* collapses, size_t collapse_capacity, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop) +static size_t pickEdgeCollapses(Collapse* collapses, size_t collapse_capacity, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback) { size_t collapse_count = 0; @@ -983,8 +1339,10 @@ static size_t pickEdgeCollapses(Collapse* collapses, size_t collapse_capacity, c // two vertices are on a border or a seam, but there's no direct edge between them // this indicates that they belong to two different edge loops and we should not collapse this edge - // loop[] tracks half edges so we only need to check i0->i1 - if (k0 == k1 && (k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1) + // loop[] and loopback[] track half edges so we only need to check one of them + if ((k0 == Kind_Border || k0 == Kind_Seam) && k1 != Kind_Manifold && loop[i0] != i1) + continue; + if ((k1 == Kind_Border || k1 == Kind_Seam) && k0 != Kind_Manifold && loopback[i1] != i0) continue; // edge can be collapsed in either direction - we will pick the one with minimum error @@ -1009,7 +1367,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, size_t collapse_capacity, c return collapse_count; } -static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const float* vertex_attributes, const Quadric* vertex_quadrics, const Quadric* attribute_quadrics, const QuadricGrad* attribute_gradients, size_t attribute_count, const unsigned int* remap) +static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const float* vertex_attributes, const Quadric* vertex_quadrics, const Quadric* attribute_quadrics, const QuadricGrad* attribute_gradients, size_t attribute_count, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback) { for (size_t i = 0; i < collapse_count; ++i) { @@ -1017,40 +1375,94 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const unsigned int i0 = c.v0; unsigned int i1 = c.v1; - - // most edges are bidirectional which means we need to evaluate errors for two collapses - // to keep this code branchless we just use the same edge for unidirectional edges - unsigned int j0 = c.bidi ? i1 : i0; - unsigned int j1 = c.bidi ? 
i0 : i1; + bool bidi = c.bidi; float ei = quadricError(vertex_quadrics[remap[i0]], vertex_positions[i1]); - float ej = quadricError(vertex_quadrics[remap[j0]], vertex_positions[j1]); + float ej = bidi ? quadricError(vertex_quadrics[remap[i1]], vertex_positions[i0]) : FLT_MAX; + +#if TRACE >= 3 + float di = ei, dj = ej; +#endif if (attribute_count) { - ei += quadricError(attribute_quadrics[remap[i0]], &attribute_gradients[remap[i0] * attribute_count], attribute_count, vertex_positions[i1], &vertex_attributes[i1 * attribute_count]); - ej += quadricError(attribute_quadrics[remap[j0]], &attribute_gradients[remap[j0] * attribute_count], attribute_count, vertex_positions[j1], &vertex_attributes[j1 * attribute_count]); + ei += quadricError(attribute_quadrics[i0], &attribute_gradients[i0 * attribute_count], attribute_count, vertex_positions[i1], &vertex_attributes[i1 * attribute_count]); + ej += bidi ? quadricError(attribute_quadrics[i1], &attribute_gradients[i1 * attribute_count], attribute_count, vertex_positions[i0], &vertex_attributes[i0 * attribute_count]) : 0; + + // seam edges need to aggregate attribute errors between primary and secondary edges, as attribute quadrics are separate + if (vertex_kind[i0] == Kind_Seam) + { + // for seam collapses we need to find the seam pair; this is a bit tricky since we need to rely on edge loops as target vertex may be locked (and thus have more than two wedges) + unsigned int s0 = wedge[i0]; + unsigned int s1 = loop[i0] == i1 ? loopback[s0] : loop[s0]; + + assert(wedge[s0] == i0); // s0 may be equal to i0 for half-seams + assert(s1 != ~0u && remap[s1] == remap[i1]); + + // note: this should never happen due to the assertion above, but when disabled if we ever hit this case we'll get a memory safety issue; for now play it safe + s1 = (s1 != ~0u) ? s1 : wedge[i1]; + + ei += quadricError(attribute_quadrics[s0], &attribute_gradients[s0 * attribute_count], attribute_count, vertex_positions[s1], &vertex_attributes[s1 * attribute_count]); + ej += bidi ? quadricError(attribute_quadrics[s1], &attribute_gradients[s1 * attribute_count], attribute_count, vertex_positions[s0], &vertex_attributes[s0 * attribute_count]) : 0; + } + else + { + // complex edges can have multiple wedges, so we need to aggregate errors for all wedges based on the selected target + if (vertex_kind[i0] == Kind_Complex) + for (unsigned int v = wedge[i0]; v != i0; v = wedge[v]) + { + unsigned int t = getComplexTarget(v, i1, remap, loop, loopback); + + ei += quadricError(attribute_quadrics[v], &attribute_gradients[v * attribute_count], attribute_count, vertex_positions[t], &vertex_attributes[t * attribute_count]); + } + + if (vertex_kind[i1] == Kind_Complex && bidi) + for (unsigned int v = wedge[i1]; v != i1; v = wedge[v]) + { + unsigned int t = getComplexTarget(v, i0, remap, loop, loopback); + + ej += quadricError(attribute_quadrics[v], &attribute_gradients[v * attribute_count], attribute_count, vertex_positions[t], &vertex_attributes[t * attribute_count]); + } + } } - // pick edge direction with minimal error - c.v0 = ei <= ej ? i0 : j0; - c.v1 = ei <= ej ? i1 : j1; - c.error = ei <= ej ? ei : ej; + // pick edge direction with minimal error (branchless) + bool rev = bidi & (ej < ei); + + c.v0 = rev ? i1 : i0; + c.v1 = rev ? i0 : i1; + c.error = ej < ei ? ej : ei; + +#if TRACE >= 3 + if (bidi) + printf("edge eval %d -> %d: error %f (pos %f, attr %f); reverse %f (pos %f, attr %f)\n", + rev ? i1 : i0, rev ? i0 : i1, + sqrtf(rev ? ej : ei), sqrtf(rev ? dj : di), sqrtf(rev ? 
ej - dj : ei - di), + sqrtf(rev ? ei : ej), sqrtf(rev ? di : dj), sqrtf(rev ? ei - di : ej - dj)); + else + printf("edge eval %d -> %d: error %f (pos %f, attr %f)\n", i0, i1, sqrtf(c.error), sqrtf(di), sqrtf(ei - di)); +#endif } } static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count) { - const int sort_bits = 11; + // we use counting sort to order collapses by error; since the exact sort order is not as critical, + // only top 12 bits of exponent+mantissa (8 bits of exponent and 4 bits of mantissa) are used. + // to avoid excessive stack usage, we clamp the exponent range as collapses with errors much higher than 1 are not useful. + const unsigned int sort_bits = 12; + const unsigned int sort_bins = 2048 + 512; // exponent range [-127, 32) // fill histogram for counting sort - unsigned int histogram[1 << sort_bits]; + unsigned int histogram[sort_bins]; memset(histogram, 0, sizeof(histogram)); for (size_t i = 0; i < collapse_count; ++i) { // skip sign bit since error is non-negative - unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); + unsigned int error = collapses[i].errorui; + unsigned int key = (error << 1) >> (32 - sort_bits); + key = key < sort_bins ? key : sort_bins - 1; histogram[key]++; } @@ -1058,7 +1470,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse // compute offsets based on histogram data size_t histogram_sum = 0; - for (size_t i = 0; i < 1 << sort_bits; ++i) + for (size_t i = 0; i < sort_bins; ++i) { size_t count = histogram[i]; histogram[i] = unsigned(histogram_sum); @@ -1071,13 +1483,15 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse for (size_t i = 0; i < collapse_count; ++i) { // skip sign bit since error is non-negative - unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); + unsigned int error = collapses[i].errorui; + unsigned int key = (error << 1) >> (32 - sort_bits); + key = key < sort_bins ? 
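The counting sort above keys on the top 12 bits of the non-negative float error; the same bucketing shown standalone (hypothetical sortByErrorKey, using a full 4096-bin histogram instead of the clamped bin count above):

#include <assert.h>
#include <stddef.h>
#include <string.h>

// Order non-negative floats approximately by bucketing on the high bits of their
// IEEE-754 representation: for non-negative values the bit pattern is monotonic,
// so 8 exponent bits + 4 mantissa bits give a coarse but stable key.
static void sortByErrorKey(unsigned int* order, const float* errors, size_t count)
{
    const unsigned int sort_bits = 12;
    const unsigned int sort_bins = 1 << sort_bits;

    unsigned int histogram[sort_bins];
    memset(histogram, 0, sizeof(histogram));

    for (size_t i = 0; i < count; ++i)
    {
        unsigned int bits;
        memcpy(&bits, &errors[i], sizeof(bits));
        histogram[(bits << 1) >> (32 - sort_bits)]++; // drop the sign bit, keep the top 12 bits
    }

    unsigned int offset = 0;
    for (unsigned int i = 0; i < sort_bins; ++i)
    {
        unsigned int c = histogram[i];
        histogram[i] = offset;
        offset += c;
    }

    for (size_t i = 0; i < count; ++i)
    {
        unsigned int bits;
        memcpy(&bits, &errors[i], sizeof(bits));
        order[histogram[(bits << 1) >> (32 - sort_bits)]++] = unsigned(i);
    }
}

int main()
{
    float errors[] = {0.5f, 0.001f, 8.f, 0.25f};
    unsigned int order[4];
    sortByErrorKey(order, errors, 4);
    assert(order[0] == 1 && order[1] == 3 && order[2] == 0 && order[3] == 2);
    return 0;
}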
key : sort_bins - 1; sort_order[histogram[key]++] = unsigned(i); } } -static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, size_t attribute_count, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error) +static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error) { size_t edge_collapses = 0; size_t triangle_collapses = 0; @@ -1087,7 +1501,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* size_t edge_collapse_goal = triangle_collapse_goal / 2; #if TRACE - size_t stats[4] = {}; + size_t stats[7] = {}; #endif for (size_t i = 0; i < collapse_count; ++i) @@ -1097,10 +1511,16 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* TRACESTATS(0); if (c.error > error_limit) + { + TRACESTATS(4); break; + } if (triangle_collapses >= triangle_collapse_goal) + { + TRACESTATS(5); break; + } // we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked // as they will share vertices with other successfull collapses, we need to increase the acceptable error by some factor @@ -1108,8 +1528,11 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* // on average, each collapse is expected to lock 6 other collapses; to avoid degenerate passes on meshes with odd // topology, we only abort if we got over 1/6 collapses accordingly. 
- if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 6) + if (c.error > error_goal && c.error > result_error && triangle_collapses > triangle_collapse_goal / 6) + { + TRACESTATS(6); break; + } unsigned int i0 = c.v0; unsigned int i1 = c.v1; @@ -1117,6 +1540,8 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* unsigned int r0 = remap[i0]; unsigned int r1 = remap[i1]; + unsigned char kind = vertex_kind[i0]; + // we don't collapse vertices that had source or target vertex involved in a collapse // it's important to not move the vertices twice since it complicates the tracking/remapping logic // it's important to not move other vertices towards a moved vertex to preserve error since we don't re-rank collapses mid-pass @@ -1135,35 +1560,41 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* continue; } +#if TRACE >= 2 + printf("edge commit %d -> %d: kind %d->%d, error %f\n", i0, i1, vertex_kind[i0], vertex_kind[i1], sqrtf(c.error)); +#endif + assert(collapse_remap[r0] == r0); assert(collapse_remap[r1] == r1); - quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]); - - if (attribute_count) - { - quadricAdd(attribute_quadrics[r1], attribute_quadrics[r0]); - quadricAdd(&attribute_gradients[r1 * attribute_count], &attribute_gradients[r0 * attribute_count], attribute_count); - } - - if (vertex_kind[i0] == Kind_Complex) + if (kind == Kind_Complex) { + // remap all vertices in the complex to the target vertex unsigned int v = i0; do { - collapse_remap[v] = r1; + unsigned int t = getComplexTarget(v, i1, remap, loop, loopback); + + collapse_remap[v] = t; v = wedge[v]; } while (v != i0); } - else if (vertex_kind[i0] == Kind_Seam) + else if (kind == Kind_Seam) { - // remap v0 to v1 and seam pair of v0 to seam pair of v1 + // for seam collapses we need to move the seam pair together; this is a bit tricky since we need to rely on edge loops as target vertex may be locked (and thus have more than two wedges) unsigned int s0 = wedge[i0]; - unsigned int s1 = wedge[i1]; + unsigned int s1 = loop[i0] == i1 ? loopback[s0] : loop[s0]; + assert(wedge[s0] == i0); // s0 may be equal to i0 for half-seams + assert(s1 != ~0u && remap[s1] == r1); - assert(s0 != i0 && s1 != i1); - assert(wedge[s0] == i0 && wedge[s1] == i1); + // additional asserts to verify that the seam pair is consistent + assert(kind != vertex_kind[i1] || s1 == wedge[i1]); + assert(loop[i0] == i1 || loopback[i0] == i1); + assert(loop[s0] == s1 || loopback[s0] == s1); + + // note: this should never happen due to the assertion above, but when disabled if we ever hit this case we'll get a memory safety issue; for now play it safe + s1 = (s1 != ~0u) ? s1 : wedge[i1]; collapse_remap[i0] = i1; collapse_remap[s0] = s1; @@ -1175,28 +1606,205 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_remap[i0] = i1; } + // note: we technically don't need to lock r1 if it's a locked vertex, as it can't move and its quadric won't be used + // however, this results in slightly worse error on some meshes because the locked collapses get an unfair advantage wrt scheduling collapse_locked[r0] = 1; collapse_locked[r1] = 1; // border edges collapse 1 triangle, other edges collapse 2 or more - triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2; + triangle_collapses += (kind == Kind_Border) ? 1 : 2; edge_collapses++; result_error = result_error < c.error ? 
c.error : result_error; } #if TRACE - float error_goal_perfect = edge_collapse_goal < collapse_count ? collapses[collapse_order[edge_collapse_goal]].error : 0.f; + float error_goal_last = edge_collapse_goal < collapse_count ? 1.5f * collapses[collapse_order[edge_collapse_goal]].error : FLT_MAX; + float error_goal_limit = error_goal_last < error_limit ? error_goal_last : error_limit; - printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d)\n", - int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect), - int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2])); + printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d); %s\n", + int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_limit), + int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2]), + stats[4] ? "error limit" : (stats[5] ? "count limit" : (stats[6] ? "error goal" : "out of collapses"))); #endif return edge_collapses; } -static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap) +static void updateQuadrics(const unsigned int* collapse_remap, size_t vertex_count, Quadric* vertex_quadrics, QuadricGrad* volume_gradients, Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, size_t attribute_count, const Vector3* vertex_positions, const unsigned int* remap, float& vertex_error) +{ + for (size_t i = 0; i < vertex_count; ++i) + { + if (collapse_remap[i] == i) + continue; + + unsigned int i0 = unsigned(i); + unsigned int i1 = collapse_remap[i]; + + unsigned int r0 = remap[i0]; + unsigned int r1 = remap[i1]; + + // ensure we only update vertex_quadrics once: primary vertex must be moved if any wedge is moved + if (i0 == r0) + { + quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]); + + if (volume_gradients) + quadricAdd(volume_gradients[r1], volume_gradients[r0]); + } + + if (attribute_count) + { + quadricAdd(attribute_quadrics[i1], attribute_quadrics[i0]); + quadricAdd(&attribute_gradients[i1 * attribute_count], &attribute_gradients[i0 * attribute_count], attribute_count); + + if (i0 == r0) + { + // when attributes are used, distance error needs to be recomputed as collapses don't track it; it is safe to do this after the quadric adjustment + float derr = quadricError(vertex_quadrics[r0], vertex_positions[r1]); + vertex_error = vertex_error < derr ? 
derr : vertex_error; + } + } + } +} + +static void solvePositions(Vector3* vertex_positions, size_t vertex_count, const Quadric* vertex_quadrics, const QuadricGrad* volume_gradients, const Quadric* attribute_quadrics, const QuadricGrad* attribute_gradients, size_t attribute_count, const unsigned int* remap, const unsigned int* wedge, const EdgeAdjacency& adjacency, const unsigned char* vertex_kind, const unsigned char* vertex_update) +{ +#if TRACE + size_t stats[6] = {}; +#endif + + for (size_t i = 0; i < vertex_count; ++i) + { + if (!vertex_update[i]) + continue; + + // moving vertices on an attribute discontinuity may result in extrapolating UV outside of the chart bounds + // moving vertices on a border requires a stronger edge quadric to preserve the border geometry + if (vertex_kind[i] == Kind_Locked || vertex_kind[i] == Kind_Seam || vertex_kind[i] == Kind_Border) + continue; + + if (remap[i] != i) + { + vertex_positions[i] = vertex_positions[remap[i]]; + continue; + } + + TRACESTATS(0); + + const Vector3& vp = vertex_positions[i]; + + Quadric Q = vertex_quadrics[i]; + QuadricGrad GV = {}; + + // add a point quadric for regularization to stabilize the solution + Quadric R; + quadricFromPoint(R, vp.x, vp.y, vp.z, Q.w * 1e-4f); + quadricAdd(Q, R); + + if (attribute_count) + { + // optimal point simultaneously minimizes attribute quadrics for all wedges + unsigned int v = unsigned(i); + do + { + quadricReduceAttributes(Q, attribute_quadrics[v], &attribute_gradients[v * attribute_count], attribute_count); + v = wedge[v]; + } while (v != i); + + // minimizing attribute quadrics results in volume loss so we incorporate volume gradient as a constraint + if (volume_gradients) + GV = volume_gradients[i]; + } + + Vector3 p; + if (!quadricSolve(p, Q, GV)) + { + TRACESTATS(2); + continue; + } + + // reject updates that move the vertex too far from its neighborhood + // this detects and fixes most cases when the quadric is not well-defined + float nr = getNeighborhoodRadius(adjacency, vertex_positions, unsigned(i)); + float dp = (p.x - vp.x) * (p.x - vp.x) + (p.y - vp.y) * (p.y - vp.y) + (p.z - vp.z) * (p.z - vp.z); + + if (dp > nr * nr) + { + TRACESTATS(3); + continue; + } + + // reject updates that would flip a neighboring triangle, as we do for edge collapse + if (hasTriangleFlips(adjacency, vertex_positions, unsigned(i), p)) + { + TRACESTATS(4); + continue; + } + + // reject updates that increase positional error too much; allow some tolerance to improve attribute quality + if (quadricError(vertex_quadrics[i], p) > quadricError(vertex_quadrics[i], vp) * 1.5f + 1e-6f) + { + TRACESTATS(5); + continue; + } + + TRACESTATS(1); + vertex_positions[i] = p; + } + +#if TRACE + printf("updated %d/%d positions; failed solve %d bounds %d flip %d error %d\n", int(stats[1]), int(stats[0]), int(stats[2]), int(stats[3]), int(stats[4]), int(stats[5])); +#endif +} + +static void solveAttributes(Vector3* vertex_positions, float* vertex_attributes, size_t vertex_count, const Quadric* attribute_quadrics, const QuadricGrad* attribute_gradients, size_t attribute_count, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const unsigned char* vertex_update) +{ + for (size_t i = 0; i < vertex_count; ++i) + { + if (!vertex_update[i]) + continue; + + if (remap[i] != i) + continue; + + for (size_t k = 0; k < attribute_count; ++k) + { + unsigned int shared = ~0u; + + // for complex vertices, preserve attribute continuity and use highest weight wedge if values were shared + if 
(vertex_kind[i] == Kind_Complex) + { + shared = unsigned(i); + + for (unsigned int v = wedge[i]; v != i; v = wedge[v]) + if (vertex_attributes[v * attribute_count + k] != vertex_attributes[i * attribute_count + k]) + shared = ~0u; + else if (shared != ~0u && attribute_quadrics[v].w > attribute_quadrics[shared].w) + shared = v; + } + + // update attributes for all wedges + unsigned int v = unsigned(i); + do + { + unsigned int r = (shared == ~0u) ? v : shared; + + const Vector3& p = vertex_positions[i]; // same for all wedges + const Quadric& A = attribute_quadrics[r]; + const QuadricGrad& G = attribute_gradients[r * attribute_count + k]; + + float iw = A.w == 0 ? 0.f : 1.f / A.w; + float av = (G.gx * p.x + G.gy * p.y + G.gz * p.z + G.gw) * iw; + + vertex_attributes[v * attribute_count + k] = av; + v = wedge[v]; + } while (v != i); + } + } +} + +static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap, const unsigned int* remap) { size_t write = 0; @@ -1211,7 +1819,14 @@ static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const assert(collapse_remap[v1] == v1); assert(collapse_remap[v2] == v2); - if (v0 != v1 && v0 != v2 && v1 != v2) + // collapse zero area triangles even if they are not topologically degenerate + // this is required to cleanup manifold->seam collapses when a vertex is collapsed onto a seam pair + // as well as complex collapses and some other cases where cross wedge collapses are performed + unsigned int r0 = remap[v0]; + unsigned int r1 = remap[v1]; + unsigned int r2 = remap[v2]; + + if (r0 != r1 && r0 != r2 && r1 != r2) { indices[write + 0] = v0; indices[write + 1] = v1; @@ -1227,17 +1842,183 @@ static void remapEdgeLoops(unsigned int* loop, size_t vertex_count, const unsign { for (size_t i = 0; i < vertex_count; ++i) { + // note: this is a no-op for vertices that were remapped + // ideally we would clear the loop entries for those for consistency, even though they aren't going to be used + // however, the remapping process needs loop information for remapped vertices, so this would require a separate pass if (loop[i] != ~0u) { unsigned int l = loop[i]; unsigned int r = collapse_remap[l]; // i == r is a special case when the seam edge is collapsed in a direction opposite to where loop goes - loop[i] = (i == r) ? loop[l] : r; + if (i == r) + loop[i] = (loop[l] != ~0u) ? collapse_remap[loop[l]] : ~0u; + else + loop[i] = r; } } } +static unsigned int follow(unsigned int* parents, unsigned int index) +{ + while (index != parents[index]) + { + unsigned int parent = parents[index]; + parents[index] = parents[parent]; + index = parent; + } + + return index; +} + +static size_t buildComponents(unsigned int* components, size_t vertex_count, const unsigned int* indices, size_t index_count, const unsigned int* remap) +{ + for (size_t i = 0; i < vertex_count; ++i) + components[i] = unsigned(i); + + // compute a unique (but not sequential!) 
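follow() above is the find step of a union-find with path compression; a compact standalone version of the structure as buildComponents uses it (merging larger roots into smaller ones), with a hypothetical merge helper:

#include <assert.h>
#include <vector>

// Find with path compression: every visited node is pointed at its grandparent,
// which keeps subsequent lookups nearly constant time.
static unsigned int follow(std::vector<unsigned int>& parents, unsigned int index)
{
    while (index != parents[index])
    {
        unsigned int parent = parents[index];
        parents[index] = parents[parent];
        index = parent;
    }

    return index;
}

// Union step: always keep the smaller index as the root, so a later sequential
// pass can renumber components in order.
static void merge(std::vector<unsigned int>& parents, unsigned int a, unsigned int b)
{
    unsigned int ra = follow(parents, a);
    unsigned int rb = follow(parents, b);

    if (ra != rb)
        parents[ra < rb ? rb : ra] = ra < rb ? ra : rb;
}

int main()
{
    std::vector<unsigned int> parents = {0, 1, 2, 3, 4};
    merge(parents, 3, 4);
    merge(parents, 1, 3);

    assert(follow(parents, 4) == 1);                 // {1, 3, 4} share root 1
    assert(follow(parents, 2) == 2);                 // 2 is untouched
    assert(follow(parents, 0) != follow(parents, 4));
    return 0;
}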
index for each component via union-find + for (size_t i = 0; i < index_count; i += 3) + { + static const int next[4] = {1, 2, 0, 1}; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + + unsigned int r0 = remap[i0]; + unsigned int r1 = remap[i1]; + + r0 = follow(components, r0); + r1 = follow(components, r1); + + // merge components with larger indices into components with smaller indices + // this guarantees that the root of the component is always the one with the smallest index + if (r0 != r1) + components[r0 < r1 ? r1 : r0] = r0 < r1 ? r0 : r1; + } + } + + // make sure each element points to the component root *before* we renumber the components + for (size_t i = 0; i < vertex_count; ++i) + if (remap[i] == i) + components[i] = follow(components, unsigned(i)); + + unsigned int next_component = 0; + + // renumber components using sequential indices + // a sequential pass is sufficient because component root always has the smallest index + // note: it is unsafe to use follow() in this pass because we're replacing component links with sequential indices inplace + for (size_t i = 0; i < vertex_count; ++i) + { + if (remap[i] == i) + { + unsigned int root = components[i]; + assert(root <= i); // make sure we already computed the component for non-roots + components[i] = (root == i) ? next_component++ : components[root]; + } + else + { + assert(remap[i] < i); // make sure we already computed the component + components[i] = components[remap[i]]; + } + } + + return next_component; +} + +static void measureComponents(float* component_errors, size_t component_count, const unsigned int* components, const Vector3* vertex_positions, size_t vertex_count) +{ + memset(component_errors, 0, component_count * 4 * sizeof(float)); + + // compute approximate sphere center for each component as an average + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int c = components[i]; + assert(components[i] < component_count); + + Vector3 v = vertex_positions[i]; // copy avoids aliasing issues + + component_errors[c * 4 + 0] += v.x; + component_errors[c * 4 + 1] += v.y; + component_errors[c * 4 + 2] += v.z; + component_errors[c * 4 + 3] += 1; // weight + } + + // complete the center computation, and reinitialize [3] as a radius + for (size_t i = 0; i < component_count; ++i) + { + float w = component_errors[i * 4 + 3]; + float iw = w == 0.f ? 0.f : 1.f / w; + + component_errors[i * 4 + 0] *= iw; + component_errors[i * 4 + 1] *= iw; + component_errors[i * 4 + 2] *= iw; + component_errors[i * 4 + 3] = 0; // radius + } + + // compute squared radius for each component + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int c = components[i]; + + float dx = vertex_positions[i].x - component_errors[c * 4 + 0]; + float dy = vertex_positions[i].y - component_errors[c * 4 + 1]; + float dz = vertex_positions[i].z - component_errors[c * 4 + 2]; + float r = dx * dx + dy * dy + dz * dz; + + component_errors[c * 4 + 3] = component_errors[c * 4 + 3] < r ? 
r : component_errors[c * 4 + 3]; + } + + // we've used the output buffer as scratch space, so we need to move the results to proper indices + for (size_t i = 0; i < component_count; ++i) + { +#if TRACE >= 2 + printf("component %d: center %f %f %f, error %e\n", int(i), + component_errors[i * 4 + 0], component_errors[i * 4 + 1], component_errors[i * 4 + 2], sqrtf(component_errors[i * 4 + 3])); +#endif + // note: we keep the squared error to make it match quadric error metric + component_errors[i] = component_errors[i * 4 + 3]; + } +} + +static size_t pruneComponents(unsigned int* indices, size_t index_count, const unsigned int* components, const float* component_errors, size_t component_count, float error_cutoff, float& nexterror) +{ + (void)component_count; + + size_t write = 0; + float min_error = FLT_MAX; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int v0 = indices[i + 0], v1 = indices[i + 1], v2 = indices[i + 2]; + unsigned int c = components[v0]; + assert(c == components[v1] && c == components[v2]); + + if (component_errors[c] > error_cutoff) + { + min_error = min_error > component_errors[c] ? component_errors[c] : min_error; + + indices[write + 0] = v0; + indices[write + 1] = v1; + indices[write + 2] = v2; + write += 3; + } + } + +#if TRACE + size_t pruned_components = 0; + for (size_t i = 0; i < component_count; ++i) + pruned_components += (component_errors[i] >= nexterror && component_errors[i] <= error_cutoff); + + printf("pruned %d triangles in %d components (goal %e); next %e\n", int((index_count - write) / 3), int(pruned_components), sqrtf(error_cutoff), min_error < FLT_MAX ? sqrtf(min_error) : min_error * 2); +#endif + + // update next error with the smallest error of the remaining components + nexterror = min_error; + return write; +} + struct CellHasher { const unsigned int* vertex_ids; @@ -1299,7 +2080,7 @@ struct TriangleHasher } }; -static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, size_t vertex_count, int grid_size) +static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, const unsigned char* vertex_lock, size_t vertex_count, int grid_size) { assert(grid_size >= 1 && grid_size <= 1024); float cell_scale = float(grid_size - 1); @@ -1312,7 +2093,10 @@ static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_pos int yi = int(v.y * cell_scale + 0.5f); int zi = int(v.z * cell_scale + 0.5f); - vertex_ids[i] = (xi << 20) | (yi << 10) | zi; + if (vertex_lock && (vertex_lock[i] & meshopt_SimplifyVertex_Lock)) + vertex_ids[i] = (1 << 30) | unsigned(i); + else + vertex_ids[i] = (xi << 20) | (yi << 10) | zi; } } @@ -1541,17 +2325,17 @@ static float interpolate(float y, float x0, float y0, float x1, float y1, float // three point interpolation from "revenge of interpolation search" paper float num = (y1 - y) * (x1 - x2) * (x1 - x0) * (y2 - y0); float den = (y2 - y) * (x1 - x2) * (y0 - y1) + (y0 - y) * (x1 - x0) * (y1 - y2); - return x1 + num / den; + return x1 + (den == 0.f ? 
0.f : num / den); } } // namespace meshopt -#ifndef NDEBUG -// Note: this is only exposed for debug visualization purposes; do *not* use these in debug builds -MESHOPTIMIZER_API unsigned char* meshopt_simplifyDebugKind = NULL; -MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = NULL; -MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = NULL; -#endif +// Note: this is only exposed for development purposes; do *not* use +enum +{ + meshopt_SimplifyInternalSolve = 1 << 29, + meshopt_SimplifyInternalDebug = 1 << 30 +}; size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) { @@ -1561,10 +2345,13 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); assert(target_index_count <= index_count); - assert((options & ~(meshopt_SimplifyLockBorder | meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute)) == 0); + assert(target_error >= 0); + assert((options & ~(meshopt_SimplifyLockBorder | meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute | meshopt_SimplifyPrune | meshopt_SimplifyRegularize | meshopt_SimplifyPermissive | meshopt_SimplifyInternalSolve | meshopt_SimplifyInternalDebug)) == 0); assert(vertex_attributes_stride >= attribute_count * sizeof(float) && vertex_attributes_stride <= 256); assert(vertex_attributes_stride % sizeof(float) == 0); assert(attribute_count <= kMaxAttributes); + for (size_t i = 0; i < attribute_count; ++i) + assert(attribute_weights[i] >= 0); meshopt_Allocator allocator; @@ -1584,6 +2371,7 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic updateEdgeAdjacency(adjacency, result, index_count, vertex_count, NULL); // build position remap that maps each vertex to the one with identical position + // wedge table stores next vertex with identical position for each vertex unsigned int* remap = allocator.allocate(vertex_count); unsigned int* wedge = allocator.allocate(vertex_count); buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap, allocator); @@ -1610,14 +2398,23 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic #endif Vector3* vertex_positions = allocator.allocate(vertex_count); - float vertex_scale = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap); + float vertex_offset[3] = {}; + float vertex_scale = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap, vertex_offset); float* vertex_attributes = NULL; + unsigned int attribute_remap[kMaxAttributes]; if (attribute_count) { + // remap attributes to only include ones with weight > 0 to minimize memory/compute overhead for quadrics + size_t attributes_used = 0; + for (size_t i = 0; i < attribute_count; ++i) + if (attribute_weights[i] > 0) + attribute_remap[attributes_used++] = unsigned(i); + + attribute_count = attributes_used; vertex_attributes = allocator.allocate(vertex_count * attribute_count); - 
rescaleAttributes(vertex_attributes, vertex_attributes_data, vertex_count, vertex_attributes_stride, attribute_weights, attribute_count, sparse_remap); + rescaleAttributes(vertex_attributes, vertex_attributes_data, vertex_count, vertex_attributes_stride, attribute_weights, attribute_count, attribute_remap, sparse_remap); } Quadric* vertex_quadrics = allocator.allocate(vertex_count); @@ -1625,6 +2422,7 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic Quadric* attribute_quadrics = NULL; QuadricGrad* attribute_gradients = NULL; + QuadricGrad* volume_gradients = NULL; if (attribute_count) { @@ -1633,13 +2431,42 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic attribute_gradients = allocator.allocate(vertex_count * attribute_count); memset(attribute_gradients, 0, vertex_count * attribute_count * sizeof(QuadricGrad)); + + if (options & meshopt_SimplifyInternalSolve) + { + volume_gradients = allocator.allocate(vertex_count); + memset(volume_gradients, 0, vertex_count * sizeof(QuadricGrad)); + } } - fillFaceQuadrics(vertex_quadrics, result, index_count, vertex_positions, remap); + fillFaceQuadrics(vertex_quadrics, volume_gradients, result, index_count, vertex_positions, remap); + fillVertexQuadrics(vertex_quadrics, vertex_positions, vertex_count, remap, options); fillEdgeQuadrics(vertex_quadrics, result, index_count, vertex_positions, remap, vertex_kind, loop, loopback); if (attribute_count) - fillAttributeQuadrics(attribute_quadrics, attribute_gradients, result, index_count, vertex_positions, vertex_attributes, attribute_count, remap); + fillAttributeQuadrics(attribute_quadrics, attribute_gradients, result, index_count, vertex_positions, vertex_attributes, attribute_count); + + unsigned int* components = NULL; + float* component_errors = NULL; + size_t component_count = 0; + float component_nexterror = 0; + + if (options & meshopt_SimplifyPrune) + { + components = allocator.allocate(vertex_count); + component_count = buildComponents(components, vertex_count, result, index_count, remap); + + component_errors = allocator.allocate(component_count * 4); // overallocate for temporary use inside measureComponents + measureComponents(component_errors, component_count, components, vertex_positions, vertex_count); + + component_nexterror = FLT_MAX; + for (size_t i = 0; i < component_count; ++i) + component_nexterror = component_nexterror > component_errors[i] ? component_errors[i] : component_nexterror; + +#if TRACE + printf("components: %d (min error %e)\n", int(component_count), sqrtf(component_nexterror)); +#endif + } #if TRACE size_t pass_count = 0; @@ -1654,6 +2481,7 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic size_t result_count = index_count; float result_error = 0; + float vertex_error = 0; // target_error input is linear; we need to adjust it to match quadricError units float error_scale = (options & meshopt_SimplifyErrorAbsolute) ? 
vertex_scale : 1.f; @@ -1664,14 +2492,18 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic // note: throughout the simplification process adjacency structure reflects welded topology for result-in-progress updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap); - size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, collapse_capacity, result, result_count, remap, vertex_kind, loop); + size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, collapse_capacity, result, result_count, remap, vertex_kind, loop, loopback); assert(edge_collapse_count <= collapse_capacity); // no edges can be collapsed any more due to topology restrictions if (edge_collapse_count == 0) break; - rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_attributes, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, remap); +#if TRACE + printf("pass %d:%c", int(pass_count++), TRACE >= 2 ? '\n' : ' '); +#endif + + rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_attributes, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, remap, wedge, vertex_kind, loop, loopback); sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count); @@ -1682,39 +2514,101 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic memset(collapse_locked, 0, vertex_count); -#if TRACE - printf("pass %d: ", int(pass_count++)); -#endif - - size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error); + size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, loop, loopback, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error); // no edges can be collapsed any more due to hitting the error limit or triangle collapse limit if (collapses == 0) break; + updateQuadrics(collapse_remap, vertex_count, vertex_quadrics, volume_gradients, attribute_quadrics, attribute_gradients, attribute_count, vertex_positions, remap, vertex_error); + + // updateQuadrics will update vertex error if we use attributes, but if we don't then result_error and vertex_error are equivalent + vertex_error = attribute_count == 0 ? 
result_error : vertex_error; + + // note: we update loops following edge collapses, but after this we might still have stale loop data + // this can happen when a triangle with a loop edge gets collapsed along a non-loop edge + // that works since a loop that points to a vertex that is no longer connected is not affecting collapse logic remapEdgeLoops(loop, vertex_count, collapse_remap); remapEdgeLoops(loopback, vertex_count, collapse_remap); - size_t new_count = remapIndexBuffer(result, result_count, collapse_remap); - assert(new_count < result_count); + result_count = remapIndexBuffer(result, result_count, collapse_remap, remap); + if ((options & meshopt_SimplifyPrune) && result_count > target_index_count && component_nexterror <= vertex_error) + result_count = pruneComponents(result, result_count, components, component_errors, component_count, vertex_error, component_nexterror); + } + + // at this point, component_nexterror might be stale: component it references may have been removed through a series of edge collapses + bool component_nextstale = true; + + // we're done with the regular simplification but we're still short of the target; try pruning more aggressively towards error_limit + while ((options & meshopt_SimplifyPrune) && result_count > target_index_count && component_nexterror <= error_limit) + { +#if TRACE + printf("pass %d: cleanup; ", int(pass_count++)); +#endif + + float component_cutoff = component_nexterror * 1.5f < error_limit ? component_nexterror * 1.5f : error_limit; + + // track maximum error in eligible components as we are increasing resulting error + float component_maxerror = 0; + for (size_t i = 0; i < component_count; ++i) + if (component_errors[i] > component_maxerror && component_errors[i] <= component_cutoff) + component_maxerror = component_errors[i]; + + size_t new_count = pruneComponents(result, result_count, components, component_errors, component_count, component_cutoff, component_nexterror); + if (new_count == result_count && !component_nextstale) + break; + + component_nextstale = false; // pruneComponents guarantees next error is up to date result_count = new_count; + result_error = result_error < component_maxerror ? component_maxerror : result_error; + vertex_error = vertex_error < component_maxerror ? 
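As a usage-level illustration of the pruning path above, small disconnected components can be removed during regular simplification by passing meshopt_SimplifyPrune; this is a sketch only, and the packed-xyz layout, helper name, and include path are assumptions:

#include <vector>

#include "meshoptimizer.h" // include path assumed

// hypothetical helper: simplify and let the library drop small leftover components
std::vector<unsigned int> simplifyWithPrune(const std::vector<unsigned int>& indices,
    const std::vector<float>& positions, // packed x y z per vertex (assumed layout)
    size_t target_index_count, float target_error)
{
    std::vector<unsigned int> lod(indices.size());
    float lod_error = 0.f;

    size_t lod_count = meshopt_simplify(lod.data(), indices.data(), indices.size(),
        positions.data(), positions.size() / 3, 3 * sizeof(float),
        target_index_count, target_error,
        meshopt_SimplifyPrune, // also prune components whose error fits under the limit
        &lod_error);

    lod.resize(lod_count);
    return lod;
}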
component_maxerror : vertex_error; } #if TRACE - printf("result: %d triangles, error: %e; total %d passes\n", int(result_count / 3), sqrtf(result_error), int(pass_count)); + printf("result: %d triangles, error: %e (pos %.3e); total %d passes\n", int(result_count / 3), sqrtf(result_error), sqrtf(vertex_error), int(pass_count)); #endif -#ifndef NDEBUG - if (meshopt_simplifyDebugKind) - memcpy(meshopt_simplifyDebugKind, vertex_kind, vertex_count); + // if solve is requested, update input buffers destructively from internal data + if (options & meshopt_SimplifyInternalSolve) + { + unsigned char* vertex_update = collapse_locked; // reuse as scratch space + memset(vertex_update, 0, vertex_count); - if (meshopt_simplifyDebugLoop) - memcpy(meshopt_simplifyDebugLoop, loop, vertex_count * sizeof(unsigned int)); + // limit quadric solve to vertices that are still used in the result + for (size_t i = 0; i < result_count; ++i) + { + unsigned int v = result[i]; - if (meshopt_simplifyDebugLoopBack) - memcpy(meshopt_simplifyDebugLoopBack, loopback, vertex_count * sizeof(unsigned int)); -#endif + // mark the vertex for finalizeVertices and root vertex for solve* + vertex_update[remap[v]] = vertex_update[v] = 1; + } + + // edge adjacency may be stale as we haven't updated it after last series of edge collapses + updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap); + + solvePositions(vertex_positions, vertex_count, vertex_quadrics, volume_gradients, attribute_quadrics, attribute_gradients, attribute_count, remap, wedge, adjacency, vertex_kind, vertex_update); + + if (attribute_count) + solveAttributes(vertex_positions, vertex_attributes, vertex_count, attribute_quadrics, attribute_gradients, attribute_count, remap, wedge, vertex_kind, vertex_update); + + finalizeVertices(const_cast(vertex_positions_data), vertex_positions_stride, const_cast(vertex_attributes_data), vertex_attributes_stride, attribute_weights, attribute_count, vertex_count, vertex_positions, vertex_attributes, sparse_remap, attribute_remap, vertex_scale, vertex_offset, vertex_kind, vertex_update, vertex_lock); + } + + // if debug visualization data is requested, fill it instead of index data; for simplicity, this doesn't work with sparsity + if ((options & meshopt_SimplifyInternalDebug) && !sparse_remap) + { + assert(Kind_Count <= 8 && vertex_count < (1 << 28)); // 3 bit kind, 1 bit loop + + for (size_t i = 0; i < result_count; i += 3) + { + unsigned int a = result[i + 0], b = result[i + 1], c = result[i + 2]; + + result[i + 0] |= (vertex_kind[a] << 28) | (unsigned(loop[a] == b || loopback[b] == a) << 31); + result[i + 1] |= (vertex_kind[b] << 28) | (unsigned(loop[b] == c || loopback[c] == b) << 31); + result[i + 2] |= (vertex_kind[c] << 28) | (unsigned(loop[c] == a || loopback[a] == c) << 31); + } + } // convert resulting indices back into the dense space of the larger mesh if (sparse_remap) @@ -1730,15 +2624,24 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) { + assert((options & meshopt_SimplifyInternalSolve) == 0); // use meshopt_simplifyWithUpdate instead + return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, 0, NULL, 0, NULL, 
target_index_count, target_error, options, out_result_error); } size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) { + assert((options & meshopt_SimplifyInternalSolve) == 0); // use meshopt_simplifyWithUpdate instead + return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, out_result_error); } -size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) +size_t meshopt_simplifyWithUpdate(unsigned int* indices, size_t index_count, float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) +{ + return meshopt_simplifyEdge(indices, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options | meshopt_SimplifyInternalSolve, out_result_error); +} + +size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned char* vertex_lock, size_t target_index_count, float target_error, float* out_result_error) { using namespace meshopt; @@ -1766,15 +2669,15 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind const int kInterpolationPasses = 5; // invariant: # of triangles in min_grid <= target_count - int min_grid = int(1.f / (target_error < 1e-3f ? 1e-3f : target_error)); + int min_grid = int(1.f / (target_error < 1e-3f ? 1e-3f : (target_error < 1.f ? target_error : 1.f))); int max_grid = 1025; size_t min_triangles = 0; size_t max_triangles = index_count / 3; // when we're error-limited, we compute the triangle count for the min. size; this accelerates convergence and provides the correct answer when we can't use a larger grid - if (min_grid > 1) + if (min_grid > 1 || vertex_lock) { - computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); + computeVertexIds(vertex_ids, vertex_positions, vertex_lock, vertex_count, min_grid); min_triangles = countTriangles(vertex_ids, indices, index_count); } @@ -1790,7 +2693,7 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind int grid_size = next_grid_size; grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid ? 
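A hedged sketch of calling the new meshopt_simplifyWithUpdate entry point declared above, which rewrites indices in place and writes solved positions/attributes back into the input buffers; the interleaved 5-float vertex layout, the UV weights, and the include path are assumptions:

#include <vector>

#include "meshoptimizer.h" // include path assumed

// hypothetical interleaved layout: px py pz u v (5 floats per vertex)
size_t simplifyInPlace(std::vector<unsigned int>& indices, std::vector<float>& vertices,
    size_t target_index_count, float target_error)
{
    const size_t stride = 5 * sizeof(float);
    size_t vertex_count = vertices.size() / 5;

    float uv_weights[2] = {0.5f, 0.5f}; // assumed relative importance of UV error
    float result_error = 0.f;

    size_t count = meshopt_simplifyWithUpdate(indices.data(), indices.size(),
        vertices.data(), vertex_count, stride,
        vertices.data() + 3, stride, uv_weights, 2,
        /* vertex_lock= */ NULL, target_index_count, target_error,
        /* options= */ 0, &result_error);

    // indices are rewritten in place; surviving positions and UVs are solved and written back
    indices.resize(count);
    return count;
}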
max_grid - 1 : grid_size); - computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); + computeVertexIds(vertex_ids, vertex_positions, vertex_lock, vertex_count, grid_size); size_t triangles = countTriangles(vertex_ids, indices, index_count); #if TRACE @@ -1800,7 +2703,7 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind (triangles <= target_index_count / 3) ? "under" : "over"); #endif - float tip = interpolate(float(target_index_count / 3), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles)); + float tip = interpolate(float(size_t(target_index_count / 3)), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles)); if (triangles <= target_index_count / 3) { @@ -1832,7 +2735,7 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind unsigned int* vertex_cells = allocator.allocate(vertex_count); - computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); + computeVertexIds(vertex_ids, vertex_positions, vertex_lock, vertex_count, min_grid); size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); // build a quadric for each target cell @@ -1853,15 +2756,15 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind for (size_t i = 0; i < cell_count; ++i) result_error = result_error < cell_errors[i] ? cell_errors[i] : result_error; - // collapse triangles! - // note that we need to filter out triangles that we've already output because we very frequently generate redundant triangles between cells :( + // vertex collapses often result in duplicate triangles; we need a table to filter them out size_t tritable_size = hashBuckets2(min_triangles); unsigned int* tritable = allocator.allocate(tritable_size); + // note: this is the first and last write to destination, which allows aliasing destination with indices size_t write = filterTriangles(destination, tritable, tritable_size, indices, index_count, vertex_cells, cell_remap); #if TRACE - printf("result: %d cells, %d triangles (%d unfiltered), error %e\n", int(cell_count), int(write / 3), int(min_triangles), sqrtf(result_error)); + printf("result: grid size %d, %d cells, %d triangles (%d unfiltered), error %e\n", min_grid, int(cell_count), int(write / 3), int(min_triangles), sqrtf(result_error)); #endif if (out_result_error) @@ -1870,6 +2773,40 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind return write; } +size_t meshopt_simplifyPrune(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, float target_error) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(target_error >= 0); + + meshopt_Allocator allocator; + + unsigned int* result = destination; + if (result != indices) + memcpy(result, indices, index_count * sizeof(unsigned int)); + + // build position remap that maps each vertex to the one with identical position + unsigned int* remap = allocator.allocate(vertex_count); + buildPositionRemap(remap, NULL, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, allocator); + + Vector3* vertex_positions = allocator.allocate(vertex_count); + 
rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, NULL); + + unsigned int* components = allocator.allocate(vertex_count); + size_t component_count = buildComponents(components, vertex_count, indices, index_count, remap); + + float* component_errors = allocator.allocate(component_count * 4); // overallocate for temporary use inside measureComponents + measureComponents(component_errors, component_count, components, vertex_positions, vertex_count); + + float component_nexterror = 0; + size_t result_count = pruneComponents(result, index_count, components, component_errors, component_count, target_error * target_error, component_nexterror); + + return result_count; +} + size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count) { using namespace meshopt; @@ -1922,7 +2859,7 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos int grid_size = next_grid_size; grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid ? max_grid - 1 : grid_size); - computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); + computeVertexIds(vertex_ids, vertex_positions, NULL, vertex_count, grid_size); size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count); #if TRACE @@ -1959,7 +2896,7 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos // build vertex->cell association by mapping all vertices with the same quantized position to the same cell unsigned int* vertex_cells = allocator.allocate(vertex_count); - computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); + computeVertexIds(vertex_ids, vertex_positions, NULL, vertex_count, min_grid); size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); // accumulate points into a reservoir for each target cell @@ -1972,7 +2909,10 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos unsigned int* cell_remap = allocator.allocate(cell_count); float* cell_errors = allocator.allocate(cell_count); - fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_reservoirs, vertex_positions, vertex_colors, vertex_colors_stride, color_weight * color_weight, vertex_count); + // we scale the color weight to bring it to the same scale as position so that error addition makes sense + float color_weight_scaled = color_weight * (min_grid == 1 ? 
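A minimal sketch of the standalone meshopt_simplifyPrune entry point defined above; positions are rescaled internally, so target_error is interpreted relative to the mesh extents. The helper name, packed-xyz layout, and include path are assumptions:

#include <vector>

#include "meshoptimizer.h" // include path assumed

// hypothetical helper: drop connected components whose size stays below max_error of the mesh extent
std::vector<unsigned int> pruneSmallParts(const std::vector<unsigned int>& indices,
    const std::vector<float>& positions, // packed x y z per vertex (assumed layout)
    float max_error)
{
    std::vector<unsigned int> result(indices.size());

    size_t count = meshopt_simplifyPrune(result.data(), indices.data(), indices.size(),
        positions.data(), positions.size() / 3, 3 * sizeof(float), max_error);

    result.resize(count);
    return result;
}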
1.f : 1.f / (min_grid - 1)); + + fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_reservoirs, vertex_positions, vertex_colors, vertex_colors_stride, color_weight_scaled * color_weight_scaled, vertex_count); // copy results to the output assert(cell_count <= target_vertex_count); diff --git a/Source/ThirdParty/meshoptimizer/spatialorder.cpp b/Source/ThirdParty/meshoptimizer/spatialorder.cpp index 7b1a06945..8a785fcd5 100644 --- a/Source/ThirdParty/meshoptimizer/spatialorder.cpp +++ b/Source/ThirdParty/meshoptimizer/spatialorder.cpp @@ -10,18 +10,19 @@ namespace meshopt { -// "Insert" two 0 bits after each of the 10 low bits of x -inline unsigned int part1By2(unsigned int x) +// "Insert" two 0 bits after each of the 20 low bits of x +inline unsigned long long part1By2(unsigned long long x) { - x &= 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210 - x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210 - x = (x ^ (x << 8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210 - x = (x ^ (x << 4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10 - x = (x ^ (x << 2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 + x &= 0x000fffffull; // x = ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- jihg fedc ba98 7654 3210 + x = (x ^ (x << 32)) & 0x000f00000000ffffull; // x = ---- ---- ---- jihg ---- ---- ---- ---- ---- ---- ---- ---- fedc ba98 7654 3210 + x = (x ^ (x << 16)) & 0x000f0000ff0000ffull; // x = ---- ---- ---- jihg ---- ---- ---- ---- fedc ba98 ---- ---- ---- ---- 7654 3210 + x = (x ^ (x << 8)) & 0x000f00f00f00f00full; // x = ---- ---- ---- jihg ---- ---- fedc ---- ---- ba98 ---- ---- 7654 ---- ---- 3210 + x = (x ^ (x << 4)) & 0x00c30c30c30c30c3ull; // x = ---- ---- ji-- --hg ---- fe-- --dc ---- ba-- --98 ---- 76-- --54 ---- 32-- --10 + x = (x ^ (x << 2)) & 0x0249249249249249ull; // x = ---- --j- -i-- h--g --f- -e-- d--c --b- -a-- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 return x; } -static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +static void computeOrder(unsigned long long* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, bool morton) { size_t vertex_stride_float = vertex_positions_stride / sizeof(float); @@ -47,66 +48,171 @@ static void computeOrder(unsigned int* result, const float* vertex_positions_dat extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]); extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); - float scale = extent == 0 ? 0.f : 1.f / extent; + // rescale each axis to 16 bits to get 48-bit Morton codes + float scale = extent == 0 ? 
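A small standalone check of the widened part1By2 bit-spreading above, confirming that three 20-bit coordinates interleave into the expected Morton bit positions; the function is reproduced here only so the check compiles on its own:

#include <assert.h>

// same bit-spreading as part1By2 above, reproduced so the check is self-contained
static unsigned long long spread3(unsigned long long x)
{
    x &= 0x000fffffull;
    x = (x ^ (x << 32)) & 0x000f00000000ffffull;
    x = (x ^ (x << 16)) & 0x000f0000ff0000ffull;
    x = (x ^ (x << 8)) & 0x000f00f00f00f00full;
    x = (x ^ (x << 4)) & 0x00c30c30c30c30c3ull;
    x = (x ^ (x << 2)) & 0x0249249249249249ull;
    return x;
}

int main()
{
    // x, y and z land in interleaved bit positions 0, 1 and 2 respectively
    assert((spread3(1) | (spread3(0) << 1) | (spread3(0) << 2)) == 0x1ull);
    assert((spread3(0) | (spread3(1) << 1) | (spread3(0) << 2)) == 0x2ull);
    assert((spread3(0) | (spread3(0) << 1) | (spread3(1) << 2)) == 0x4ull);

    // a full 20-bit coordinate still fits: bit 19 of x ends up at bit 57 of the key
    assert(spread3(1ull << 19) == (1ull << 57));
    return 0;
}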
0.f : 65535.f / extent; // generate Morton order based on the position inside a unit cube for (size_t i = 0; i < vertex_count; ++i) { const float* v = vertex_positions_data + i * vertex_stride_float; - int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f); - int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f); - int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f); + int x = int((v[0] - minv[0]) * scale + 0.5f); + int y = int((v[1] - minv[1]) * scale + 0.5f); + int z = int((v[2] - minv[2]) * scale + 0.5f); - result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2); + if (morton) + result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2); + else + result[i] = ((unsigned long long)x << 0) | ((unsigned long long)y << 20) | ((unsigned long long)z << 40); } } -static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count) +static void radixSort10(unsigned int* destination, const unsigned int* source, const unsigned short* keys, size_t count) { + unsigned int hist[1024]; memset(hist, 0, sizeof(hist)); - // compute 3 10-bit histograms in parallel + // compute histogram (assume keys are 10-bit) for (size_t i = 0; i < count; ++i) - { - unsigned int id = data[i]; + hist[keys[i]]++; - hist[(id >> 0) & 1023][0]++; - hist[(id >> 10) & 1023][1]++; - hist[(id >> 20) & 1023][2]++; - } - - unsigned int sumx = 0, sumy = 0, sumz = 0; + unsigned int sum = 0; // replace histogram data with prefix histogram sums in-place for (int i = 0; i < 1024; ++i) { - unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2]; - - hist[i][0] = sumx; - hist[i][1] = sumy; - hist[i][2] = sumz; - - sumx += hx; - sumy += hy; - sumz += hz; + unsigned int h = hist[i]; + hist[i] = sum; + sum += h; } - assert(sumx == count && sumy == count && sumz == count); + assert(sum == count); + + // reorder values + for (size_t i = 0; i < count; ++i) + { + unsigned int id = keys[source[i]]; + + destination[hist[id]++] = source[i]; + } } -static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass) +static void computeHistogram(unsigned int (&hist)[256][2], const unsigned short* data, size_t count) { - int bitoff = pass * 10; + memset(hist, 0, sizeof(hist)); + + // compute 2 8-bit histograms in parallel + for (size_t i = 0; i < count; ++i) + { + unsigned long long id = data[i]; + + hist[(id >> 0) & 255][0]++; + hist[(id >> 8) & 255][1]++; + } + + unsigned int sum0 = 0, sum1 = 0; + + // replace histogram data with prefix histogram sums in-place + for (int i = 0; i < 256; ++i) + { + unsigned int h0 = hist[i][0], h1 = hist[i][1]; + + hist[i][0] = sum0; + hist[i][1] = sum1; + + sum0 += h0; + sum1 += h1; + } + + assert(sum0 == count && sum1 == count); +} + +static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned short* keys, size_t count, unsigned int (&hist)[256][2], int pass) +{ + int bitoff = pass * 8; for (size_t i = 0; i < count; ++i) { - unsigned int id = (keys[source[i]] >> bitoff) & 1023; + unsigned int id = unsigned(keys[source[i]] >> bitoff) & 255; destination[hist[id][pass]++] = source[i]; } } +static void partitionPoints(unsigned int* target, const unsigned int* order, const unsigned char* sides, size_t split, size_t count) +{ + size_t l = 0, r = split; + + for (size_t i = 0; i < count; ++i) + { + unsigned char side = sides[order[i]]; + target[side ? 
r : l] = order[i]; + l += 1; + l -= side; + r += side; + } + + assert(l == split && r == count); +} + +static void splitPoints(unsigned int* destination, unsigned int* orderx, unsigned int* ordery, unsigned int* orderz, const unsigned long long* keys, size_t count, void* scratch, size_t cluster_size) +{ + if (count <= cluster_size) + { + memcpy(destination, orderx, count * sizeof(unsigned int)); + return; + } + + unsigned int* axes[3] = {orderx, ordery, orderz}; + + int bestk = -1; + unsigned int bestdim = 0; + + for (int k = 0; k < 3; ++k) + { + const unsigned int mask = (1 << 20) - 1; + unsigned int dim = (unsigned(keys[axes[k][count - 1]] >> (k * 20)) & mask) - (unsigned(keys[axes[k][0]] >> (k * 20)) & mask); + + if (dim >= bestdim) + { + bestk = k; + bestdim = dim; + } + } + + assert(bestk >= 0); + + // split roughly in half, with the left split always being aligned to cluster size + size_t split = ((count / 2) + cluster_size - 1) / cluster_size * cluster_size; + assert(split > 0 && split < count); + + // mark sides of split for partitioning + unsigned char* sides = static_cast(scratch) + count * sizeof(unsigned int); + + for (size_t i = 0; i < split; ++i) + sides[axes[bestk][i]] = 0; + + for (size_t i = split; i < count; ++i) + sides[axes[bestk][i]] = 1; + + // partition all axes into two sides, maintaining order + unsigned int* temp = static_cast(scratch); + + for (int k = 0; k < 3; ++k) + { + if (k == bestk) + continue; + + unsigned int* axis = axes[k]; + memcpy(temp, axis, sizeof(unsigned int) * count); + partitionPoints(axis, temp, sides, split, count); + } + + // recursion depth is logarithmic and bounded as we always split in approximately half + splitPoints(destination, orderx, ordery, orderz, keys, split, scratch, cluster_size); + splitPoints(destination + split, orderx + split, ordery + split, orderz + split, keys, count - split, scratch, cluster_size); +} + } // namespace meshopt void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) @@ -118,21 +224,26 @@ void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_pos meshopt_Allocator allocator; - unsigned int* keys = allocator.allocate(vertex_count); - computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride); + unsigned long long* keys = allocator.allocate(vertex_count); + computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride, /* morton= */ true); - unsigned int hist[1024][3]; - computeHistogram(hist, keys, vertex_count); - - unsigned int* scratch = allocator.allocate(vertex_count); + unsigned int* scratch = allocator.allocate(vertex_count * 2); // 4b for order + 2b for keys + unsigned short* keyk = (unsigned short*)(scratch + vertex_count); for (size_t i = 0; i < vertex_count; ++i) destination[i] = unsigned(i); - // 3-pass radix sort computes the resulting order into scratch - radixPass(scratch, destination, keys, vertex_count, hist, 0); - radixPass(destination, scratch, keys, vertex_count, hist, 1); - radixPass(scratch, destination, keys, vertex_count, hist, 2); + unsigned int* order[] = {scratch, destination}; + + // 5-pass radix sort computes the resulting order into scratch + for (int k = 0; k < 5; ++k) + { + // copy 10-bit key segments into keyk to reduce cache pressure during radix pass + for (size_t i = 0; i < vertex_count; ++i) + keyk[i] = (unsigned short)((keys[i] >> (k * 10)) & 1023); + + radixSort10(order[k % 2], order[(k + 1) % 2], keyk, vertex_count); + } // since our 
remap table is mapping old=>new, we need to reverse it for (size_t i = 0; i < vertex_count; ++i) @@ -192,3 +303,39 @@ void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* destination[r * 3 + 2] = c; } } + +void meshopt_spatialClusterPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t cluster_size) +{ + using namespace meshopt; + + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(cluster_size > 0); + + meshopt_Allocator allocator; + + unsigned long long* keys = allocator.allocate(vertex_count); + computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride, /* morton= */ false); + + unsigned int* order = allocator.allocate(vertex_count * 3); + unsigned int* scratch = allocator.allocate(vertex_count * 2); // 4b for order + 1b for side or 2b for keys + unsigned short* keyk = reinterpret_cast(scratch + vertex_count); + + for (int k = 0; k < 3; ++k) + { + // copy 16-bit key segments into keyk to reduce cache pressure during radix pass + for (size_t i = 0; i < vertex_count; ++i) + keyk[i] = (unsigned short)(keys[i] >> (k * 20)); + + unsigned int hist[256][2]; + computeHistogram(hist, keyk, vertex_count); + + for (size_t i = 0; i < vertex_count; ++i) + order[k * vertex_count + i] = unsigned(i); + + radixPass(scratch, order + k * vertex_count, keyk, vertex_count, hist, 0); + radixPass(order + k * vertex_count, scratch, keyk, vertex_count, hist, 1); + } + + splitPoints(destination, order, order + vertex_count, order + 2 * vertex_count, keys, vertex_count, scratch, cluster_size); +} diff --git a/Source/ThirdParty/meshoptimizer/stripifier.cpp b/Source/ThirdParty/meshoptimizer/stripifier.cpp index d57fb512b..4043195ae 100644 --- a/Source/ThirdParty/meshoptimizer/stripifier.cpp +++ b/Source/ThirdParty/meshoptimizer/stripifier.cpp @@ -10,14 +10,14 @@ namespace meshopt { -static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence) +static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned char* valence) { unsigned int index = 0; unsigned int iv = ~0u; for (size_t i = 0; i < buffer_size; ++i) { - unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]]; + unsigned char va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]]; unsigned int v = (va < vb && va < vc) ? va : (vb < vc ? 
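A hedged sketch of the new meshopt_spatialClusterPoints entry point above; since splitPoints keeps the left half aligned to cluster_size, each consecutive run of cluster_size entries in the output forms one cluster. The helper name, packed-xyz layout, and include path are assumptions:

#include <vector>

#include "meshoptimizer.h" // include path assumed

// hypothetical helper: reorder a point cloud so that every run of cluster_size
// consecutive entries in the returned index order is spatially localized
std::vector<unsigned int> clusterPoints(const std::vector<float>& positions, // packed x y z
    size_t cluster_size)
{
    size_t point_count = positions.size() / 3;
    std::vector<unsigned int> order(point_count);

    meshopt_spatialClusterPoints(order.data(), positions.data(), point_count,
        3 * sizeof(float), cluster_size);

    // cluster c then covers order[c * cluster_size .. min((c + 1) * cluster_size, point_count))
    return order;
}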
vb : vc); if (v < iv) @@ -71,8 +71,9 @@ size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t strip_size = 0; // compute vertex valence; this is used to prioritize starting triangle for strips - unsigned int* valence = allocator.allocate(vertex_count); - memset(valence, 0, vertex_count * sizeof(unsigned int)); + // note: we use 8-bit counters for performance; for outlier vertices the valence is incorrect but that just affects the heuristic + unsigned char* valence = allocator.allocate(vertex_count); + memset(valence, 0, vertex_count); for (size_t i = 0; i < index_count; ++i) { @@ -151,7 +152,7 @@ size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, { // if we didn't find anything, we need to find the next new triangle // we use a heuristic to maximize the strip length - unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]); + unsigned int i = findStripFirst(buffer, buffer_size, valence); unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; // ordered removal from the buffer diff --git a/Source/ThirdParty/meshoptimizer/vertexcodec.cpp b/Source/ThirdParty/meshoptimizer/vertexcodec.cpp index 94f7a1adc..7085cce32 100644 --- a/Source/ThirdParty/meshoptimizer/vertexcodec.cpp +++ b/Source/ThirdParty/meshoptimizer/vertexcodec.cpp @@ -60,6 +60,15 @@ #define SIMD_LATENCYOPT #endif +// In switch dispatch, marking default case as unreachable allows to remove redundant bounds checks +#if defined(__GNUC__) +#define SIMD_UNREACHABLE() __builtin_unreachable() +#elif defined(_MSC_VER) +#define SIMD_UNREACHABLE() __assume(false) +#else +#define SIMD_UNREACHABLE() assert(!"Unreachable") +#endif + #endif // !MESHOPTIMIZER_NO_SIMD #ifdef SIMD_SSE @@ -90,6 +99,14 @@ #include #endif +#ifndef TRACE +#define TRACE 0 +#endif + +#if TRACE +#include +#endif + #ifdef SIMD_WASM #define wasmx_splat_v32x4(v, i) wasm_i32x4_shuffle(v, v, i, i, i, i) #define wasmx_unpacklo_v8x16(a, b) wasm_i8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) @@ -105,50 +122,76 @@ namespace meshopt const unsigned char kVertexHeader = 0xa0; -static int gEncodeVertexVersion = 0; +static int gEncodeVertexVersion = 1; +const int kDecodeVertexVersion = 1; const size_t kVertexBlockSizeBytes = 8192; const size_t kVertexBlockMaxSize = 256; const size_t kByteGroupSize = 16; const size_t kByteGroupDecodeLimit = 24; -const size_t kTailMaxSize = 32; +const size_t kTailMinSizeV0 = 32; +const size_t kTailMinSizeV1 = 24; + +static const int kBitsV0[4] = {0, 2, 4, 8}; +static const int kBitsV1[5] = {0, 1, 2, 4, 8}; + +const int kEncodeDefaultLevel = 2; static size_t getVertexBlockSize(size_t vertex_size) { - // make sure the entire block fits into the scratch buffer - size_t result = kVertexBlockSizeBytes / vertex_size; - - // align to byte group size; we encode each byte as a byte group - // if vertex block is misaligned, it results in wasted bytes, so just truncate the block size - result &= ~(kByteGroupSize - 1); + // make sure the entire block fits into the scratch buffer and is aligned to byte group size + // note: the block size is implicitly part of the format, so we can't change it without breaking compatibility + size_t result = (kVertexBlockSizeBytes / vertex_size) & ~(kByteGroupSize - 1); return (result < kVertexBlockMaxSize) ? 
result : kVertexBlockMaxSize; } -inline unsigned char zigzag8(unsigned char v) +inline unsigned int rotate(unsigned int v, int r) { - return ((signed char)(v) >> 7) ^ (v << 1); + return (v << r) | (v >> ((32 - r) & 31)); } -inline unsigned char unzigzag8(unsigned char v) +template +inline T zigzag(T v) { - return -(v & 1) ^ (v >> 1); + return (0 - (v >> (sizeof(T) * 8 - 1))) ^ (v << 1); } +template +inline T unzigzag(T v) +{ + return (0 - (v & 1)) ^ (v >> 1); +} + +#if TRACE +struct Stats +{ + size_t size; + size_t header; // bytes for header + size_t bitg[9]; // bytes for bit groups + size_t bitc[8]; // bit consistency: how many bits are shared between all bytes in a group + size_t ctrl[4]; // number of control groups +}; + +static Stats* bytestats = NULL; +static Stats vertexstats[256]; +#endif + static bool encodeBytesGroupZero(const unsigned char* buffer) { - for (size_t i = 0; i < kByteGroupSize; ++i) - if (buffer[i]) - return false; + assert(kByteGroupSize == sizeof(unsigned long long) * 2); - return true; + unsigned long long v[2]; + memcpy(v, buffer, sizeof(v)); + + return (v[0] | v[1]) == 0; } static size_t encodeBytesGroupMeasure(const unsigned char* buffer, int bits) { - assert(bits >= 1 && bits <= 8); + assert(bits >= 0 && bits <= 8); - if (bits == 1) + if (bits == 0) return encodeBytesGroupZero(buffer) ? 0 : size_t(-1); if (bits == 8) @@ -166,9 +209,10 @@ static size_t encodeBytesGroupMeasure(const unsigned char* buffer, int bits) static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* buffer, int bits) { - assert(bits >= 1 && bits <= 8); + assert(bits >= 0 && bits <= 8); + assert(kByteGroupSize % 8 == 0); - if (bits == 1) + if (bits == 0) return data; if (bits == 8) @@ -196,21 +240,27 @@ static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* byte |= enc; } + // encode 1-bit groups in reverse bit order + // this makes them faster to decode alongside other groups + if (bits == 1) + byte = (unsigned char)(((byte * 0x80200802ull) & 0x0884422110ull) * 0x0101010101ull >> 32); + *data++ = byte; } for (size_t i = 0; i < kByteGroupSize; ++i) { - if (buffer[i] >= sentinel) - { - *data++ = buffer[i]; - } + unsigned char v = buffer[i]; + + // branchless append of out-of-range values + *data = v; + data += v >= sentinel; } return data; } -static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, const unsigned char* buffer, size_t buffer_size) +static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, const unsigned char* buffer, size_t buffer_size, const int bits[4]) { assert(buffer_size % kByteGroupSize == 0); @@ -226,69 +276,301 @@ static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, memset(header, 0, header_size); + int last_bits = -1; + for (size_t i = 0; i < buffer_size; i += kByteGroupSize) { if (size_t(data_end - data) < kByteGroupDecodeLimit) return NULL; - int best_bits = 8; - size_t best_size = encodeBytesGroupMeasure(buffer + i, 8); + int best_bitk = 3; + size_t best_size = encodeBytesGroupMeasure(buffer + i, bits[best_bitk]); - for (int bits = 1; bits < 8; bits *= 2) + for (int bitk = 0; bitk < 3; ++bitk) { - size_t size = encodeBytesGroupMeasure(buffer + i, bits); + size_t size = encodeBytesGroupMeasure(buffer + i, bits[bitk]); - if (size < best_size) + // favor consistent bit selection across groups, but never replace literals + if (size < best_size || (size == best_size && bits[bitk] == last_bits && bits[best_bitk] != 8)) { - best_bits = bits; + 
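A tiny standalone check of the templated zigzag mapping introduced above: small signed deltas map to small unsigned codes and the mapping round-trips, which is what keeps near-constant byte streams cheap to encode. The formulas are copied from zigzag/unzigzag above; the demo names are placeholders:

#include <assert.h>

template <typename T>
static T zigzagDemo(T v)
{
    return (0 - (v >> (sizeof(T) * 8 - 1))) ^ (v << 1); // same formula as zigzag() above
}

template <typename T>
static T unzigzagDemo(T v)
{
    return (0 - (v & 1)) ^ (v >> 1); // same formula as unzigzag() above
}

int main()
{
    // byte deltas -1, +1, -2 map to 1, 2, 3, so small deltas need few bits per group
    assert(zigzagDemo<unsigned char>((unsigned char)-1) == 1);
    assert(zigzagDemo<unsigned char>((unsigned char)+1) == 2);
    assert(zigzagDemo<unsigned char>((unsigned char)-2) == 3);

    // the mapping is a bijection on bytes, so decoding recovers the original delta
    for (int i = 0; i < 256; ++i)
        assert(unzigzagDemo<unsigned char>(zigzagDemo<unsigned char>((unsigned char)i)) == (unsigned char)i);

    return 0;
}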
best_bitk = bitk; best_size = size; } } - int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2 ? 1 : (best_bits == 4 ? 2 : 3)); - assert((1 << bitslog2) == best_bits); - size_t header_offset = i / kByteGroupSize; + header[header_offset / 4] |= best_bitk << ((header_offset % 4) * 2); - header[header_offset / 4] |= bitslog2 << ((header_offset % 4) * 2); - + int best_bits = bits[best_bitk]; unsigned char* next = encodeBytesGroup(data, buffer + i, best_bits); assert(data + best_size == next); data = next; + last_bits = best_bits; + +#if TRACE + bytestats->bitg[best_bits] += best_size; +#endif } +#if TRACE + bytestats->header += header_size; +#endif + return data; } -static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data_end, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +template +static void encodeDeltas1(unsigned char* buffer, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, const unsigned char last_vertex[256], size_t k, int rot) +{ + size_t k0 = k & ~(sizeof(T) - 1); + int ks = (k & (sizeof(T) - 1)) * 8; + + T p = last_vertex[k0]; + for (size_t j = 1; j < sizeof(T); ++j) + p |= T(last_vertex[k0 + j]) << (j * 8); + + const unsigned char* vertex = vertex_data + k0; + + for (size_t i = 0; i < vertex_count; ++i) + { + T v = vertex[0]; + for (size_t j = 1; j < sizeof(T); ++j) + v |= vertex[j] << (j * 8); + + T d = Xor ? T(rotate(v ^ p, rot)) : zigzag(T(v - p)); + + buffer[i] = (unsigned char)(d >> ks); + p = v; + vertex += vertex_size; + } +} + +static void encodeDeltas(unsigned char* buffer, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, const unsigned char last_vertex[256], size_t k, int channel) +{ + switch (channel & 3) + { + case 0: + return encodeDeltas1(buffer, vertex_data, vertex_count, vertex_size, last_vertex, k, 0); + case 1: + return encodeDeltas1(buffer, vertex_data, vertex_count, vertex_size, last_vertex, k, 0); + case 2: + return encodeDeltas1(buffer, vertex_data, vertex_count, vertex_size, last_vertex, k, channel >> 4); + default: + assert(!"Unsupported channel encoding"); // unreachable + } +} + +static int estimateBits(unsigned char v) +{ + return v <= 15 ? (v <= 3 ? (v == 0 ? 0 : 2) : 4) : 8; +} + +static int estimateRotate(const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, size_t k, size_t group_size) +{ + size_t sizes[8] = {}; + + const unsigned char* vertex = vertex_data + k; + unsigned int last = vertex[0] | (vertex[1] << 8) | (vertex[2] << 16) | (vertex[3] << 24); + + for (size_t i = 0; i < vertex_count; i += group_size) + { + unsigned int bitg = 0; + + // calculate bit consistency mask for the group + for (size_t j = 0; j < group_size && i + j < vertex_count; ++j) + { + unsigned int v = vertex[0] | (vertex[1] << 8) | (vertex[2] << 16) | (vertex[3] << 24); + unsigned int d = v ^ last; + + bitg |= d; + last = v; + vertex += vertex_size; + } + +#if TRACE + for (int j = 0; j < 32; ++j) + vertexstats[k + (j / 8)].bitc[j % 8] += (i + group_size < vertex_count ? group_size : vertex_count - i) * (1 - ((bitg >> j) & 1)); +#endif + + for (int j = 0; j < 8; ++j) + { + unsigned int bitr = rotate(bitg, j); + + sizes[j] += estimateBits((unsigned char)(bitr >> 0)) + estimateBits((unsigned char)(bitr >> 8)); + sizes[j] += estimateBits((unsigned char)(bitr >> 16)) + estimateBits((unsigned char)(bitr >> 24)); + } + } + + int best_rot = 0; + for (int rot = 1; rot < 8; ++rot) + best_rot = (sizes[rot] < sizes[best_rot]) ? 
rot : best_rot; + + return best_rot; +} + +static int estimateChannel(const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, size_t k, size_t vertex_block_size, size_t block_skip, int max_channel, int xor_rot) +{ + unsigned char block[kVertexBlockMaxSize]; + assert(vertex_block_size <= kVertexBlockMaxSize); + + unsigned char last_vertex[256] = {}; + + size_t sizes[3] = {}; + assert(max_channel <= 3); + + for (size_t i = 0; i < vertex_count; i += vertex_block_size * block_skip) + { + size_t block_size = i + vertex_block_size < vertex_count ? vertex_block_size : vertex_count - i; + size_t block_size_aligned = (block_size + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + + memcpy(last_vertex, vertex_data + (i == 0 ? 0 : i - 1) * vertex_size, vertex_size); + + // we sometimes encode elements we didn't fill when rounding to kByteGroupSize + if (block_size < block_size_aligned) + memset(block + block_size, 0, block_size_aligned - block_size); + + for (int channel = 0; channel < max_channel; ++channel) + for (size_t j = 0; j < 4; ++j) + { + encodeDeltas(block, vertex_data + i * vertex_size, block_size, vertex_size, last_vertex, k + j, channel | (xor_rot << 4)); + + for (size_t ig = 0; ig < block_size; ig += kByteGroupSize) + { + // to maximize encoding performance we only evaluate 1/2/4/8 bit groups + size_t size1 = encodeBytesGroupMeasure(block + ig, 1); + size_t size2 = encodeBytesGroupMeasure(block + ig, 2); + size_t size4 = encodeBytesGroupMeasure(block + ig, 4); + size_t size8 = encodeBytesGroupMeasure(block + ig, 8); + + size_t best_size = size1 < size2 ? size1 : size2; + best_size = best_size < size4 ? best_size : size4; + best_size = best_size < size8 ? best_size : size8; + + sizes[channel] += best_size; + } + } + } + + int best_channel = 0; + for (int channel = 1; channel < max_channel; ++channel) + best_channel = (sizes[channel] < sizes[best_channel]) ? channel : best_channel; + + return best_channel == 2 ? best_channel | (xor_rot << 4) : best_channel; +} + +static bool estimateControlZero(const unsigned char* buffer, size_t vertex_count_aligned) +{ + for (size_t i = 0; i < vertex_count_aligned; i += kByteGroupSize) + if (!encodeBytesGroupZero(buffer + i)) + return false; + + return true; +} + +static int estimateControl(const unsigned char* buffer, size_t vertex_count, size_t vertex_count_aligned, int level) +{ + if (estimateControlZero(buffer, vertex_count_aligned)) + return 2; // zero encoding + + if (level == 0) + return 1; // 1248 encoding in level 0 for encoding speed + + // round number of groups to 4 to get number of header bytes + size_t header_size = (vertex_count_aligned / kByteGroupSize + 3) / 4; + + size_t est_bytes0 = header_size, est_bytes1 = header_size; + + for (size_t i = 0; i < vertex_count_aligned; i += kByteGroupSize) + { + // assumes kBitsV1[] = {0, 1, 2, 4, 8} for performance + size_t size0 = encodeBytesGroupMeasure(buffer + i, 0); + size_t size1 = encodeBytesGroupMeasure(buffer + i, 1); + size_t size2 = encodeBytesGroupMeasure(buffer + i, 2); + size_t size4 = encodeBytesGroupMeasure(buffer + i, 4); + size_t size8 = encodeBytesGroupMeasure(buffer + i, 8); + + // both control modes have access to 1/2/4 bit encoding + size_t size12 = size1 < size2 ? size1 : size2; + size_t size124 = size12 < size4 ? size12 : size4; + + // each control mode has access to 0/8 bit encoding respectively + est_bytes0 += size124 < size0 ? size124 : size0; + est_bytes1 += size124 < size8 ? 
size124 : size8; + } + + // pick shortest control entry but prefer literal encoding + if (est_bytes0 < vertex_count || est_bytes1 < vertex_count) + return est_bytes0 < est_bytes1 ? 0 : 1; + else + return 3; // literal encoding +} + +static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data_end, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256], const unsigned char* channels, int version, int level) { assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + assert(vertex_size % 4 == 0); unsigned char buffer[kVertexBlockMaxSize]; assert(sizeof(buffer) % kByteGroupSize == 0); + size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + // we sometimes encode elements we didn't fill when rounding to kByteGroupSize memset(buffer, 0, sizeof(buffer)); + size_t control_size = version == 0 ? 0 : vertex_size / 4; + if (size_t(data_end - data) < control_size) + return NULL; + + unsigned char* control = data; + data += control_size; + + memset(control, 0, control_size); + for (size_t k = 0; k < vertex_size; ++k) { - size_t vertex_offset = k; + encodeDeltas(buffer, vertex_data, vertex_count, vertex_size, last_vertex, k, version == 0 ? 0 : channels[k / 4]); - unsigned char p = last_vertex[k]; +#if TRACE + const unsigned char* olddata = data; + bytestats = &vertexstats[k]; +#endif - for (size_t i = 0; i < vertex_count; ++i) + int ctrl = 0; + + if (version != 0) { - buffer[i] = zigzag8(vertex_data[vertex_offset] - p); + ctrl = estimateControl(buffer, vertex_count, vertex_count_aligned, level); - p = vertex_data[vertex_offset]; + assert(unsigned(ctrl) < 4); + control[k / 4] |= ctrl << ((k % 4) * 2); - vertex_offset += vertex_size; +#if TRACE + vertexstats[k].ctrl[ctrl]++; +#endif } - data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); - if (!data) - return NULL; + if (ctrl == 3) + { + // literal encoding + if (size_t(data_end - data) < vertex_count) + return NULL; + + memcpy(data, buffer, vertex_count); + data += vertex_count; + } + else if (ctrl != 2) // non-zero encoding + { + data = encodeBytes(data, data_end, buffer, vertex_count_aligned, version == 0 ? kBitsV0 : kBitsV1 + ctrl); + if (!data) + return NULL; + } + +#if TRACE + bytestats = NULL; + vertexstats[k].size += data - olddata; +#endif } memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size); @@ -297,7 +579,7 @@ static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data } #if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX) && !defined(SIMD_WASM)) -static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2) +static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bits) { #define READ() byte = *data++ #define NEXT(bits) enc = byte >> (8 - bits), byte <<= bits, encv = *data_var, *buffer++ = (enc == (1 << bits) - 1) ? 
encv : enc, data_var += (enc == (1 << bits) - 1) @@ -305,12 +587,24 @@ static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned unsigned char byte, enc, encv; const unsigned char* data_var; - switch (bitslog2) + switch (bits) { case 0: memset(buffer, 0, kByteGroupSize); return data; case 1: + data_var = data + 2; + + // 2 groups with 8 1-bit values in each byte (reversed from the order in other groups) + READ(); + byte = (unsigned char)(((byte * 0x80200802ull) & 0x0884422110ull) * 0x0101010101ull >> 32); + NEXT(1), NEXT(1), NEXT(1), NEXT(1), NEXT(1), NEXT(1), NEXT(1), NEXT(1); + READ(); + byte = (unsigned char)(((byte * 0x80200802ull) & 0x0884422110ull) * 0x0101010101ull >> 32); + NEXT(1), NEXT(1), NEXT(1), NEXT(1), NEXT(1), NEXT(1), NEXT(1), NEXT(1); + + return data_var; + case 2: data_var = data + 4; // 4 groups with 4 2-bit values in each byte @@ -320,7 +614,7 @@ static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); return data_var; - case 2: + case 4: data_var = data + 8; // 8 groups with 2 4-bit values in each byte @@ -334,11 +628,11 @@ static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned READ(), NEXT(4), NEXT(4); return data_var; - case 3: + case 8: memcpy(buffer, data, kByteGroupSize); return data + kByteGroupSize; default: - assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + assert(!"Unexpected bit length"); // unreachable return data; } @@ -346,18 +640,16 @@ static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned #undef NEXT } -static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size, const int* bits) { assert(buffer_size % kByteGroupSize == 0); - const unsigned char* header = data; - // round number of groups to 4 to get number of header bytes size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; - if (size_t(data_end - data) < header_size) return NULL; + const unsigned char* header = data; data += header_size; for (size_t i = 0; i < buffer_size; i += kByteGroupSize) @@ -366,43 +658,109 @@ static const unsigned char* decodeBytes(const unsigned char* data, const unsigne return NULL; size_t header_offset = i / kByteGroupSize; + int bitsk = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; - int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; - - data = decodeBytesGroup(data, buffer + i, bitslog2); + data = decodeBytesGroup(data, buffer + i, bits[bitsk]); } return data; } -static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +template +static void decodeDeltas1(const unsigned char* buffer, unsigned char* transposed, size_t vertex_count, size_t vertex_size, const unsigned char* last_vertex, int rot) { - assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); - - unsigned char buffer[kVertexBlockMaxSize]; - unsigned char transposed[kVertexBlockSizeBytes]; - - size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); - - for (size_t k = 0; k < vertex_size; ++k) + for (size_t k = 0; k < 4; k += sizeof(T)) { - data = decodeBytes(data, data_end, buffer, 
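// [editor sketch, not part of the patch] The new 1-bit case above reverses the bit order of
// each header byte with the well-known multiply/mask trick before peeling values off the top
// (hence the "reversed from the order in other groups" comment). This check compares that
// expression against a naive per-bit reversal over all byte values.
#include <assert.h>

static unsigned char sketchReverseBitsMul(unsigned char b)
{
    return (unsigned char)(((b * 0x80200802ull) & 0x0884422110ull) * 0x0101010101ull >> 32);
}

static unsigned char sketchReverseBitsNaive(unsigned char b)
{
    unsigned char r = 0;
    for (int i = 0; i < 8; ++i)
        r = (unsigned char)((r << 1) | ((b >> i) & 1));
    return r;
}

static void sketchReverseBitsCheck()
{
    for (int i = 0; i < 256; ++i)
        assert(sketchReverseBitsMul((unsigned char)i) == sketchReverseBitsNaive((unsigned char)i));
}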
vertex_count_aligned); - if (!data) - return NULL; - size_t vertex_offset = k; - unsigned char p = last_vertex[k]; + T p = last_vertex[0]; + for (size_t j = 1; j < sizeof(T); ++j) + p |= last_vertex[j] << (8 * j); for (size_t i = 0; i < vertex_count; ++i) { - unsigned char v = unzigzag8(buffer[i]) + p; + T v = buffer[i]; + for (size_t j = 1; j < sizeof(T); ++j) + v |= buffer[i + vertex_count * j] << (8 * j); + + v = Xor ? T(rotate(v, rot)) ^ p : unzigzag(v) + p; + + for (size_t j = 0; j < sizeof(T); ++j) + transposed[vertex_offset + j] = (unsigned char)(v >> (j * 8)); - transposed[vertex_offset] = v; p = v; vertex_offset += vertex_size; } + + buffer += vertex_count * sizeof(T); + last_vertex += sizeof(T); + } +} + +static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256], const unsigned char* channels, int version) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize * 4]; + unsigned char transposed[kVertexBlockSizeBytes]; + + size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + assert(vertex_count <= vertex_count_aligned); + + size_t control_size = version == 0 ? 0 : vertex_size / 4; + if (size_t(data_end - data) < control_size) + return NULL; + + const unsigned char* control = data; + data += control_size; + + for (size_t k = 0; k < vertex_size; k += 4) + { + unsigned char ctrl_byte = version == 0 ? 0 : control[k / 4]; + + for (size_t j = 0; j < 4; ++j) + { + int ctrl = (ctrl_byte >> (j * 2)) & 3; + + if (ctrl == 3) + { + // literal encoding + if (size_t(data_end - data) < vertex_count) + return NULL; + + memcpy(buffer + j * vertex_count, data, vertex_count); + data += vertex_count; + } + else if (ctrl == 2) + { + // zero encoding + memset(buffer + j * vertex_count, 0, vertex_count); + } + else + { + data = decodeBytes(data, data_end, buffer + j * vertex_count, vertex_count_aligned, version == 0 ? kBitsV0 : kBitsV1 + ctrl); + if (!data) + return NULL; + } + } + + int channel = version == 0 ? 
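// [editor sketch, not part of the patch] In version-1 streams each byte of a block's control
// area holds four 2-bit codes, one per component byte of the vertex (0/1 = byte-group
// encoding with one of two bit-width sets, 2 = all zeros, 3 = literal copy). These helpers
// mirror the packing done in encodeVertexBlock above and the unpacking in decodeVertexBlock.
#include <stddef.h>

static void sketchControlPack(unsigned char* control, size_t k, int ctrl)
{
    // k is the byte offset of the component inside the vertex, ctrl is 0..3
    control[k / 4] |= (unsigned char)(ctrl << ((k % 4) * 2));
}

static int sketchControlUnpack(const unsigned char* control, size_t k)
{
    return (control[k / 4] >> ((k % 4) * 2)) & 3;
}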
0 : channels[k / 4]; + + switch (channel & 3) + { + case 0: + decodeDeltas1(buffer, transposed + k, vertex_count, vertex_size, last_vertex + k, 0); + break; + case 1: + decodeDeltas1(buffer, transposed + k, vertex_count, vertex_size, last_vertex + k, 0); + break; + case 2: + decodeDeltas1(buffer, transposed + k, vertex_count, vertex_size, last_vertex + k, (32 - (channel >> 4)) & 31); + break; + default: + return NULL; // invalid channel type + } } memcpy(vertex_data, transposed, vertex_count * vertex_size); @@ -447,7 +805,7 @@ static bool gDecodeBytesGroupInitialized = decodeBytesGroupBuildTables(); #ifdef SIMD_SSE SIMD_TARGET -static __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1) +inline __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1) { __m128i sm0 = _mm_loadl_epi64(reinterpret_cast(&kDecodeBytesGroupShuffle[mask0])); __m128i sm1 = _mm_loadl_epi64(reinterpret_cast(&kDecodeBytesGroupShuffle[mask1])); @@ -459,11 +817,12 @@ static __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1) } SIMD_TARGET -static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +inline const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int hbits) { - switch (bitslog2) + switch (hbits) { case 0: + case 4: { __m128i result = _mm_setzero_si128(); @@ -473,6 +832,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 1: + case 6: { #ifdef __GNUC__ typedef int __attribute__((aligned(1))) unaligned_int; @@ -505,7 +865,6 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi unsigned char mask1 = (unsigned char)(mask16 >> 8); __m128i shuf = decodeShuffleMask(mask0, mask1); - __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel)); _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); @@ -518,6 +877,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 2: + case 7: { #ifdef SIMD_LATENCYOPT unsigned long long data64; @@ -541,7 +901,6 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi unsigned char mask1 = (unsigned char)(mask16 >> 8); __m128i shuf = decodeShuffleMask(mask0, mask1); - __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel)); _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); @@ -554,6 +913,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 3: + case 8: { __m128i result = _mm_loadu_si128(reinterpret_cast(data)); @@ -562,26 +922,46 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi return data + 16; } + case 5: + { + __m128i rest = _mm_loadu_si128(reinterpret_cast(data + 2)); + + unsigned char mask0 = data[0]; + unsigned char mask1 = data[1]; + + __m128i shuf = decodeShuffleMask(mask0, mask1); + __m128i result = _mm_shuffle_epi8(rest, shuf); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data + 2 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + default: - assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value - return data; + SIMD_UNREACHABLE(); // unreachable } } #endif #ifdef SIMD_AVX -static const __m128i decodeBytesGroupConfig[] = { - _mm_set1_epi8(3), - _mm_set1_epi8(15), - _mm_setr_epi8(6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24), - _mm_setr_epi8(4, 0, 12, 8, 20, 16, 28, 
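// [editor sketch, not part of the patch] Channel type 2 stores each 32-bit lane as an
// xor-delta rotated left by the amount kept in the channel byte's high bits; the decoder
// above undoes it by rotating by (32 - r) and xoring with the previous value. The encoder
// half (encodeDeltas) is outside this hunk, so the forward transform here is inferred from
// the decoder; the check only verifies that the two halves are inverses.
#include <assert.h>

static unsigned int sketchRotl32(unsigned int v, int r)
{
    return r == 0 ? v : (v << r) | (v >> (32 - r));
}

static unsigned int sketchXorRotEncode(unsigned int value, unsigned int prev, int r)
{
    return sketchRotl32(value ^ prev, r); // inferred forward transform
}

static unsigned int sketchXorRotDecode(unsigned int stored, unsigned int prev, int r)
{
    return sketchRotl32(stored, (32 - r) & 31) ^ prev; // mirrors the Xor path of decodeDeltas1 above
}

static void sketchXorRotCheck()
{
    unsigned int prev = 0x3f800000, value = 0x3f8ccccd; // e.g. two nearby float bit patterns
    for (int r = 0; r < 8; ++r) // the channel byte stores rotations 0..7
        assert(sketchXorRotDecode(sketchXorRotEncode(value, prev, r), prev, r) == value);
}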
24, 36, 32, 44, 40, 52, 48, 60, 56), +static const __m128i kDecodeBytesGroupConfig[8][2] = { + {_mm_setzero_si128(), _mm_setzero_si128()}, + {_mm_set1_epi8(3), _mm_setr_epi8(6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24)}, + {_mm_set1_epi8(15), _mm_setr_epi8(4, 0, 12, 8, 20, 16, 28, 24, 36, 32, 44, 40, 52, 48, 60, 56)}, + {_mm_setzero_si128(), _mm_setzero_si128()}, + {_mm_setzero_si128(), _mm_setzero_si128()}, + {_mm_set1_epi8(1), _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)}, + {_mm_set1_epi8(3), _mm_setr_epi8(6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24)}, + {_mm_set1_epi8(15), _mm_setr_epi8(4, 0, 12, 8, 20, 16, 28, 24, 36, 32, 44, 40, 52, 48, 60, 56)}, }; -static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +SIMD_TARGET +inline const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int hbits) { - switch (bitslog2) + switch (hbits) { case 0: + case 4: { __m128i result = _mm_setzero_si128(); @@ -590,16 +970,19 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi return data; } - case 1: - case 2: + case 5: // 1-bit + case 1: // 2-bit + case 6: + case 2: // 4-bit + case 7: { - const unsigned char* skip = data + (bitslog2 << 2); + const unsigned char* skip = data + (2 << (hbits < 3 ? hbits : hbits - 5)); __m128i selb = _mm_loadl_epi64(reinterpret_cast(data)); __m128i rest = _mm_loadu_si128(reinterpret_cast(skip)); - __m128i sent = decodeBytesGroupConfig[bitslog2 - 1]; - __m128i ctrl = decodeBytesGroupConfig[bitslog2 + 1]; + __m128i sent = kDecodeBytesGroupConfig[hbits][0]; + __m128i ctrl = kDecodeBytesGroupConfig[hbits][1]; __m128i selw = _mm_shuffle_epi32(selb, 0x44); __m128i sel = _mm_and_si128(sent, _mm_multishift_epi64_epi8(ctrl, selw)); @@ -613,6 +996,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 3: + case 8: { __m128i result = _mm_loadu_si128(reinterpret_cast(data)); @@ -622,14 +1006,14 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } default: - assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value - return data; + SIMD_UNREACHABLE(); // unreachable } } #endif #ifdef SIMD_NEON -static uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8_t rest0, uint8x8_t rest1) +SIMD_TARGET +inline uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8_t rest0, uint8x8_t rest1) { uint8x8_t sm0 = vld1_u8(kDecodeBytesGroupShuffle[mask0]); uint8x8_t sm1 = vld1_u8(kDecodeBytesGroupShuffle[mask1]); @@ -640,7 +1024,8 @@ static uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8 return vcombine_u8(r0, r1); } -static void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& mask1) +SIMD_TARGET +inline void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& mask1) { // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 const uint64_t magic = 0x000103070f1f3f80ull; @@ -651,11 +1036,13 @@ static void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& m mask1 = uint8_t((vgetq_lane_u64(mask2, 1) * magic) >> 56); } -static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +SIMD_TARGET +inline const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int hbits) { - switch (bitslog2) + switch (hbits) { case 0: + 
case 4: { uint8x16_t result = vdupq_n_u8(0); @@ -665,6 +1052,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 1: + case 6: { #ifdef SIMD_LATENCYOPT unsigned int data32; @@ -702,6 +1090,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 2: + case 7: { #ifdef SIMD_LATENCYOPT unsigned long long data64; @@ -736,6 +1125,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 3: + case 8: { uint8x16_t result = vld1q_u8(data); @@ -744,30 +1134,42 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi return data + 16; } + case 5: + { + unsigned char mask0 = data[0]; + unsigned char mask1 = data[1]; + + uint8x8_t rest0 = vld1_u8(data + 2); + uint8x8_t rest1 = vld1_u8(data + 2 + kDecodeBytesGroupCount[mask0]); + + uint8x16_t result = shuffleBytes(mask0, mask1, rest0, rest1); + + vst1q_u8(buffer, result); + + return data + 2 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + default: - assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value - return data; + SIMD_UNREACHABLE(); // unreachable } } #endif #ifdef SIMD_WASM SIMD_TARGET -static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) +inline v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) { v128_t sm0 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask0]); v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]); - v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]); - sm1off = wasm_i8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - + v128_t sm1off = wasm_v128_load8_splat(&kDecodeBytesGroupCount[mask0]); v128_t sm1r = wasm_i8x16_add(sm1, sm1off); return wasmx_unpacklo_v64x2(sm0, sm1r); } SIMD_TARGET -static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1) +inline void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1) { // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 const uint64_t magic = 0x000103070f1f3f80ull; @@ -777,11 +1179,12 @@ static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1 } SIMD_TARGET -static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +inline const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int hbits) { - switch (bitslog2) + switch (hbits) { case 0: + case 4: { v128_t result = wasm_i8x16_splat(0); @@ -791,6 +1194,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 1: + case 6: { v128_t sel2 = wasm_v128_load(data); v128_t rest = wasm_v128_load(data + 4); @@ -805,7 +1209,6 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi wasmMoveMask(mask, mask0, mask1); v128_t shuf = decodeShuffleMask(mask0, mask1); - v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask); wasm_v128_store(buffer, result); @@ -814,6 +1217,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 2: + case 7: { v128_t sel4 = wasm_v128_load(data); v128_t rest = wasm_v128_load(data + 8); @@ -827,7 +1231,6 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi wasmMoveMask(mask, mask0, mask1); v128_t shuf = decodeShuffleMask(mask0, mask1); - v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask); 
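// [editor sketch, not part of the patch] How the 2-bit group headers map to bit widths in
// the two stream versions. Version 0 always uses {0, 2, 4, 8}; version 1 uses a 4-wide
// window into {0, 1, 2, 4, 8} selected by the group's control code (ctrl 0 -> {0, 1, 2, 4},
// ctrl 1 -> {1, 2, 4, 8}). That is exactly what passing kBitsV0 or kBitsV1 + ctrl into
// decodeBytes does, and what the SIMD paths above encode as hbits 0..3 (v0) and 4..8 (v1).
static int sketchHeaderBits(int version, int ctrl, int code)
{
    static const int kBitsV0[4] = {0, 2, 4, 8};
    static const int kBitsV1[5] = {0, 1, 2, 4, 8};

    return version == 0 ? kBitsV0[code & 3] : kBitsV1[ctrl + (code & 3)];
}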
wasm_v128_store(buffer, result); @@ -836,6 +1239,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi } case 3: + case 8: { v128_t result = wasm_v128_load(data); @@ -844,16 +1248,30 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi return data + 16; } + case 5: + { + v128_t rest = wasm_v128_load(data + 2); + + unsigned char mask0 = data[0]; + unsigned char mask1 = data[1]; + + v128_t shuf = decodeShuffleMask(mask0, mask1); + v128_t result = wasm_i8x16_swizzle(rest, shuf); + + wasm_v128_store(buffer, result); + + return data + 2 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + default: - assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value - return data; + SIMD_UNREACHABLE(); // unreachable } } #endif #if defined(SIMD_SSE) || defined(SIMD_AVX) SIMD_TARGET -static void transpose8(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3) +inline void transpose8(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3) { __m128i t0 = _mm_unpacklo_epi8(x0, x1); __m128i t1 = _mm_unpackhi_epi8(x0, x1); @@ -867,17 +1285,33 @@ static void transpose8(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3) } SIMD_TARGET -static __m128i unzigzag8(__m128i v) +inline __m128i unzigzag8(__m128i v) { __m128i xl = _mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(v, _mm_set1_epi8(1))); __m128i xr = _mm_and_si128(_mm_srli_epi16(v, 1), _mm_set1_epi8(127)); return _mm_xor_si128(xl, xr); } + +SIMD_TARGET +inline __m128i unzigzag16(__m128i v) +{ + __m128i xl = _mm_sub_epi16(_mm_setzero_si128(), _mm_and_si128(v, _mm_set1_epi16(1))); + __m128i xr = _mm_srli_epi16(v, 1); + + return _mm_xor_si128(xl, xr); +} + +SIMD_TARGET +inline __m128i rotate32(__m128i v, int r) +{ + return _mm_or_si128(_mm_slli_epi32(v, r), _mm_srli_epi32(v, 32 - r)); +} #endif #ifdef SIMD_NEON -static void transpose8(uint8x16_t& x0, uint8x16_t& x1, uint8x16_t& x2, uint8x16_t& x3) +SIMD_TARGET +inline void transpose8(uint8x16_t& x0, uint8x16_t& x1, uint8x16_t& x2, uint8x16_t& x3) { uint8x16x2_t t01 = vzipq_u8(x0, x1); uint8x16x2_t t23 = vzipq_u8(x2, x3); @@ -891,18 +1325,64 @@ static void transpose8(uint8x16_t& x0, uint8x16_t& x1, uint8x16_t& x2, uint8x16_ x3 = vreinterpretq_u8_u16(x23.val[1]); } -static uint8x16_t unzigzag8(uint8x16_t v) +SIMD_TARGET +inline uint8x16_t unzigzag8(uint8x16_t v) { uint8x16_t xl = vreinterpretq_u8_s8(vnegq_s8(vreinterpretq_s8_u8(vandq_u8(v, vdupq_n_u8(1))))); uint8x16_t xr = vshrq_n_u8(v, 1); return veorq_u8(xl, xr); } + +SIMD_TARGET +inline uint8x16_t unzigzag16(uint8x16_t v) +{ + uint16x8_t vv = vreinterpretq_u16_u8(v); + uint8x16_t xl = vreinterpretq_u8_s16(vnegq_s16(vreinterpretq_s16_u16(vandq_u16(vv, vdupq_n_u16(1))))); + uint8x16_t xr = vreinterpretq_u8_u16(vshrq_n_u16(vv, 1)); + + return veorq_u8(xl, xr); +} + +SIMD_TARGET +inline uint8x16_t rotate32(uint8x16_t v, int r) +{ + uint32x4_t v32 = vreinterpretq_u32_u8(v); + return vreinterpretq_u8_u32(vorrq_u32(vshlq_u32(v32, vdupq_n_s32(r)), vshlq_u32(v32, vdupq_n_s32(r - 32)))); +} + +template +SIMD_TARGET inline uint8x8_t rebase(uint8x8_t npi, uint8x16_t r0, uint8x16_t r1, uint8x16_t r2, uint8x16_t r3) +{ + switch (Channel) + { + case 0: + { + uint8x16_t rsum = vaddq_u8(vaddq_u8(r0, r1), vaddq_u8(r2, r3)); + uint8x8_t rsumx = vadd_u8(vget_low_u8(rsum), vget_high_u8(rsum)); + return vadd_u8(vadd_u8(npi, rsumx), vext_u8(rsumx, rsumx, 4)); + } + case 1: + { + uint16x8_t rsum = vaddq_u16(vaddq_u16(vreinterpretq_u16_u8(r0), vreinterpretq_u16_u8(r1)), 
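// [editor sketch, not part of the patch] Scalar reference for the unzigzag16 helpers added
// above: channel 1 treats each 4-byte group as two 16-bit lanes and zigzag-codes the 16-bit
// deltas, and each SIMD lane computes exactly this. The forward zigzag16 is not in this
// hunk; it is the matching standard formula, checked here by round-tripping a few values.
#include <assert.h>

static unsigned short sketchZigzag16(unsigned short v)
{
    return (unsigned short)(((short)v >> 15) ^ (v << 1));
}

static unsigned short sketchUnzigzag16(unsigned short v)
{
    return (unsigned short)(-(v & 1) ^ (v >> 1));
}

static void sketchZigzag16Check()
{
    unsigned short samples[] = {0, 1, 2, 0x7fff, 0x8000, 0xffff};
    for (unsigned short s : samples)
        assert(sketchUnzigzag16(sketchZigzag16(s)) == s);
}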
vaddq_u16(vreinterpretq_u16_u8(r2), vreinterpretq_u16_u8(r3))); + uint16x4_t rsumx = vadd_u16(vget_low_u16(rsum), vget_high_u16(rsum)); + return vreinterpret_u8_u16(vadd_u16(vadd_u16(vreinterpret_u16_u8(npi), rsumx), vext_u16(rsumx, rsumx, 2))); + } + case 2: + { + uint8x16_t rsum = veorq_u8(veorq_u8(r0, r1), veorq_u8(r2, r3)); + uint8x8_t rsumx = veor_u8(vget_low_u8(rsum), vget_high_u8(rsum)); + return veor_u8(veor_u8(npi, rsumx), vext_u8(rsumx, rsumx, 4)); + } + default: + return npi; + } +} #endif #ifdef SIMD_WASM SIMD_TARGET -static void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, v128_t& x3) +inline void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, v128_t& x3) { v128_t t0 = wasmx_unpacklo_v8x16(x0, x1); v128_t t1 = wasmx_unpackhi_v8x16(x0, x1); @@ -916,44 +1396,57 @@ static void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, v128_t& x3) } SIMD_TARGET -static v128_t unzigzag8(v128_t v) +inline v128_t unzigzag8(v128_t v) { v128_t xl = wasm_i8x16_neg(wasm_v128_and(v, wasm_i8x16_splat(1))); v128_t xr = wasm_u8x16_shr(v, 1); return wasm_v128_xor(xl, xr); } + +SIMD_TARGET +inline v128_t unzigzag16(v128_t v) +{ + v128_t xl = wasm_i16x8_neg(wasm_v128_and(v, wasm_i16x8_splat(1))); + v128_t xr = wasm_u16x8_shr(v, 1); + + return wasm_v128_xor(xl, xr); +} + +SIMD_TARGET +inline v128_t rotate32(v128_t v, int r) +{ + return wasm_v128_or(wasm_i32x4_shl(v, r), wasm_i32x4_shr(v, 32 - r)); +} #endif #if defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM) SIMD_TARGET -static const unsigned char* decodeBytesSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +static const unsigned char* decodeBytesSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size, int hshift) { assert(buffer_size % kByteGroupSize == 0); assert(kByteGroupSize == 16); - const unsigned char* header = data; - // round number of groups to 4 to get number of header bytes size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; - if (size_t(data_end - data) < header_size) return NULL; + const unsigned char* header = data; data += header_size; size_t i = 0; - // fast-path: process 4 groups at a time, do a shared bounds check - each group reads <=24b + // fast-path: process 4 groups at a time, do a shared bounds check for (; i + kByteGroupSize * 4 <= buffer_size && size_t(data_end - data) >= kByteGroupDecodeLimit * 4; i += kByteGroupSize * 4) { size_t header_offset = i / kByteGroupSize; unsigned char header_byte = header[header_offset / 4]; - data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 0, (header_byte >> 0) & 3); - data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 1, (header_byte >> 2) & 3); - data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 2, (header_byte >> 4) & 3); - data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 3, (header_byte >> 6) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 0, hshift + ((header_byte >> 0) & 3)); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 1, hshift + ((header_byte >> 2) & 3)); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 2, hshift + ((header_byte >> 4) & 3)); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 3, hshift + ((header_byte >> 6) & 3)); } // slow-path: process remaining groups @@ -963,17 +1456,102 @@ static const unsigned char* decodeBytesSimd(const unsigned char* data, const uns return NULL; size_t header_offset = 
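// [editor sketch, not part of the patch] The identity behind rebase() above: for the
// additive channels the running prefix after a group of deltas equals the starting value
// plus the wrapping sum of those deltas (xor of the deltas for channel 2), so the next
// iteration's starting point can be recomputed from the raw deltas instead of waiting for
// the serial prefix chain -- that is the loop-carried dependency it is there to shorten.
#include <assert.h>
#include <stddef.h>

static void sketchRebaseIdentity(const unsigned char* deltas, size_t n, unsigned char start)
{
    unsigned char serial = start, sum = 0;

    for (size_t i = 0; i < n; ++i)
    {
        serial = (unsigned char)(serial + deltas[i]); // what the FIXD chain computes lane-wise
        sum = (unsigned char)(sum + deltas[i]);       // what rebase() accumulates independently
    }

    assert(serial == (unsigned char)(start + sum));
}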
i / kByteGroupSize; + unsigned char header_byte = header[header_offset / 4]; - int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; - - data = decodeBytesGroupSimd(data, buffer + i, bitslog2); + data = decodeBytesGroupSimd(data, buffer + i, hshift + ((header_byte >> ((header_offset % 4) * 2)) & 3)); } return data; } +template +SIMD_TARGET static void +decodeDeltas4Simd(const unsigned char* buffer, unsigned char* transposed, size_t vertex_count_aligned, size_t vertex_size, unsigned char last_vertex[4], int rot) +{ +#if defined(SIMD_SSE) || defined(SIMD_AVX) +#define TEMP __m128i +#define PREP() __m128i pi = _mm_cvtsi32_si128(*reinterpret_cast(last_vertex)) +#define LOAD(i) __m128i r##i = _mm_loadu_si128(reinterpret_cast(buffer + j + i * vertex_count_aligned)) +#define GRP4(i) t0 = r##i, t1 = _mm_shuffle_epi32(r##i, 1), t2 = _mm_shuffle_epi32(r##i, 2), t3 = _mm_shuffle_epi32(r##i, 3) +#define FIXD(i) t##i = pi = Channel == 0 ? _mm_add_epi8(pi, t##i) : (Channel == 1 ? _mm_add_epi16(pi, t##i) : _mm_xor_si128(pi, t##i)) +#define SAVE(i) *reinterpret_cast(savep) = _mm_cvtsi128_si32(t##i), savep += vertex_size +#endif + +#ifdef SIMD_NEON +#define TEMP uint8x8_t +#define PREP() uint8x8_t pi = vreinterpret_u8_u32(vld1_lane_u32(reinterpret_cast(last_vertex), vdup_n_u32(0), 0)) +#define LOAD(i) uint8x16_t r##i = vld1q_u8(buffer + j + i * vertex_count_aligned) +#define GRP4(i) t0 = vget_low_u8(r##i), t1 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t0), 1)), t2 = vget_high_u8(r##i), t3 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t2), 1)) +#define FIXD(i) t##i = pi = Channel == 0 ? vadd_u8(pi, t##i) : (Channel == 1 ? vreinterpret_u8_u16(vadd_u16(vreinterpret_u16_u8(pi), vreinterpret_u16_u8(t##i))) : veor_u8(pi, t##i)) +#define SAVE(i) vst1_lane_u32(reinterpret_cast(savep), vreinterpret_u32_u8(t##i), 0), savep += vertex_size +#endif + +#ifdef SIMD_WASM +#define TEMP v128_t +#define PREP() v128_t pi = wasm_v128_load(last_vertex) +#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned) +#define GRP4(i) t0 = r##i, t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3) +#define FIXD(i) t##i = pi = Channel == 0 ? wasm_i8x16_add(pi, t##i) : (Channel == 1 ? wasm_i16x8_add(pi, t##i) : wasm_v128_xor(pi, t##i)) +#define SAVE(i) wasm_v128_store32_lane(savep, t##i, 0), savep += vertex_size +#endif + +#define UNZR(i) r##i = Channel == 0 ? unzigzag8(r##i) : (Channel == 1 ? 
unzigzag16(r##i) : rotate32(r##i, rot)) + + PREP(); + + unsigned char* savep = transposed; + + for (size_t j = 0; j < vertex_count_aligned; j += 16) + { + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + + transpose8(r0, r1, r2, r3); + + TEMP t0, t1, t2, t3; + TEMP npi = pi; + + UNZR(0); + GRP4(0); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + UNZR(1); + GRP4(1); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + UNZR(2); + GRP4(2); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + UNZR(3); + GRP4(3); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + +#if defined(SIMD_LATENCYOPT) && defined(SIMD_NEON) && (defined(__APPLE__) || defined(_WIN32)) + // instead of relying on accumulated pi, recompute it from scratch from r0..r3; this shortens dependency between loop iterations + pi = rebase(npi, r0, r1, r2, r3); +#else + (void)npi; +#endif + +#undef UNZR +#undef TEMP +#undef PREP +#undef LOAD +#undef GRP4 +#undef FIXD +#undef SAVE + } +} + SIMD_TARGET -static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256], const unsigned char* channels, int version) { assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); @@ -982,84 +1560,61 @@ static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, con size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + size_t control_size = version == 0 ? 0 : vertex_size / 4; + if (size_t(data_end - data) < control_size) + return NULL; + + const unsigned char* control = data; + data += control_size; + for (size_t k = 0; k < vertex_size; k += 4) { + unsigned char ctrl_byte = version == 0 ? 0 : control[k / 4]; + for (size_t j = 0; j < 4; ++j) { - data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned); - if (!data) - return NULL; + int ctrl = (ctrl_byte >> (j * 2)) & 3; + + if (ctrl == 3) + { + // literal encoding; safe to over-copy due to tail + if (size_t(data_end - data) < vertex_count_aligned) + return NULL; + + memcpy(buffer + j * vertex_count_aligned, data, vertex_count_aligned); + data += vertex_count; + } + else if (ctrl == 2) + { + // zero encoding + memset(buffer + j * vertex_count_aligned, 0, vertex_count_aligned); + } + else + { + // for v0, headers are mapped to 0..3; for v1, headers are mapped to 4..8 + int hshift = version == 0 ? 0 : 4 + ctrl; + + data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned, hshift); + if (!data) + return NULL; + } } -#if defined(SIMD_SSE) || defined(SIMD_AVX) -#define TEMP __m128i -#define PREP() __m128i pi = _mm_cvtsi32_si128(*reinterpret_cast(last_vertex + k)) -#define LOAD(i) __m128i r##i = _mm_loadu_si128(reinterpret_cast(buffer + j + i * vertex_count_aligned)) -#define GRP4(i) t0 = _mm_shuffle_epi32(r##i, 0), t1 = _mm_shuffle_epi32(r##i, 1), t2 = _mm_shuffle_epi32(r##i, 2), t3 = _mm_shuffle_epi32(r##i, 3) -#define FIXD(i) t##i = pi = _mm_add_epi8(pi, t##i) -#define SAVE(i) *reinterpret_cast(savep) = _mm_cvtsi128_si32(t##i), savep += vertex_size -#endif + int channel = version == 0 ? 
0 : channels[k / 4]; -#ifdef SIMD_NEON -#define TEMP uint8x8_t -#define PREP() uint8x8_t pi = vreinterpret_u8_u32(vld1_lane_u32(reinterpret_cast(last_vertex + k), vdup_n_u32(0), 0)) -#define LOAD(i) uint8x16_t r##i = vld1q_u8(buffer + j + i * vertex_count_aligned) -#define GRP4(i) t0 = vget_low_u8(r##i), t1 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t0), 1)), t2 = vget_high_u8(r##i), t3 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t2), 1)) -#define FIXD(i) t##i = pi = vadd_u8(pi, t##i) -#define SAVE(i) vst1_lane_u32(reinterpret_cast(savep), vreinterpret_u32_u8(t##i), 0), savep += vertex_size -#endif - -#ifdef SIMD_WASM -#define TEMP v128_t -#define PREP() v128_t pi = wasm_v128_load(last_vertex + k) -#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned) -#define GRP4(i) t0 = wasmx_splat_v32x4(r##i, 0), t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3) -#define FIXD(i) t##i = pi = wasm_i8x16_add(pi, t##i) -#define SAVE(i) *reinterpret_cast(savep) = wasm_i32x4_extract_lane(t##i, 0), savep += vertex_size -#endif - - PREP(); - - unsigned char* savep = transposed + k; - - for (size_t j = 0; j < vertex_count_aligned; j += 16) + switch (channel & 3) { - LOAD(0); - LOAD(1); - LOAD(2); - LOAD(3); - - r0 = unzigzag8(r0); - r1 = unzigzag8(r1); - r2 = unzigzag8(r2); - r3 = unzigzag8(r3); - - transpose8(r0, r1, r2, r3); - - TEMP t0, t1, t2, t3; - - GRP4(0); - FIXD(0), FIXD(1), FIXD(2), FIXD(3); - SAVE(0), SAVE(1), SAVE(2), SAVE(3); - - GRP4(1); - FIXD(0), FIXD(1), FIXD(2), FIXD(3); - SAVE(0), SAVE(1), SAVE(2), SAVE(3); - - GRP4(2); - FIXD(0), FIXD(1), FIXD(2), FIXD(3); - SAVE(0), SAVE(1), SAVE(2), SAVE(3); - - GRP4(3); - FIXD(0), FIXD(1), FIXD(2), FIXD(3); - SAVE(0), SAVE(1), SAVE(2), SAVE(3); - -#undef TEMP -#undef PREP -#undef LOAD -#undef GRP4 -#undef FIXD -#undef SAVE + case 0: + decodeDeltas4Simd<0>(buffer, transposed + k, vertex_count_aligned, vertex_size, last_vertex + k, 0); + break; + case 1: + decodeDeltas4Simd<1>(buffer, transposed + k, vertex_count_aligned, vertex_size, last_vertex + k, 0); + break; + case 2: + decodeDeltas4Simd<2>(buffer, transposed + k, vertex_count_aligned, vertex_size, last_vertex + k, (32 - (channel >> 4)) & 31); + break; + default: + return NULL; // invalid channel type } } @@ -1088,23 +1643,29 @@ static unsigned int cpuid = getCpuFeatures(); } // namespace meshopt -size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size) +size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level, int version) { using namespace meshopt; assert(vertex_size > 0 && vertex_size <= 256); assert(vertex_size % 4 == 0); + assert(level >= 0 && level <= 9); // only a subset of this range is used right now + assert(version < 0 || unsigned(version) <= kDecodeVertexVersion); + + version = version < 0 ? 
gEncodeVertexVersion : version; + +#if TRACE + memset(vertexstats, 0, sizeof(vertexstats)); +#endif const unsigned char* vertex_data = static_cast(vertices); unsigned char* data = buffer; unsigned char* data_end = buffer + buffer_size; - if (size_t(data_end - data) < 1 + vertex_size) + if (size_t(data_end - data) < 1) return 0; - int version = gEncodeVertexVersion; - *data++ = (unsigned char)(kVertexHeader | version); unsigned char first_vertex[256] = {}; @@ -1116,40 +1677,110 @@ size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, con size_t vertex_block_size = getVertexBlockSize(vertex_size); + unsigned char channels[64] = {}; + if (version != 0 && level > 1 && vertex_count > 1) + for (size_t k = 0; k < vertex_size; k += 4) + { + int rot = level >= 3 ? estimateRotate(vertex_data, vertex_count, vertex_size, k, /* group_size= */ 16) : 0; + int channel = estimateChannel(vertex_data, vertex_count, vertex_size, k, vertex_block_size, /* block_skip= */ 3, /* max_channels= */ level >= 3 ? 3 : 2, rot); + + assert(unsigned(channel) < 2 || ((channel & 3) == 2 && unsigned(channel >> 4) < 8)); + channels[k / 4] = (unsigned char)channel; + } + size_t vertex_offset = 0; while (vertex_offset < vertex_count) { size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset; - data = encodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); + data = encodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex, channels, version, level); if (!data) return 0; vertex_offset += block_size; } - size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + size_t tail_size = vertex_size + (version == 0 ? 0 : vertex_size / 4); + size_t tail_size_min = version == 0 ? kTailMinSizeV0 : kTailMinSizeV1; + size_t tail_size_pad = tail_size < tail_size_min ? tail_size_min : tail_size; - if (size_t(data_end - data) < tail_size) + if (size_t(data_end - data) < tail_size_pad) return 0; - // write first vertex to the end of the stream and pad it to 32 bytes; this is important to simplify bounds checks in decoder - if (vertex_size < kTailMaxSize) + if (tail_size < tail_size_pad) { - memset(data, 0, kTailMaxSize - vertex_size); - data += kTailMaxSize - vertex_size; + memset(data, 0, tail_size_pad - tail_size); + data += tail_size_pad - tail_size; } memcpy(data, first_vertex, vertex_size); data += vertex_size; + if (version != 0) + { + memcpy(data, channels, vertex_size / 4); + data += vertex_size / 4; + } + assert(data >= buffer + tail_size); assert(data <= buffer + buffer_size); +#if TRACE + size_t total_size = data - buffer; + + for (size_t k = 0; k < vertex_size; ++k) + { + const Stats& vsk = vertexstats[k]; + + printf("%2d: %7d bytes [%4.1f%%] %.1f bpv", int(k), int(vsk.size), double(vsk.size) / double(total_size) * 100, double(vsk.size) / double(vertex_count) * 8); + + size_t total_k = vsk.header + vsk.bitg[1] + vsk.bitg[2] + vsk.bitg[4] + vsk.bitg[8]; + double total_kr = total_k ? 1.0 / double(total_k) : 0; + + if (version != 0) + { + int channel = channels[k / 4]; + + if ((channel & 3) == 2 && k % 4 == 0) + printf(" | ^%d", channel >> 4); + else + printf(" | %2s", channel == 0 ? "1" : (channel == 1 && k % 2 == 0 ? 
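// [editor sketch, not part of the patch] Layout of the stream tail as written by the encoder
// above and read back by meshopt_decodeVertexBuffer below: the first vertex (used to seed
// last_vertex) followed, for version 1, by one channel byte per 4-byte group, with zero
// padding in front so the tail never drops below a minimum size (kTailMinSizeV0/V1 -- the
// actual constants are defined outside this hunk, hence the parameter here).
#include <stddef.h>

struct SketchTailView
{
    const unsigned char* first_vertex; // vertex_size bytes
    const unsigned char* channels;     // vertex_size / 4 bytes, version 1 only (NULL for v0)
};

static SketchTailView sketchLocateTail(const unsigned char* buffer, size_t buffer_size, size_t vertex_size, int version, size_t tail_min_size)
{
    size_t tail_size = vertex_size + (version == 0 ? 0 : vertex_size / 4);
    size_t tail_size_pad = tail_size < tail_min_size ? tail_min_size : tail_size;
    (void)tail_size_pad; // the encoder reserves tail_size_pad bytes; the padding precedes the data below

    const unsigned char* tail = buffer + buffer_size - tail_size;

    SketchTailView view;
    view.first_vertex = tail;
    view.channels = version == 0 ? NULL : tail + vertex_size;
    return view;
}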
"2" : ".")); + } + + printf(" | hdr [%5.1f%%] bitg [1 %4.1f%% 2 %4.1f%% 4 %4.1f%% 8 %4.1f%%]", + double(vsk.header) * total_kr * 100, + double(vsk.bitg[1]) * total_kr * 100, double(vsk.bitg[2]) * total_kr * 100, + double(vsk.bitg[4]) * total_kr * 100, double(vsk.bitg[8]) * total_kr * 100); + + size_t total_ctrl = vsk.ctrl[0] + vsk.ctrl[1] + vsk.ctrl[2] + vsk.ctrl[3]; + + if (total_ctrl) + { + printf(" | ctrl %3.0f%% %3.0f%% %3.0f%% %3.0f%%", + double(vsk.ctrl[0]) / double(total_ctrl) * 100, double(vsk.ctrl[1]) / double(total_ctrl) * 100, + double(vsk.ctrl[2]) / double(total_ctrl) * 100, double(vsk.ctrl[3]) / double(total_ctrl) * 100); + } + + if (level >= 3) + printf(" | bitc [%3.0f%% %3.0f%% %3.0f%% %3.0f%% %3.0f%% %3.0f%% %3.0f%% %3.0f%%]", + double(vsk.bitc[0]) / double(vertex_count) * 100, double(vsk.bitc[1]) / double(vertex_count) * 100, + double(vsk.bitc[2]) / double(vertex_count) * 100, double(vsk.bitc[3]) / double(vertex_count) * 100, + double(vsk.bitc[4]) / double(vertex_count) * 100, double(vsk.bitc[5]) / double(vertex_count) * 100, + double(vsk.bitc[6]) / double(vertex_count) * 100, double(vsk.bitc[7]) / double(vertex_count) * 100); + + printf("\n"); + } +#endif + return data - buffer; } +size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + return meshopt_encodeVertexBufferLevel(buffer, buffer_size, vertices, vertex_count, vertex_size, meshopt::kEncodeDefaultLevel, meshopt::gEncodeVertexVersion); +} + size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size) { using namespace meshopt; @@ -1160,21 +1791,42 @@ size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size) size_t vertex_block_size = getVertexBlockSize(vertex_size); size_t vertex_block_count = (vertex_count + vertex_block_size - 1) / vertex_block_size; + size_t vertex_block_control_size = vertex_size / 4; size_t vertex_block_header_size = (vertex_block_size / kByteGroupSize + 3) / 4; size_t vertex_block_data_size = vertex_block_size; - size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + size_t tail_size = vertex_size + (vertex_size / 4); + size_t tail_size_min = kTailMinSizeV0 > kTailMinSizeV1 ? kTailMinSizeV0 : kTailMinSizeV1; + size_t tail_size_pad = tail_size < tail_size_min ? 
tail_size_min : tail_size; + assert(tail_size_pad >= kByteGroupDecodeLimit); - return 1 + vertex_block_count * vertex_size * (vertex_block_header_size + vertex_block_data_size) + tail_size; + return 1 + vertex_block_count * vertex_size * (vertex_block_control_size + vertex_block_header_size + vertex_block_data_size) + tail_size_pad; } void meshopt_encodeVertexVersion(int version) { - assert(unsigned(version) <= 0); + assert(unsigned(version) <= unsigned(meshopt::kDecodeVertexVersion)); meshopt::gEncodeVertexVersion = version; } +int meshopt_decodeVertexVersion(const unsigned char* buffer, size_t buffer_size) +{ + if (buffer_size < 1) + return -1; + + unsigned char header = buffer[0]; + + if ((header & 0xf0) != meshopt::kVertexHeader) + return -1; + + int version = header & 0x0f; + if (version > meshopt::kDecodeVertexVersion) + return -1; + + return version; +} + int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size) { using namespace meshopt; @@ -1182,7 +1834,7 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve assert(vertex_size > 0 && vertex_size <= 256); assert(vertex_size % 4 == 0); - const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = NULL; + const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256], const unsigned char*, int) = NULL; #if defined(SIMD_SSE) && defined(SIMD_FALLBACK) decode = (cpuid & (1 << 9)) ? decodeVertexBlockSimd : decodeVertexBlock; @@ -1202,7 +1854,7 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve const unsigned char* data = buffer; const unsigned char* data_end = buffer + buffer_size; - if (size_t(data_end - data) < 1 + vertex_size) + if (size_t(data_end - data) < 1) return -2; unsigned char data_header = *data++; @@ -1211,11 +1863,22 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve return -1; int version = data_header & 0x0f; - if (version > 0) + if (version > kDecodeVertexVersion) return -1; + size_t tail_size = vertex_size + (version == 0 ? 0 : vertex_size / 4); + size_t tail_size_min = version == 0 ? kTailMinSizeV0 : kTailMinSizeV1; + size_t tail_size_pad = tail_size < tail_size_min ? tail_size_min : tail_size; + + if (size_t(data_end - data) < tail_size_pad) + return -2; + + const unsigned char* tail = data_end - tail_size; + unsigned char last_vertex[256]; - memcpy(last_vertex, data_end - vertex_size, vertex_size); + memcpy(last_vertex, tail, vertex_size); + + const unsigned char* channels = version == 0 ? NULL : tail + vertex_size; size_t vertex_block_size = getVertexBlockSize(vertex_size); @@ -1225,16 +1888,14 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve { size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset; - data = decode(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); + data = decode(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex, channels, version); if (!data) return -2; vertex_offset += block_size; } - size_t tail_size = vertex_size < kTailMaxSize ? 
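// [editor sketch, not part of the patch] Round-tripping a small vertex buffer through the
// API touched by this diff, using the signatures as declared in this revision:
// meshopt_encodeVertexBufferLevel takes an explicit level (0..9, only a subset used) and a
// codec version, where a negative version means "use the globally selected one".
#include <assert.h>
#include <string.h>
#include <vector>

#include "meshoptimizer.h"

static void sketchVertexCodecRoundTrip()
{
    struct Vertex { float px, py, pz, pad; }; // vertex_size must be a multiple of 4, <= 256

    Vertex vertices[4] = {{0, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 0, 0}, {0, 1, 0, 0}};

    std::vector<unsigned char> encoded(meshopt_encodeVertexBufferBound(4, sizeof(Vertex)));
    size_t size = meshopt_encodeVertexBufferLevel(encoded.data(), encoded.size(), vertices, 4, sizeof(Vertex), /* level= */ 2, /* version= */ -1);
    assert(size > 0);

    // the version nibble is stored in the leading header byte
    assert(meshopt_decodeVertexVersion(encoded.data(), size) >= 0);

    Vertex decoded[4];
    int rc = meshopt_decodeVertexBuffer(decoded, 4, sizeof(Vertex), encoded.data(), size);
    assert(rc == 0 && memcmp(decoded, vertices, sizeof(vertices)) == 0); // the codec is byte-exact
}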
kTailMaxSize : vertex_size; - - if (size_t(data_end - data) != tail_size) + if (size_t(data_end - data) != tail_size_pad) return -3; return 0; @@ -1246,3 +1907,4 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve #undef SIMD_WASM #undef SIMD_FALLBACK #undef SIMD_TARGET +#undef SIMD_LATENCYOPT diff --git a/Source/ThirdParty/meshoptimizer/vertexfilter.cpp b/Source/ThirdParty/meshoptimizer/vertexfilter.cpp index 4b5f444f0..3fd836083 100644 --- a/Source/ThirdParty/meshoptimizer/vertexfilter.cpp +++ b/Source/ThirdParty/meshoptimizer/vertexfilter.cpp @@ -109,28 +109,33 @@ static void decodeFilterOct(T* data, size_t count) static void decodeFilterQuat(short* data, size_t count) { - const float scale = 1.f / sqrtf(2.f); + const float scale = 32767.f / sqrtf(2.f); for (size_t i = 0; i < count; ++i) { // recover scale from the high byte of the component int sf = data[i * 4 + 3] | 3; - float ss = scale / float(sf); + float s = float(sf); - // convert x/y/z to [-1..1] (scaled...) - float x = float(data[i * 4 + 0]) * ss; - float y = float(data[i * 4 + 1]) * ss; - float z = float(data[i * 4 + 2]) * ss; + // convert x/y/z to floating point (unscaled! implied scale of 1/sqrt(2.f) * 1/sf) + float x = float(data[i * 4 + 0]); + float y = float(data[i * 4 + 1]); + float z = float(data[i * 4 + 2]); - // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors - float ww = 1.f - x * x - y * y - z * z; + // reconstruct w as a square root (unscaled); we clamp to 0.f to avoid NaN due to precision errors + float ws = s * s; + float ww = ws * 2.f - x * x - y * y - z * z; float w = sqrtf(ww >= 0.f ? ww : 0.f); + // compute final scale; note that all computations above are unscaled + // we need to divide by sf to get out of fixed point, divide by sqrt(2) to renormalize and multiply by 32767 to get to int16 range + float ss = scale / s; + // rounded signed float->int - int xf = int(x * 32767.f + (x >= 0.f ? 0.5f : -0.5f)); - int yf = int(y * 32767.f + (y >= 0.f ? 0.5f : -0.5f)); - int zf = int(z * 32767.f + (z >= 0.f ? 0.5f : -0.5f)); - int wf = int(w * 32767.f + 0.5f); + int xf = int(x * ss + (x >= 0.f ? 0.5f : -0.5f)); + int yf = int(y * ss + (y >= 0.f ? 0.5f : -0.5f)); + int zf = int(z * ss + (z >= 0.f ? 
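// [editor sketch, not part of the patch] Why the reworked decodeFilterQuat above matches the
// code it replaces: the old path scaled x/y/z by (1/sqrt(2))/sf before reconstructing w,
// while the new path keeps the integers unscaled, reconstructs w from 2*s^2 - x^2 - y^2 - z^2,
// and folds everything into one final multiply by 32767/(sqrt(2)*s). Algebraically
// sqrt(2*s^2 - sum) * 32767/(sqrt(2)*s) == sqrt(1 - sum/(2*s^2)) * 32767, so the two only
// differ by floating-point rounding; the check below allows a 1-unit difference on w.
#include <assert.h>
#include <math.h>
#include <stdlib.h>

static void sketchQuatRescaleCheck(int xi, int yi, int zi, int sf) // sf comes from data | 3, so sf >= 3
{
    // old formulation: normalize first, scale to int16 at the end
    float ss_old = (1.f / sqrtf(2.f)) / float(sf);
    float x = xi * ss_old, y = yi * ss_old, z = zi * ss_old;
    float ww_old = 1.f - x * x - y * y - z * z;
    int w_old = int(sqrtf(ww_old >= 0.f ? ww_old : 0.f) * 32767.f + 0.5f);

    // new formulation: stay in fixed point, apply one combined scale at the end
    float s = float(sf);
    float ww_new = s * s * 2.f - float(xi) * xi - float(yi) * yi - float(zi) * zi;
    float ss_new = (32767.f / sqrtf(2.f)) / s;
    int w_new = int(sqrtf(ww_new >= 0.f ? ww_new : 0.f) * ss_new + 0.5f);

    assert(abs(w_old - w_new) <= 1);
}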
0.5f : -0.5f)); + int wf = int(w * ss + 0.5f); int qc = data[i * 4 + 3] & 3; @@ -165,6 +170,47 @@ static void decodeFilterExp(unsigned int* data, size_t count) data[i] = u.ui; } } + +template +static void decodeFilterColor(T* data, size_t count) +{ + const float max = float((1 << (sizeof(T) * 8)) - 1); + + for (size_t i = 0; i < count; ++i) + { + // recover scale from alpha high bit + int as = data[i * 4 + 3]; + as |= as >> 1; + as |= as >> 2; + as |= as >> 4; + as |= as >> 8; // noop for 8-bit + + // convert to RGB in fixed point (co/cg are sign extended) + int y = data[i * 4 + 0], co = ST(data[i * 4 + 1]), cg = ST(data[i * 4 + 2]); + + int r = y + co - cg; + int g = y + cg; + int b = y - co - cg; + + // expand alpha by one bit to match other components + int a = data[i * 4 + 3]; + a = ((a << 1) & as) | (a & 1); + + // compute scaling factor + float ss = max / float(as); + + // rounded float->int + int rf = int(float(r) * ss + 0.5f); + int gf = int(float(g) * ss + 0.5f); + int bf = int(float(b) * ss + 0.5f); + int af = int(float(a) * ss + 0.5f); + + data[i * 4 + 0] = T(rf); + data[i * 4 + 1] = T(gf); + data[i * 4 + 2] = T(bf); + data[i * 4 + 3] = T(af); + } +} #endif #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) @@ -201,7 +247,7 @@ inline uint64_t rotateleft64(uint64_t v, int x) #endif #ifdef SIMD_SSE -static void decodeFilterOctSimd(signed char* data, size_t count) +static void decodeFilterOctSimd8(signed char* data, size_t count) { const __m128 sign = _mm_set1_ps(-0.f); @@ -246,7 +292,7 @@ static void decodeFilterOctSimd(signed char* data, size_t count) } } -static void decodeFilterOctSimd(short* data, size_t count) +static void decodeFilterOctSimd16(short* data, size_t count) { const __m128 sign = _mm_set1_ps(-0.f); @@ -295,8 +341,9 @@ static void decodeFilterOctSimd(short* data, size_t count) __m128i res_1 = _mm_unpackhi_epi16(xzr, y0r); // patch in .w - res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), _mm_set1_epi64x(0xffff000000000000))); - res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), _mm_set1_epi64x(0xffff000000000000))); + __m128i maskw = _mm_set_epi32(0xffff0000, 0, 0xffff0000, 0); + res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), maskw)); + res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), maskw)); _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0); _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1); @@ -305,7 +352,7 @@ static void decodeFilterOctSimd(short* data, size_t count) static void decodeFilterQuatSimd(short* data, size_t count) { - const float scale = 1.f / sqrtf(2.f); + const float scale = 32767.f / sqrtf(2.f); for (size_t i = 0; i < count; i += 4) { @@ -324,24 +371,27 @@ static void decodeFilterQuatSimd(short* data, size_t count) // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) __m128i sf = _mm_or_si128(cf, _mm_set1_epi32(3)); - __m128 ss = _mm_div_ps(_mm_set1_ps(scale), _mm_cvtepi32_ps(sf)); + __m128 s = _mm_cvtepi32_ps(sf); - // convert x/y/z to [-1..1] (scaled...) - __m128 x = _mm_mul_ps(_mm_cvtepi32_ps(xf), ss); - __m128 y = _mm_mul_ps(_mm_cvtepi32_ps(yf), ss); - __m128 z = _mm_mul_ps(_mm_cvtepi32_ps(zf), ss); + // convert x/y/z to floating point (unscaled! 
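// [editor sketch, not part of the patch] The r/g/b expressions in decodeFilterColor above
// are the inverse of the standard YCoCg transform (Y = (r + 2g + b)/4, Co = (r - b)/2,
// Cg = (2g - r - b)/4). The encoder side is not in this hunk; this check only verifies the
// algebra and ignores the fixed-point scale that the alpha channel carries in the filter.
#include <assert.h>

static void sketchYCoCgCheck(int r, int g, int b) // intended for 0..255 or 0..65535 inputs
{
    float y = (r + 2 * g + b) / 4.f, co = (r - b) / 2.f, cg = (2 * g - r - b) / 4.f;

    // these mirror the integer expressions in the decoder above
    assert(y + co - cg == float(r));
    assert(y + cg == float(g));
    assert(y - co - cg == float(b));
}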
implied scale of 1/sqrt(2.f) * 1/sf) + __m128 x = _mm_cvtepi32_ps(xf); + __m128 y = _mm_cvtepi32_ps(yf); + __m128 z = _mm_cvtepi32_ps(zf); - // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors - __m128 ww = _mm_sub_ps(_mm_set1_ps(1.f), _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)))); + // reconstruct w as a square root (unscaled); we clamp to 0.f to avoid NaN due to precision errors + __m128 ws = _mm_mul_ps(s, _mm_add_ps(s, s)); // s*2s instead of 2*(s*s) to work around clang bug with integer multiplication + __m128 ww = _mm_sub_ps(ws, _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)))); __m128 w = _mm_sqrt_ps(_mm_max_ps(ww, _mm_setzero_ps())); - __m128 s = _mm_set1_ps(32767.f); + // compute final scale; note that all computations above are unscaled + // we need to divide by sf to get out of fixed point, divide by sqrt(2) to renormalize and multiply by 32767 to get to int16 range + __m128 ss = _mm_div_ps(_mm_set1_ps(scale), s); // rounded signed float->int - __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s)); - __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s)); - __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s)); - __m128i wr = _mm_cvtps_epi32(_mm_mul_ps(w, s)); + __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, ss)); + __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, ss)); + __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, ss)); + __m128i wr = _mm_cvtps_epi32(_mm_mul_ps(w, ss)); // mix x/z and w/y to make 16-bit unpack easier __m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16)); @@ -385,6 +435,105 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count) _mm_storeu_ps(reinterpret_cast(&data[i]), r); } } + +static void decodeFilterColorSimd8(unsigned char* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + __m128i c4 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4])); + + // unpack y/co/cg/a (co/cg are sign extended with arithmetic shifts) + __m128i yf = _mm_and_si128(c4, _mm_set1_epi32(0xff)); + __m128i cof = _mm_srai_epi32(_mm_slli_epi32(c4, 16), 24); + __m128i cgf = _mm_srai_epi32(_mm_slli_epi32(c4, 8), 24); + __m128i af = _mm_srli_epi32(c4, 24); + + // recover scale from alpha high bit + __m128i as = af; + as = _mm_or_si128(as, _mm_srli_epi32(as, 1)); + as = _mm_or_si128(as, _mm_srli_epi32(as, 2)); + as = _mm_or_si128(as, _mm_srli_epi32(as, 4)); + + // expand alpha by one bit to match other components + af = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(af, 1), as), _mm_and_si128(af, _mm_set1_epi32(1))); + + // compute scaling factor + __m128 ss = _mm_mul_ps(_mm_set1_ps(255.f), _mm_rcp_ps(_mm_cvtepi32_ps(as))); + + // convert to RGB in fixed point + __m128i rf = _mm_add_epi32(yf, _mm_sub_epi32(cof, cgf)); + __m128i gf = _mm_add_epi32(yf, cgf); + __m128i bf = _mm_sub_epi32(yf, _mm_add_epi32(cof, cgf)); + + // rounded signed float->int + __m128i rr = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(rf), ss)); + __m128i gr = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(gf), ss)); + __m128i br = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(bf), ss)); + __m128i ar = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(af), ss)); + + // repack rgba into final value + __m128i res = rr; + res = _mm_or_si128(res, _mm_slli_epi32(gr, 8)); + res = _mm_or_si128(res, _mm_slli_epi32(br, 16)); + res = _mm_or_si128(res, _mm_slli_epi32(ar, 24)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[i * 4]), res); + } +} + +static void decodeFilterColorSimd16(unsigned short* data, size_t 
count) +{ + for (size_t i = 0; i < count; i += 4) + { + __m128i c4_0 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4])); + __m128i c4_1 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4])); + + // gather both y/co 16-bit pairs in each 32-bit lane + __m128i c4_yco = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(c4_0), _mm_castsi128_ps(c4_1), _MM_SHUFFLE(2, 0, 2, 0))); + __m128i c4_cga = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(c4_0), _mm_castsi128_ps(c4_1), _MM_SHUFFLE(3, 1, 3, 1))); + + // unpack y/co/cg/a components (co/cg are sign extended with arithmetic shifts) + __m128i yf = _mm_and_si128(c4_yco, _mm_set1_epi32(0xffff)); + __m128i cof = _mm_srai_epi32(c4_yco, 16); + __m128i cgf = _mm_srai_epi32(_mm_slli_epi32(c4_cga, 16), 16); + __m128i af = _mm_srli_epi32(c4_cga, 16); + + // recover scale from alpha high bit + __m128i as = af; + as = _mm_or_si128(as, _mm_srli_epi32(as, 1)); + as = _mm_or_si128(as, _mm_srli_epi32(as, 2)); + as = _mm_or_si128(as, _mm_srli_epi32(as, 4)); + as = _mm_or_si128(as, _mm_srli_epi32(as, 8)); + + // expand alpha by one bit to match other components + af = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(af, 1), as), _mm_and_si128(af, _mm_set1_epi32(1))); + + // compute scaling factor + __m128 ss = _mm_div_ps(_mm_set1_ps(65535.f), _mm_cvtepi32_ps(as)); + + // convert to RGB in fixed point + __m128i rf = _mm_add_epi32(yf, _mm_sub_epi32(cof, cgf)); + __m128i gf = _mm_add_epi32(yf, cgf); + __m128i bf = _mm_sub_epi32(yf, _mm_add_epi32(cof, cgf)); + + // rounded signed float->int + __m128i rr = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(rf), ss)); + __m128i gr = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(gf), ss)); + __m128i br = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(bf), ss)); + __m128i ar = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(af), ss)); + + // mix r/b and g/a to make 16-bit unpack easier + __m128i rbr = _mm_or_si128(_mm_and_si128(rr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(br, 16)); + __m128i gar = _mm_or_si128(_mm_and_si128(gr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(ar, 16)); + + // pack r/g/b/a using 16-bit unpacks + __m128i res_0 = _mm_unpacklo_epi16(rbr, gar); + __m128i res_1 = _mm_unpackhi_epi16(rbr, gar); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1); + } +} #endif #if defined(SIMD_NEON) && !defined(__aarch64__) && !defined(_M_ARM64) @@ -401,10 +550,17 @@ inline float32x4_t vdivq_f32(float32x4_t x, float32x4_t y) r = vmulq_f32(r, vrecpsq_f32(y, r)); // refine rcp estimate return vmulq_f32(x, r); } + +#ifndef __ARM_FEATURE_FMA +inline float32x4_t vfmaq_f32(float32x4_t x, float32x4_t y, float32x4_t z) +{ + return vaddq_f32(x, vmulq_f32(y, z)); +} +#endif #endif #ifdef SIMD_NEON -static void decodeFilterOctSimd(signed char* data, size_t count) +static void decodeFilterOctSimd8(signed char* data, size_t count) { const int32x4_t sign = vdupq_n_s32(0x80000000); @@ -431,29 +587,27 @@ static void decodeFilterOctSimd(signed char* data, size_t count) y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign)))); // compute normal length & scale - float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))); + float32x4_t ll = vfmaq_f32(vfmaq_f32(vmulq_f32(x, x), y, y), z, z); float32x4_t rl = vrsqrteq_f32(ll); float32x4_t s = vmulq_f32(vdupq_n_f32(127.f), rl); // fast rounded signed float->int: addition triggers renormalization after 
which mantissa stores the integer value - // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + // note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction const float32x4_t fsnap = vdupq_n_f32(3 << 22); - int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap)); - int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); - int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); + int32x4_t xr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, x, s)); + int32x4_t yr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, y, s)); + int32x4_t zr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, z, s)); // combine xr/yr/zr into final value - int32x4_t res = vandq_s32(n4, vdupq_n_s32(0xff000000)); - res = vorrq_s32(res, vandq_s32(xr, vdupq_n_s32(0xff))); - res = vorrq_s32(res, vshlq_n_s32(vandq_s32(yr, vdupq_n_s32(0xff)), 8)); - res = vorrq_s32(res, vshlq_n_s32(vandq_s32(zr, vdupq_n_s32(0xff)), 16)); + int32x4_t res = vsliq_n_s32(xr, vsliq_n_s32(yr, zr, 8), 8); + res = vbslq_s32(vdupq_n_u32(0xff000000), n4, res); vst1q_s32(reinterpret_cast(&data[i * 4]), res); } } -static void decodeFilterOctSimd(short* data, size_t count) +static void decodeFilterOctSimd16(short* data, size_t count) { const int32x4_t sign = vdupq_n_s32(0x80000000); @@ -485,21 +639,25 @@ static void decodeFilterOctSimd(short* data, size_t count) y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign)))); // compute normal length & scale - float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))); + float32x4_t ll = vfmaq_f32(vfmaq_f32(vmulq_f32(x, x), y, y), z, z); +#if !defined(__aarch64__) && !defined(_M_ARM64) float32x4_t rl = vrsqrteq_f32(ll); rl = vmulq_f32(rl, vrsqrtsq_f32(vmulq_f32(rl, ll), rl)); // refine rsqrt estimate float32x4_t s = vmulq_f32(vdupq_n_f32(32767.f), rl); +#else + float32x4_t s = vdivq_f32(vdupq_n_f32(32767.f), vsqrtq_f32(ll)); +#endif // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction const float32x4_t fsnap = vdupq_n_f32(3 << 22); - int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap)); - int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); - int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); + int32x4_t xr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, x, s)); + int32x4_t yr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, y, s)); + int32x4_t zr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, z, s)); // mix x/z and y/0 to make 16-bit unpack easier - int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16)); + int32x4_t xzr = vsliq_n_s32(xr, zr, 16); int32x4_t y0r = vandq_s32(yr, vdupq_n_s32(0xffff)); // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w @@ -517,7 +675,7 @@ static void decodeFilterOctSimd(short* data, size_t count) static void decodeFilterQuatSimd(short* data, size_t count) { - const float scale = 1.f / sqrtf(2.f); + const float scale = 32767.f / sqrtf(2.f); for (size_t i = 0; i < count; i += 4) { @@ -536,43 +694,52 @@ static void decodeFilterQuatSimd(short* data, size_t count) // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) int32x4_t sf = vorrq_s32(cf, vdupq_n_s32(3)); - 
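// [editor sketch, not part of the patch] The fsnap = 3 << 22 trick used above: adding
// 1.5 * 2^23 to a small float forces the sum's exponent to 23, so after the addition the low
// mantissa bits hold the value rounded to the nearest integer (ties to even under the default
// FP environment), offset by 0x4B40_0000 -- and since only the low 8/16 bits are consumed,
// the offset never needs to be subtracted.
#include <assert.h>
#include <stdint.h>
#include <string.h>

static int16_t sketchSnapToInt16(float x) // valid for |x| well below 2^22
{
    float snapped = x + 12582912.f; // 3 << 22 == 1.5 * 2^23

    uint32_t bits;
    memcpy(&bits, &snapped, sizeof(bits));

    return (int16_t)(bits & 0xffff); // low 16 bits hold round(x) as an int16
}

static void sketchSnapCheck()
{
    assert(sketchSnapToInt16(127.3f) == 127);
    assert(sketchSnapToInt16(-32767.6f) == -32768);
    assert(sketchSnapToInt16(0.25f) == 0);
}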
float32x4_t ss = vdivq_f32(vdupq_n_f32(scale), vcvtq_f32_s32(sf)); + float32x4_t s = vcvtq_f32_s32(sf); - // convert x/y/z to [-1..1] (scaled...) - float32x4_t x = vmulq_f32(vcvtq_f32_s32(xf), ss); - float32x4_t y = vmulq_f32(vcvtq_f32_s32(yf), ss); - float32x4_t z = vmulq_f32(vcvtq_f32_s32(zf), ss); + // convert x/y/z to floating point (unscaled! implied scale of 1/sqrt(2.f) * 1/sf) + float32x4_t x = vcvtq_f32_s32(xf); + float32x4_t y = vcvtq_f32_s32(yf); + float32x4_t z = vcvtq_f32_s32(zf); - // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors - float32x4_t ww = vsubq_f32(vdupq_n_f32(1.f), vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)))); + // reconstruct w as a square root (unscaled); we clamp to 0.f to avoid NaN due to precision errors + float32x4_t ws = vmulq_f32(s, s); + float32x4_t ww = vsubq_f32(vaddq_f32(ws, ws), vfmaq_f32(vfmaq_f32(vmulq_f32(x, x), y, y), z, z)); float32x4_t w = vsqrtq_f32(vmaxq_f32(ww, vdupq_n_f32(0.f))); - float32x4_t s = vdupq_n_f32(32767.f); + // compute final scale; note that all computations above are unscaled + // we need to divide by sf to get out of fixed point, divide by sqrt(2) to renormalize and multiply by 32767 to get to int16 range + float32x4_t ss = vdivq_f32(vdupq_n_f32(scale), s); // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction const float32x4_t fsnap = vdupq_n_f32(3 << 22); - int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap)); - int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); - int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); - int32x4_t wr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(w, s), fsnap)); + int32x4_t xr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, x, ss)); + int32x4_t yr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, y, ss)); + int32x4_t zr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, z, ss)); + int32x4_t wr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, w, ss)); // mix x/z and w/y to make 16-bit unpack easier - int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16)); - int32x4_t wyr = vorrq_s32(vandq_s32(wr, vdupq_n_s32(0xffff)), vshlq_n_s32(yr, 16)); + int32x4_t xzr = vsliq_n_s32(xr, zr, 16); + int32x4_t wyr = vsliq_n_s32(wr, yr, 16); // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0) - int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[0]); - int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[1]); + uint64x2_t res_0 = vreinterpretq_u64_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[0]); + uint64x2_t res_1 = vreinterpretq_u64_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[1]); + + // store results to stack so that we can rotate using scalar instructions + // TODO: volatile works around LLVM mis-optimizing code; https://github.com/llvm/llvm-project/issues/166808 + volatile uint64_t res[4]; + vst1q_u64(const_cast(&res[0]), res_0); + vst1q_u64(const_cast(&res[2]), res_1); // rotate and store - uint64_t* out = (uint64_t*)&data[i * 4]; + uint64_t* out = reinterpret_cast(&data[i * 4]); - out[0] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 0), vgetq_lane_s32(cf, 0) << 4); - out[1] = 
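// Why the rewritten quaternion path can defer all scaling to one multiply (a hedged sketch,
// assuming the scalar layout where q[3] | 3 recovers the quantization range sf and the stored
// x/y/z represent x_real = x / (sqrt(2) * sf), as the "implied scale" comment above suggests):
//   w_real^2 = 1 - (x^2 + y^2 + z^2) / (2 * sf^2) = (2 * sf^2 - (x^2 + y^2 + z^2)) / (2 * sf^2)
// so sqrt(2 * sf^2 - (x^2 + y^2 + z^2)) is w on the same fixed-point scale as x/y/z, and the single
// factor ss = 32767 / (sqrt(2) * sf) takes all four components straight to int16 range.
inline float reconstructQuatW(float x, float y, float z, float sf)
{
	float ww = 2.f * sf * sf - (x * x + y * y + z * z);
	return sqrtf(ww > 0.f ? ww : 0.f); // clamp avoids NaN from quantization error
}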
rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 1), vgetq_lane_s32(cf, 1) << 4); - out[2] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 0), vgetq_lane_s32(cf, 2) << 4); - out[3] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 1), vgetq_lane_s32(cf, 3) << 4); + out[0] = rotateleft64(res[0], data[(i + 0) * 4 + 3] << 4); + out[1] = rotateleft64(res[1], data[(i + 1) * 4 + 3] << 4); + out[2] = rotateleft64(res[2], data[(i + 2) * 4 + 3] << 4); + out[3] = rotateleft64(res[3], data[(i + 3) * 4 + 3] << 4); } } @@ -595,10 +762,112 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count) vst1q_f32(reinterpret_cast(&data[i]), r); } } + +static void decodeFilterColorSimd8(unsigned char* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + int32x4_t c4 = vld1q_s32(reinterpret_cast(&data[i * 4])); + + // unpack y/co/cg/a (co/cg are sign extended with arithmetic shifts) + int32x4_t yf = vandq_s32(c4, vdupq_n_s32(0xff)); + int32x4_t cof = vshrq_n_s32(vshlq_n_s32(c4, 16), 24); + int32x4_t cgf = vshrq_n_s32(vshlq_n_s32(c4, 8), 24); + int32x4_t af = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(c4), 24)); + + // recover scale from alpha high bit + int32x4_t as = af; + as = vorrq_s32(as, vshrq_n_s32(as, 1)); + as = vorrq_s32(as, vshrq_n_s32(as, 2)); + as = vorrq_s32(as, vshrq_n_s32(as, 4)); + + // expand alpha by one bit to match other components + af = vorrq_s32(vandq_s32(vshlq_n_s32(af, 1), as), vandq_s32(af, vdupq_n_s32(1))); + + // compute scaling factor + float32x4_t ss = vmulq_f32(vdupq_n_f32(255.f), vrecpeq_f32(vcvtq_f32_s32(as))); + + // convert to RGB in fixed point + int32x4_t rf = vaddq_s32(yf, vsubq_s32(cof, cgf)); + int32x4_t gf = vaddq_s32(yf, cgf); + int32x4_t bf = vsubq_s32(yf, vaddq_s32(cof, cgf)); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction + const float32x4_t fsnap = vdupq_n_f32(3 << 22); + + int32x4_t rr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, vcvtq_f32_s32(rf), ss)); + int32x4_t gr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, vcvtq_f32_s32(gf), ss)); + int32x4_t br = vreinterpretq_s32_f32(vfmaq_f32(fsnap, vcvtq_f32_s32(bf), ss)); + int32x4_t ar = vreinterpretq_s32_f32(vfmaq_f32(fsnap, vcvtq_f32_s32(af), ss)); + + // repack rgba into final value + int32x4_t res = vsliq_n_s32(rr, vsliq_n_s32(gr, vsliq_n_s32(br, ar, 8), 8), 8); + + vst1q_s32(reinterpret_cast(&data[i * 4]), res); + } +} + +static void decodeFilterColorSimd16(unsigned short* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + int32x4_t c4_0 = vld1q_s32(reinterpret_cast(&data[(i + 0) * 4])); + int32x4_t c4_1 = vld1q_s32(reinterpret_cast(&data[(i + 2) * 4])); + + // gather both y/co 16-bit pairs in each 32-bit lane + int32x4_t c4_yco = vuzpq_s32(c4_0, c4_1).val[0]; + int32x4_t c4_cga = vuzpq_s32(c4_0, c4_1).val[1]; + + // unpack y/co/cg/a components (co/cg are sign extended with arithmetic shifts) + int32x4_t yf = vandq_s32(c4_yco, vdupq_n_s32(0xffff)); + int32x4_t cof = vshrq_n_s32(c4_yco, 16); + int32x4_t cgf = vshrq_n_s32(vshlq_n_s32(c4_cga, 16), 16); + int32x4_t af = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(c4_cga), 16)); + + // recover scale from alpha high bit + int32x4_t as = af; + as = vorrq_s32(as, vshrq_n_s32(as, 1)); + as = vorrq_s32(as, vshrq_n_s32(as, 2)); + as = vorrq_s32(as, vshrq_n_s32(as, 4)); + as = vorrq_s32(as, vshrq_n_s32(as, 
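// Placement of the reconstructed component (a sketch, relying on rotateleft64 reducing its rotation
// amount modulo 64): each 64-bit word packs the rounded int16 values as w|x|y|z with w in the low
// 16 bits, and the low 2 bits of the stored q[3] index the dropped component, so rotating by
// index * 16 puts w into that output slot with x/y/z following cyclically
// (index 1 -> z,w,x,y; index 2 -> y,z,w,x; index 3 -> x,y,z,w).
inline unsigned long long placeQuatW(unsigned long long packed_wxyz, int q3)
{
	return rotateleft64(packed_wxyz, (q3 & 3) << 4);
}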
8)); + + // expand alpha by one bit to match other components + af = vorrq_s32(vandq_s32(vshlq_n_s32(af, 1), as), vandq_s32(af, vdupq_n_s32(1))); + + // compute scaling factor + float32x4_t ss = vdivq_f32(vdupq_n_f32(65535.f), vcvtq_f32_s32(as)); + + // convert to RGB in fixed point + int32x4_t rf = vaddq_s32(yf, vsubq_s32(cof, cgf)); + int32x4_t gf = vaddq_s32(yf, cgf); + int32x4_t bf = vsubq_s32(yf, vaddq_s32(cof, cgf)); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const float32x4_t fsnap = vdupq_n_f32(3 << 22); + + int32x4_t rr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, vcvtq_f32_s32(rf), ss)); + int32x4_t gr = vreinterpretq_s32_f32(vfmaq_f32(fsnap, vcvtq_f32_s32(gf), ss)); + int32x4_t br = vreinterpretq_s32_f32(vfmaq_f32(fsnap, vcvtq_f32_s32(bf), ss)); + int32x4_t ar = vreinterpretq_s32_f32(vfmaq_f32(fsnap, vcvtq_f32_s32(af), ss)); + + // mix r/b and g/a to make 16-bit unpack easier + int32x4_t rbr = vsliq_n_s32(rr, br, 16); + int32x4_t gar = vsliq_n_s32(gr, ar, 16); + + // pack r/g/b/a using 16-bit unpacks + int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(rbr), vreinterpretq_s16_s32(gar)).val[0]); + int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(rbr), vreinterpretq_s16_s32(gar)).val[1]); + + vst1q_s32(reinterpret_cast(&data[(i + 0) * 4]), res_0); + vst1q_s32(reinterpret_cast(&data[(i + 2) * 4]), res_1); + } +} #endif #ifdef SIMD_WASM -static void decodeFilterOctSimd(signed char* data, size_t count) +static void decodeFilterOctSimd8(signed char* data, size_t count) { const v128_t sign = wasm_f32x4_splat(-0.f); @@ -647,10 +916,11 @@ static void decodeFilterOctSimd(signed char* data, size_t count) } } -static void decodeFilterOctSimd(short* data, size_t count) +static void decodeFilterOctSimd16(short* data, size_t count) { const v128_t sign = wasm_f32x4_splat(-0.f); - const v128_t zmask = wasm_i32x4_splat(0x7fff); + // TODO: volatile here works around LLVM mis-optimizing code; https://github.com/llvm/llvm-project/issues/149457 + volatile v128_t zmask = wasm_i32x4_splat(0x7fff); for (size_t i = 0; i < count; i += 4) { @@ -711,7 +981,7 @@ static void decodeFilterOctSimd(short* data, size_t count) static void decodeFilterQuatSimd(short* data, size_t count) { - const float scale = 1.f / sqrtf(2.f); + const float scale = 32767.f / sqrtf(2.f); for (size_t i = 0; i < count; i += 4) { @@ -730,28 +1000,31 @@ static void decodeFilterQuatSimd(short* data, size_t count) // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) v128_t sf = wasm_v128_or(cf, wasm_i32x4_splat(3)); - v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(scale), wasm_f32x4_convert_i32x4(sf)); + v128_t s = wasm_f32x4_convert_i32x4(sf); - // convert x/y/z to [-1..1] (scaled...) - v128_t x = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(xf), ss); - v128_t y = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(yf), ss); - v128_t z = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(zf), ss); + // convert x/y/z to floating point (unscaled! 
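// The "recover scale from alpha high bit" idiom in scalar form: the encoder forces the top bit of
// alpha (bit K-1 of a K-bit encode), so OR-ing in progressively shifted copies fills every bit
// below the leading one and yields (1 << K) - 1 without storing K anywhere.
inline int alphaRangeFromHighBit(int a)
{
	a |= a >> 1;
	a |= a >> 2;
	a |= a >> 4;
	a |= a >> 8; // enough for 16-bit components; the 8-bit paths stop at >> 4
	return a;
}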
implied scale of 1/sqrt(2.f) * 1/sf) + v128_t x = wasm_f32x4_convert_i32x4(xf); + v128_t y = wasm_f32x4_convert_i32x4(yf); + v128_t z = wasm_f32x4_convert_i32x4(zf); - // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + // reconstruct w as a square root (unscaled); we clamp to 0.f to avoid NaN due to precision errors // note: i32x4_max with 0 is equivalent to f32x4_max - v128_t ww = wasm_f32x4_sub(wasm_f32x4_splat(1.f), wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)))); + v128_t ws = wasm_f32x4_mul(s, s); + v128_t ww = wasm_f32x4_sub(wasm_f32x4_add(ws, ws), wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)))); v128_t w = wasm_f32x4_sqrt(wasm_i32x4_max(ww, wasm_i32x4_splat(0))); - v128_t s = wasm_f32x4_splat(32767.f); + // compute final scale; note that all computations above are unscaled + // we need to divide by sf to get out of fixed point, divide by sqrt(2) to renormalize and multiply by 32767 to get to int16 range + v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(scale), s); // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction const v128_t fsnap = wasm_f32x4_splat(3 << 22); - v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap); - v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap); - v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap); - v128_t wr = wasm_f32x4_add(wasm_f32x4_mul(w, s), fsnap); + v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, ss), fsnap); + v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, ss), fsnap); + v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, ss), fsnap); + v128_t wr = wasm_f32x4_add(wasm_f32x4_mul(w, ss), fsnap); // mix x/z and w/y to make 16-bit unpack easier v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16)); @@ -762,8 +1035,7 @@ static void decodeFilterQuatSimd(short* data, size_t count) v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr); // compute component index shifted left by 4 (and moved into i32x4 slot) - // TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449 - volatile v128_t cm = wasm_i32x4_shl(cf, 4); + v128_t cm = wasm_i32x4_shl(cf, 4); // rotate and store uint64_t* out = reinterpret_cast(&data[i * 4]); @@ -794,6 +1066,117 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count) wasm_v128_store(&data[i], r); } } + +static void decodeFilterColorSimd8(unsigned char* data, size_t count) +{ + // TODO: volatile here works around LLVM mis-optimizing code; https://github.com/llvm/llvm-project/issues/149457 + volatile v128_t zero = wasm_i32x4_splat(0); + + for (size_t i = 0; i < count; i += 4) + { + v128_t c4 = wasm_v128_load(&data[i * 4]); + + // unpack y/co/cg/a (co/cg are sign extended with arithmetic shifts) + v128_t yf = wasm_v128_and(c4, wasm_i32x4_splat(0xff)); + v128_t cof = wasm_i32x4_shr(wasm_i32x4_shl(c4, 16), 24); + v128_t cgf = wasm_i32x4_shr(wasm_i32x4_shl(c4, 8), 24); + v128_t af = wasm_v128_or(zero, wasm_u32x4_shr(c4, 24)); + + // recover scale from alpha high bit + v128_t as = af; + as = wasm_v128_or(as, wasm_i32x4_shr(as, 1)); + as = wasm_v128_or(as, wasm_i32x4_shr(as, 2)); + as = wasm_v128_or(as, wasm_i32x4_shr(as, 4)); + + // expand alpha by one bit to match other components + af = 
wasm_v128_or(wasm_v128_and(wasm_i32x4_shl(af, 1), as), wasm_v128_and(af, wasm_i32x4_splat(1))); + + // compute scaling factor + v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(255.f), wasm_f32x4_convert_i32x4(as)); + + // convert to RGB in fixed point + v128_t rf = wasm_i32x4_add(yf, wasm_i32x4_sub(cof, cgf)); + v128_t gf = wasm_i32x4_add(yf, cgf); + v128_t bf = wasm_i32x4_sub(yf, wasm_i32x4_add(cof, cgf)); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction + const v128_t fsnap = wasm_f32x4_splat(3 << 22); + + v128_t rr = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(rf), ss), fsnap); + v128_t gr = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(gf), ss), fsnap); + v128_t br = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(bf), ss), fsnap); + v128_t ar = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(af), ss), fsnap); + + // repack rgba into final value + v128_t res = wasm_v128_and(rr, wasm_i32x4_splat(0xff)); + res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(gr, wasm_i32x4_splat(0xff)), 8)); + res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(br, wasm_i32x4_splat(0xff)), 16)); + res = wasm_v128_or(res, wasm_i32x4_shl(ar, 24)); + + wasm_v128_store(&data[i * 4], res); + } +} + +static void decodeFilterColorSimd16(unsigned short* data, size_t count) +{ + // TODO: volatile here works around LLVM mis-optimizing code; https://github.com/llvm/llvm-project/issues/149457 + volatile v128_t zero = wasm_i32x4_splat(0); + + for (size_t i = 0; i < count; i += 4) + { + v128_t c4_0 = wasm_v128_load(&data[(i + 0) * 4]); + v128_t c4_1 = wasm_v128_load(&data[(i + 2) * 4]); + + // gather both y/co 16-bit pairs in each 32-bit lane + v128_t c4_yco = wasmx_unziplo_v32x4(c4_0, c4_1); + v128_t c4_cga = wasmx_unziphi_v32x4(c4_0, c4_1); + + // unpack y/co/cg/a components (co/cg are sign extended with arithmetic shifts) + v128_t yf = wasm_v128_and(c4_yco, wasm_i32x4_splat(0xffff)); + v128_t cof = wasm_i32x4_shr(c4_yco, 16); + v128_t cgf = wasm_i32x4_shr(wasm_i32x4_shl(c4_cga, 16), 16); + v128_t af = wasm_v128_or(zero, wasm_u32x4_shr(c4_cga, 16)); + + // recover scale from alpha high bit + v128_t as = af; + as = wasm_v128_or(as, wasm_i32x4_shr(as, 1)); + as = wasm_v128_or(as, wasm_i32x4_shr(as, 2)); + as = wasm_v128_or(as, wasm_i32x4_shr(as, 4)); + as = wasm_v128_or(as, wasm_i32x4_shr(as, 8)); + + // expand alpha by one bit to match other components + af = wasm_v128_or(wasm_v128_and(wasm_i32x4_shl(af, 1), as), wasm_v128_and(af, wasm_i32x4_splat(1))); + + // compute scaling factor + v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(65535.f), wasm_f32x4_convert_i32x4(as)); + + // convert to RGB in fixed point + v128_t rf = wasm_i32x4_add(yf, wasm_i32x4_sub(cof, cgf)); + v128_t gf = wasm_i32x4_add(yf, cgf); + v128_t bf = wasm_i32x4_sub(yf, wasm_i32x4_add(cof, cgf)); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const v128_t fsnap = wasm_f32x4_splat(3 << 22); + + v128_t rr = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(rf), ss), fsnap); + v128_t gr = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(gf), ss), fsnap); + v128_t br = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(bf), ss), fsnap); + v128_t ar = 
wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(af), ss), fsnap); + + // mix r/b and g/a to make 16-bit unpack easier + v128_t rbr = wasm_v128_or(wasm_v128_and(rr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(br, 16)); + v128_t gar = wasm_v128_or(wasm_v128_and(gr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(ar, 16)); + + // pack r/g/b/a using 16-bit unpacks + v128_t res_0 = wasmx_unpacklo_v16x8(rbr, gar); + v128_t res_1 = wasmx_unpackhi_v16x8(rbr, gar); + + wasm_v128_store(&data[(i + 0) * 4], res_0); + wasm_v128_store(&data[(i + 2) * 4], res_1); + } +} #endif // optimized variant of frexp @@ -807,7 +1190,7 @@ inline int optlog2(float v) u.f = v; // +1 accounts for implicit 1. in mantissa; denormalized numbers will end up clamped to min_exp by calling code - return u.ui == 0 ? 0 : int((u.ui >> 23) & 0xff) - 127 + 1; + return v == 0 ? 0 : int((u.ui >> 23) & 0xff) - 127 + 1; } // optimized variant of ldexp @@ -833,9 +1216,9 @@ void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride) #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) if (stride == 4) - dispatchSimd(decodeFilterOctSimd, static_cast(buffer), count, 4); + dispatchSimd(decodeFilterOctSimd8, static_cast(buffer), count, 4); else - dispatchSimd(decodeFilterOctSimd, static_cast(buffer), count, 4); + dispatchSimd(decodeFilterOctSimd16, static_cast(buffer), count, 4); #else if (stride == 4) decodeFilterOct(static_cast(buffer), count); @@ -871,10 +1254,29 @@ void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride) #endif } +void meshopt_decodeFilterColor(void* buffer, size_t count, size_t stride) +{ + using namespace meshopt; + + assert(stride == 4 || stride == 8); + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + if (stride == 4) + dispatchSimd(decodeFilterColorSimd8, static_cast(buffer), count, 4); + else + dispatchSimd(decodeFilterColorSimd16, static_cast(buffer), count, 4); +#else + if (stride == 4) + decodeFilterColor(static_cast(buffer), count); + else + decodeFilterColor(static_cast(buffer), count); +#endif +} + void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data) { assert(stride == 4 || stride == 8); - assert(bits >= 1 && bits <= 16); + assert(bits >= 2 && bits <= 16); signed char* d8 = static_cast(destination); short* d16 = static_cast(destination); @@ -1010,6 +1412,20 @@ void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, in component_exp[j] = (min_exp < e) ? e : min_exp; } } + else if (mode == meshopt_EncodeExpClamped) + { + for (size_t j = 0; j < stride_float; ++j) + { + int e = optlog2(v[j]); + + component_exp[j] = (0 < e) ? e : 0; + } + } + else + { + // the code below assumes component_exp is initialized outside of the loop + assert(mode == meshopt_EncodeExpSharedComponent); + } for (size_t j = 0; j < stride_float; ++j) { @@ -1020,7 +1436,6 @@ void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, in // compute renormalized rounded mantissa for each component int mmask = (1 << 24) - 1; - int m = int(v[j] * optexp2(-exp) + (v[j] >= 0 ? 
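// For reference, a scalar sketch of what the exp filter stores per 32-bit word: a signed 24-bit
// mantissa m in the low bits and a signed 8-bit exponent e in the high bits, decoded as m * 2^e.
// The encode modes above only differ in how e is chosen (per value, clamped so it never drops below
// zero for meshopt_EncodeExpClamped, or shared); the stored layout is the same for all of them.
inline float decodeExpScalar(unsigned int v)
{
	int m = int(v << 8) >> 8; // sign-extend the 24-bit mantissa
	int e = int(v) >> 24;     // signed exponent
	union { float f; unsigned int u; } u;
	u.u = unsigned(e + 127) << 23; // 2^e as a float, assuming e stays in the encoder's valid range
	return float(m) * u.f;
}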
0.5f : -0.5f)); d[j] = (m & mmask) | (unsigned(exp) << 24); @@ -1028,6 +1443,51 @@ void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, in } } +void meshopt_encodeFilterColor(void* destination, size_t count, size_t stride, int bits, const float* data) +{ + assert(stride == 4 || stride == 8); + assert(bits >= 2 && bits <= 16); + + unsigned char* d8 = static_cast(destination); + unsigned short* d16 = static_cast(destination); + + for (size_t i = 0; i < count; ++i) + { + const float* c = &data[i * 4]; + + int fr = meshopt_quantizeUnorm(c[0], bits); + int fg = meshopt_quantizeUnorm(c[1], bits); + int fb = meshopt_quantizeUnorm(c[2], bits); + + // YCoCg-R encoding with truncated Co/Cg ensures that decoding can be done using integers + int fco = (fr - fb) / 2; + int tmp = fb + fco; + int fcg = (fg - tmp) / 2; + int fy = tmp + fcg; + + // validate that R/G/B can be reconstructed with K bit integers + assert(unsigned((fy + fco - fcg) | (fy + fcg) | (fy - fco - fcg)) < (1u << bits)); + + // alpha: K-1-bit encoding with high bit set to 1 + int fa = meshopt_quantizeUnorm(c[3], bits - 1) | (1 << (bits - 1)); + + if (stride == 4) + { + d8[i * 4 + 0] = (unsigned char)(fy); + d8[i * 4 + 1] = (unsigned char)(fco); + d8[i * 4 + 2] = (unsigned char)(fcg); + d8[i * 4 + 3] = (unsigned char)(fa); + } + else + { + d16[i * 4 + 0] = (unsigned short)(fy); + d16[i * 4 + 1] = (unsigned short)(fco); + d16[i * 4 + 2] = (unsigned short)(fcg); + d16[i * 4 + 3] = (unsigned short)(fa); + } + } +} + #undef SIMD_SSE #undef SIMD_NEON #undef SIMD_WASM diff --git a/Source/ThirdParty/meshoptimizer/vfetchanalyzer.cpp b/Source/ThirdParty/meshoptimizer/vfetchanalyzer.cpp deleted file mode 100644 index 51dca873f..000000000 --- a/Source/ThirdParty/meshoptimizer/vfetchanalyzer.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details -#include "meshoptimizer.h" - -#include -#include - -meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size) -{ - assert(index_count % 3 == 0); - assert(vertex_size > 0 && vertex_size <= 256); - - meshopt_Allocator allocator; - - meshopt_VertexFetchStatistics result = {}; - - unsigned char* vertex_visited = allocator.allocate(vertex_count); - memset(vertex_visited, 0, vertex_count); - - const size_t kCacheLine = 64; - const size_t kCacheSize = 128 * 1024; - - // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway - size_t cache[kCacheSize / kCacheLine] = {}; - - for (size_t i = 0; i < index_count; ++i) - { - unsigned int index = indices[i]; - assert(index < vertex_count); - - vertex_visited[index] = 1; - - size_t start_address = index * vertex_size; - size_t end_address = start_address + vertex_size; - - size_t start_tag = start_address / kCacheLine; - size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine; - - assert(start_tag < end_tag); - - for (size_t tag = start_tag; tag < end_tag; ++tag) - { - size_t line = tag % (sizeof(cache) / sizeof(cache[0])); - - // we store +1 since cache is filled with 0 by default - result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine; - cache[line] = tag + 1; - } - } - - size_t unique_vertex_count = 0; - - for (size_t i = 0; i < vertex_count; ++i) - unique_vertex_count += vertex_visited[i]; - - result.overfetch = unique_vertex_count == 0 ? 
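// Worked example for the YCoCg-R transform in meshopt_encodeFilterColor above (8-bit quantization,
// values already quantized): r=254, g=0, b=1 gives co = (254 - 1) / 2 = 126, tmp = 1 + 126 = 127,
// cg = (0 - 127) / 2 = -63 (C division truncates toward zero), y = 127 - 63 = 64. The decoder's
// integer inverse then yields
//   R = y + co - cg = 253, G = y + cg = 1, B = y - co - cg = 1,
// i.e. each channel is reproduced within one step of the quantized input and stays inside
// [0, 1 << bits), which is what the assert in the encoder verifies.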
0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size); - - return result; -} diff --git a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs index 54329634f..034803d25 100644 --- a/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs +++ b/Source/Tools/Flax.Build/Bindings/BindingsGenerator.CSharp.cs @@ -703,6 +703,8 @@ namespace Flax.Build.Bindings else if (nativeType.EndsWith("[]")) { parameterMarshalType = $"MarshalUsing(typeof(FlaxEngine.Interop.ArrayMarshaller<,>))"; + if (!parameterInfo.IsOut && !parameterInfo.IsRef) + parameterMarshalType += ", In"; // The usage of 'LibraryImportAttribute' does not follow recommendations. It is recommended to use explicit '[In]' and '[Out]' attributes on array parameters. } if (!string.IsNullOrEmpty(parameterMarshalType)) diff --git a/Source/Tools/Flax.Build/Build/Builder.cs b/Source/Tools/Flax.Build/Build/Builder.cs index 3a4286254..775bef1dc 100644 --- a/Source/Tools/Flax.Build/Build/Builder.cs +++ b/Source/Tools/Flax.Build/Build/Builder.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; using System.IO; using System.Linq; +using System.Text; using Flax.Build.Graph; using Flax.Build.NativeCpp; @@ -424,5 +425,120 @@ namespace Flax.Build return failed; } + + private static void DeployFiles(TaskGraph graph, Target target, BuildOptions targetBuildOptions, string outputPath) + { + using (new ProfileEventScope("DeployFiles")) + { + foreach (var srcFile in targetBuildOptions.OptionalDependencyFiles.Where(File.Exists).Union(targetBuildOptions.DependencyFiles)) + { + var dstFile = Path.Combine(outputPath, Path.GetFileName(srcFile)); + graph.AddCopyFile(dstFile, srcFile); + } + + if (targetBuildOptions.NugetPackageReferences.Any()) + { + // Find all packages to deploy (incl. dependencies) and restore if needed + var nugetPath = Utilities.GetNugetPackagesPath(); + Log.Verbose($"Deploying NuGet packages from {nugetPath}"); + var restoreOnce = true; + var nugetFiles = new HashSet(); + foreach (var reference in targetBuildOptions.NugetPackageReferences) + { + var folder = reference.GetLibFolder(nugetPath); + if (!Directory.Exists(folder) && restoreOnce) + { + // Package binaries folder is missing so restore packages (incl. 
dependency packages) + RestoreNugetPackages(graph, target, targetBuildOptions); + restoreOnce = false; + } + + DeployNuGetPackage(nugetPath, targetBuildOptions, nugetFiles, reference, folder); + } + + // Copy libraries from all referenced packages to the output folder + foreach (var file in nugetFiles) + { + var dstFile = Path.Combine(outputPath, Path.GetFileName(file)); + graph.AddCopyFile(dstFile, file); + } + } + } + } + + private static void RestoreNugetPackages(TaskGraph graph, Target target, BuildOptions targetBuildOptions) + { + // Generate a dummy csproj file to restore package from it + var csprojPath = Path.Combine(targetBuildOptions.IntermediateFolder, "nuget.restore.csproj"); + var dotnetSdk = DotNetSdk.Instance; + var csProjectFileContent = new StringBuilder(); + csProjectFileContent.AppendLine(""); + csProjectFileContent.AppendLine(" "); + csProjectFileContent.AppendLine($" net{dotnetSdk.Version.Major}.{dotnetSdk.Version.Minor}"); + csProjectFileContent.AppendLine(" false"); + csProjectFileContent.AppendLine(" false"); + csProjectFileContent.AppendLine(" false"); + csProjectFileContent.AppendLine(" false"); + csProjectFileContent.AppendLine(" false"); + csProjectFileContent.AppendLine(" false"); + csProjectFileContent.AppendLine($" {dotnetSdk.CSharpLanguageVersion}"); + csProjectFileContent.AppendLine(" 512"); + csProjectFileContent.AppendLine(" true"); + csProjectFileContent.AppendLine(" "); + csProjectFileContent.AppendLine(" "); + foreach (var reference in targetBuildOptions.NugetPackageReferences) + csProjectFileContent.AppendLine($" "); + csProjectFileContent.AppendLine(" "); + csProjectFileContent.AppendLine(""); + Utilities.WriteFileIfChanged(csprojPath, csProjectFileContent.ToString()); + + // Restore packages using dotnet CLI (synchronous to prevent task ordering issues on C# library building) + Log.Info($"Restoring NuGet packages for target {target.Name}"); + Utilities.Run(Utilities.GetDotNetPath(), $"restore \"{csprojPath}\"", null, null, Utilities.RunOptions.DefaultTool); + } + + private static void DeployNuGetPackage(string nugetPath, BuildOptions targetBuildOptions, HashSet nugetFiles, NugetPackage package, string folder = null) + { + // Deploy library + var path = package.GetLibPath(nugetPath, folder); + if (!File.Exists(path)) + return; + Log.Verbose($"Deploying NuGet package {package.Name}, {package.Version}, {package.Framework}"); + nugetFiles.Add(path); + + // Copy additional files (if included) + path = Path.ChangeExtension(path, "xml"); + if (File.Exists(path)) + nugetFiles.Add(path); + path = Path.ChangeExtension(path, "pdb"); + if (targetBuildOptions.Configuration != TargetConfiguration.Release && File.Exists(path)) + nugetFiles.Add(path); + + // Read package dependencies + var nuspecFile = package.GetNuspecPath(nugetPath); + if (File.Exists(nuspecFile)) + { + var doc = System.Xml.Linq.XDocument.Load(nuspecFile); + var root = (System.Xml.Linq.XElement)doc.FirstNode; + var metadataNode = root.Descendants().First(x => x.Name.LocalName== "metadata"); + var dependenciesNode = metadataNode.Descendants().First(x => x.Name.LocalName == "dependencies"); + var groupNode = dependenciesNode.Descendants().FirstOrDefault(x => x.Attribute("targetFramework")?.Value == package.Framework); + if (groupNode == null) + { + Log.Warning($"Cannot find framework {package.Framework} inside NuGet package {package.Name}, {package.Version}"); + return; + } + foreach (var dependency in groupNode.Descendants()) + { + if (dependency.Name.LocalName != "dependency") + continue; + + // 
Deploy dependency package + var dependencyId = dependency.Attribute("id").Value; + var dependencyVersion = dependency.Attribute("version").Value; + DeployNuGetPackage(nugetPath, targetBuildOptions, nugetFiles, new NugetPackage { Name = dependencyId, Version = dependencyVersion, Framework = package.Framework } ); + } + } + } } } diff --git a/Source/Tools/Flax.Build/Build/DotNet/Builder.DotNet.cs b/Source/Tools/Flax.Build/Build/DotNet/Builder.DotNet.cs index 23646a3d7..54136b490 100644 --- a/Source/Tools/Flax.Build/Build/DotNet/Builder.DotNet.cs +++ b/Source/Tools/Flax.Build/Build/DotNet/Builder.DotNet.cs @@ -135,27 +135,7 @@ namespace Flax.Build // Deploy files if (!target.IsPreBuilt) { - using (new ProfileEventScope("DeployFiles")) - { - foreach (var srcFile in targetBuildOptions.OptionalDependencyFiles.Where(File.Exists).Union(targetBuildOptions.DependencyFiles)) - { - var dstFile = Path.Combine(outputPath, Path.GetFileName(srcFile)); - graph.AddCopyFile(dstFile, srcFile); - } - - if (targetBuildOptions.NugetPackageReferences.Any()) - { - var nugetPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".nuget", "packages"); - foreach (var reference in targetBuildOptions.NugetPackageReferences) - { - var path = Path.Combine(nugetPath, reference.Name, reference.Version, "lib", reference.Framework, $"{reference.Name}.dll"); - if (!File.Exists(path)) - Utilities.RestoreNugetPackages(graph, target); - var dstFile = Path.Combine(outputPath, Path.GetFileName(path)); - graph.AddCopyFile(dstFile, path); - } - } - } + DeployFiles(graph, target, targetBuildOptions, outputPath); } using (new ProfileEventScope("PostBuild")) @@ -301,10 +281,10 @@ namespace Flax.Build // Reference Nuget package if (buildData.TargetOptions.NugetPackageReferences.Any()) { - var nugetPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".nuget", "packages"); + var nugetPath = Utilities.GetNugetPackagesPath(); foreach (var reference in buildOptions.NugetPackageReferences) { - var path = Path.Combine(nugetPath, reference.Name, reference.Version, "lib", reference.Framework, $"{reference.Name}.dll"); + var path = reference.GetLibPath(nugetPath); args.Add(string.Format("/reference:\"{0}\"", path)); } } diff --git a/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs b/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs index 13ecd1982..8251ff186 100644 --- a/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs +++ b/Source/Tools/Flax.Build/Build/NativeCpp/BuildOptions.cs @@ -102,6 +102,51 @@ namespace Flax.Build.NativeCpp Version = version; Framework = framework; } + + internal string GetLibFolder(string nugetPath) + { + var libFolder = Path.Combine(nugetPath, Name, Version, "lib", Framework); + if (Directory.Exists(libFolder)) + return libFolder; + + // Try to find nearest framework folder + if (Framework.StartsWith("net")) + { + var baseVersion = int.Parse(Framework.Substring(3, Framework.IndexOf('.') - 3)); + for (int version = baseVersion - 1; version >= 5; version--) + { + var framework = $"net{version}.0"; + libFolder = Path.Combine(nugetPath, Name, Version, "lib", framework); + if (Directory.Exists(libFolder)) + { + Framework = framework; + return libFolder; + } + } + } + + Log.Error($"Missing NuGet package \"{Name}, {Version}, {Framework}\" (nuget: {nugetPath})"); + return string.Empty; + } + + internal string GetNuspecPath(string nugetPath) + { + var files = Directory.GetFiles(Path.Combine(nugetPath, Name, Version), "*.nuspec", 
SearchOption.TopDirectoryOnly); + return files[0]; + } + + internal string GetLibPath(string nugetPath, string libFolder = null) + { + if (libFolder == null) + libFolder = GetLibFolder(nugetPath); + var dlls = Directory.GetFiles(libFolder, "*.dll", SearchOption.TopDirectoryOnly); + if (dlls.Length == 0) + { + Log.Error($"Missing NuGet package \"{Name}, {Version}, {Framework}\" binaries (folder: {libFolder})"); + return string.Empty; + } + return dlls[0]; + } } /// @@ -167,7 +212,7 @@ namespace Flax.Build.NativeCpp /// /// The nuget package references. /// - public List NugetPackageReferences = new List(); + public HashSet NugetPackageReferences = new HashSet(); /// /// The collection of defines with preprocessing symbol for a source files of this module. Inherited by the modules that include it. diff --git a/Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs b/Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs index f279e65d6..fb07b0886 100644 --- a/Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs +++ b/Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs @@ -1057,27 +1057,7 @@ namespace Flax.Build // Deploy files if (!buildData.Target.IsPreBuilt) { - using (new ProfileEventScope("DeployFiles")) - { - foreach (var srcFile in targetBuildOptions.OptionalDependencyFiles.Where(File.Exists).Union(targetBuildOptions.DependencyFiles)) - { - var dstFile = Path.Combine(outputPath, Path.GetFileName(srcFile)); - graph.AddCopyFile(dstFile, srcFile); - } - - if (targetBuildOptions.NugetPackageReferences.Any()) - { - var nugetPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".nuget", "packages"); - foreach (var reference in targetBuildOptions.NugetPackageReferences) - { - var path = Path.Combine(nugetPath, reference.Name, reference.Version, "lib", reference.Framework, $"{reference.Name}.dll"); - if (!File.Exists(path)) - Utilities.RestoreNugetPackages(graph, target); - var dstFile = Path.Combine(outputPath, Path.GetFileName(path)); - graph.AddCopyFile(dstFile, path); - } - } - } + DeployFiles(graph, target, targetBuildOptions, outputPath); } using (new ProfileEventScope("PostBuild")) @@ -1270,27 +1250,7 @@ namespace Flax.Build // Deploy files if (!buildData.Target.IsPreBuilt) { - using (new ProfileEventScope("DeployFiles")) - { - foreach (var srcFile in targetBuildOptions.OptionalDependencyFiles.Where(File.Exists).Union(targetBuildOptions.DependencyFiles)) - { - var dstFile = Path.Combine(outputPath, Path.GetFileName(srcFile)); - graph.AddCopyFile(dstFile, srcFile); - } - - if (targetBuildOptions.NugetPackageReferences.Any()) - { - var nugetPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".nuget", "packages"); - foreach (var reference in targetBuildOptions.NugetPackageReferences) - { - var path = Path.Combine(nugetPath, reference.Name, reference.Version, "lib", reference.Framework, $"{reference.Name}.dll"); - if (!File.Exists(path)) - Utilities.RestoreNugetPackages(graph, target); - var dstFile = Path.Combine(outputPath, Path.GetFileName(path)); - graph.AddCopyFile(dstFile, path); - } - } - } + DeployFiles(graph, target, targetBuildOptions, outputPath); } using (new ProfileEventScope("PostBuild")) diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs b/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs index 60be17f0b..ff16bd1c1 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/AGS.cs @@ -18,6 +18,23 @@ namespace 
Flax.Deps.Dependencies get => new[] { TargetPlatform.Windows }; } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { @@ -30,7 +47,7 @@ namespace Flax.Deps.Dependencies // Copy files foreach (var platform in options.Platforms) { - BuildStarted(platform); + BuildStarted(platform, TargetArchitecture.x64); var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); Utilities.FileCopy(Path.Combine(root, "ags_lib/lib/amd_ags_x64.lib"), Path.Combine(depsFolder, "amd_ags_x64.lib")); Utilities.FileCopy(Path.Combine(root, "ags_lib/lib/amd_ags_x64.dll"), Path.Combine(depsFolder, "amd_ags_x64.dll")); diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs b/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs index bb1c4fa3c..629a69070 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/Assimp.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.IO; +using System.Linq; using Flax.Build; namespace Flax.Deps.Dependencies @@ -39,6 +40,36 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.Linux: + return new[] + { + TargetArchitecture.x64, + //TargetArchitecture.ARM64, + }; + case TargetPlatform.Mac: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { @@ -91,22 +122,22 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - var configuration = "Release"; - var binariesWin = new[] + BuildStarted(platform, architecture); + switch (platform) { - Path.Combine("bin", configuration, "assimp-vc140-md.dll"), - Path.Combine("lib", configuration, "assimp-vc140-md.lib"), - }; + case TargetPlatform.Windows: + { + var configuration = "Release"; + var binariesWin = new[] + { + Path.Combine("bin", configuration, "assimp-vc140-md.dll"), + Path.Combine("lib", configuration, "assimp-vc140-md.lib"), + }; - // Build for Windows - File.Delete(Path.Combine(root, "CMakeCache.txt")); - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) - { + // Build for Windows + File.Delete(Path.Combine(root, "CMakeCache.txt")); var buildDir = Path.Combine(root, "build-" + architecture); var solutionPath = Path.Combine(buildDir, "Assimp.sln"); SetupDirectory(buildDir, true); @@ -116,42 +147,40 @@ namespace Flax.Deps.Dependencies var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var file in binariesWin) Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, Path.GetFileName(file))); + break; } - - break; - } - case TargetPlatform.Linux: - { - var envVars = new Dictionary + case TargetPlatform.Linux: { - { "CC", "clang-" + Configuration.LinuxClangMinVer }, - { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, - { "CXX", "clang++-" + Configuration.LinuxClangMinVer }, - { 
"CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, - }; + var envVars = new Dictionary + { + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, + { "CXX", "clang-" + Configuration.LinuxClangMinVer }, + { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, + }; - // Build for Linux - RunCmake(root, platform, TargetArchitecture.x64, " -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF " + globalConfig, envVars); - Utilities.Run("make", null, null, root, Utilities.RunOptions.DefaultTool, envVars); - configHeaderFilePath = Path.Combine(root, "include", "assimp", "config.h"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - Utilities.FileCopy(Path.Combine(root, "lib", "libassimp.a"), Path.Combine(depsFolder, "libassimp.a")); - break; - } - case TargetPlatform.Mac: - { - // Build for Mac - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + // Build for Linux + File.Delete(Path.Combine(root, "CMakeCache.txt")); + RunCmake(root, platform, architecture, " -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF " + globalConfig, envVars); + Utilities.Run("make", null, null, root, Utilities.RunOptions.DefaultTool, envVars); + configHeaderFilePath = Path.Combine(root, "include", "assimp", "config.h"); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + Utilities.FileCopy(Path.Combine(root, "lib", "libassimp.a"), Path.Combine(depsFolder, "libassimp.a")); + break; + } + case TargetPlatform.Mac: { + // Build for Mac + File.Delete(Path.Combine(root, "CMakeCache.txt")); RunCmake(root, platform, architecture, " -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF " + globalConfig); Utilities.Run("make", null, null, root, Utilities.RunOptions.DefaultTool); configHeaderFilePath = Path.Combine(root, "include", "assimp", "config.h"); var depsFolder = GetThirdPartyFolder(options, platform, architecture); Utilities.FileCopy(Path.Combine(root, "lib", "libassimp.a"), Path.Combine(depsFolder, "libassimp.a")); Utilities.Run("make", "clean", null, root, Utilities.RunOptions.DefaultTool); + break; + } } - break; - } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs index e631b280b..0da78e580 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXMesh.cs @@ -28,6 +28,24 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { @@ -46,12 +64,12 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + BuildStarted(platform, architecture); + switch (platform) + { + case TargetPlatform.Windows: { Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); var depsFolder = GetThirdPartyFolder(options, TargetPlatform.Windows, architecture); @@ -61,7 +79,7 @@ namespace Flax.Deps.Dependencies } } break; - } + } } } diff --git 
a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs index 894af3840..f74494a30 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXShaderCompiler.cs @@ -1,6 +1,5 @@ // Copyright (c) Wojciech Figat. All rights reserved. -using System; using System.IO; using System.Linq; using Flax.Build; @@ -31,22 +30,40 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - var sdk = WindowsPlatformBase.GetSDKs().Last(); - var sdkLibLocation = Path.Combine(sdk.Value, "Lib", WindowsPlatformBase.GetSDKVersion(sdk.Key).ToString(), "um"); - string binLocation = Path.Combine(sdk.Value, "bin", WindowsPlatformBase.GetSDKVersion(sdk.Key).ToString()); - - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + BuildStarted(platform, architecture); + switch (platform) { + case TargetPlatform.Windows: + { + var sdk = WindowsPlatformBase.GetSDKs().Last(); + var sdkLibLocation = Path.Combine(sdk.Value, "Lib", WindowsPlatformBase.GetSDKVersion(sdk.Key).ToString(), "um"); + string binLocation = Path.Combine(sdk.Value, "bin", WindowsPlatformBase.GetSDKVersion(sdk.Key).ToString()); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); string dxilLocation = @$"{binLocation}\{architecture}\dxil.dll"; @@ -60,9 +77,9 @@ namespace Flax.Deps.Dependencies string d3dcompilerLibLocation = @$"{sdkLibLocation}\{architecture}\d3dcompiler.lib"; Utilities.FileCopy(dxcompilerLibLocation, Path.Combine(depsFolder, Path.GetFileName(dxcompilerLibLocation))); Utilities.FileCopy(d3dcompilerLibLocation, Path.Combine(depsFolder, "d3dcompiler_47.lib")); + break; + } } - break; - } } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs index c0d1a461f..cfbb88870 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/DirectXTex.cs @@ -30,6 +30,30 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.XboxOne: + case TargetPlatform.XboxScarlett: + return new[] + { + TargetArchitecture.x64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { @@ -47,44 +71,44 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - var solutionPath = Path.Combine(root, "DirectXTex_Desktop_2022_Win10.sln"); - var binFolder = Path.Combine(root, "DirectXTex", "Bin", "Desktop_2022_Win10"); - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + 
BuildStarted(platform, architecture); + switch (platform) { + case TargetPlatform.Windows: + { + var solutionPath = Path.Combine(root, "DirectXTex_Desktop_2022_Win10.sln"); + var binFolder = Path.Combine(root, "DirectXTex", "Bin", "Desktop_2022_Win10"); Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var file in outputFileNames) Utilities.FileCopy(Path.Combine(binFolder, architecture.ToString(), configuration, file), Path.Combine(depsFolder, file)); + break; + } + case TargetPlatform.UWP: + { + var solutionPath = Path.Combine(root, "DirectXTex_Windows10_2019.sln"); + var binFolder = Path.Combine(root, "DirectXTex", "Bin", "Windows10_2019"); + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, "x64"); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file in outputFileNames) + Utilities.FileCopy(Path.Combine(binFolder, "x64", configuration, file), Path.Combine(depsFolder, file)); + break; + } + case TargetPlatform.XboxOne: + case TargetPlatform.XboxScarlett: + { + var solutionPath = Path.Combine(root, "DirectXTex_GDK_2022.sln"); + var binFolder = Path.Combine(root, "DirectXTex", "Bin", "GDK_2022"); + var xboxName = platform == TargetPlatform.XboxOne ? "Gaming.Xbox.XboxOne.x64" : "Gaming.Xbox.Scarlett.x64"; + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, xboxName); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + foreach (var file in outputFileNames) + Utilities.FileCopy(Path.Combine(binFolder, xboxName, configuration, file), Path.Combine(depsFolder, file)); + break; + } } - break; - } - case TargetPlatform.UWP: - { - var solutionPath = Path.Combine(root, "DirectXTex_Windows10_2019.sln"); - var binFolder = Path.Combine(root, "DirectXTex", "Bin", "Windows10_2019"); - Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, "x64"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in outputFileNames) - Utilities.FileCopy(Path.Combine(binFolder, "x64", configuration, file), Path.Combine(depsFolder, file)); - break; - } - case TargetPlatform.XboxOne: - case TargetPlatform.XboxScarlett: - { - var solutionPath = Path.Combine(root, "DirectXTex_GDK_2022.sln"); - var binFolder = Path.Combine(root, "DirectXTex", "Bin", "GDK_2022"); - var xboxName = platform == TargetPlatform.XboxOne ? "Gaming.Xbox.XboxOne.x64" : "Gaming.Xbox.Scarlett.x64"; - Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, xboxName); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in outputFileNames) - Utilities.FileCopy(Path.Combine(binFolder, xboxName, configuration, file), Path.Combine(depsFolder, file)); - break; - } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/EnvDTE.cs b/Source/Tools/Flax.Build/Deps/Dependencies/EnvDTE.cs new file mode 100644 index 000000000..3f9a2148b --- /dev/null +++ b/Source/Tools/Flax.Build/Deps/Dependencies/EnvDTE.cs @@ -0,0 +1,92 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using System.IO; +using System.IO.Compression; +using Flax.Build; + +namespace Flax.Deps.Dependencies +{ + /// + /// Visual Studio EnvDTE COM library. 
https://learn.microsoft.com/en-us/dotnet/api/envdte?view=visualstudiosdk-2022 + /// + /// + class EnvDTE : Dependency + { + /// + public override TargetPlatform[] Platforms + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetPlatform.Windows, + }; + default: return new TargetPlatform[0]; + } + } + } + + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + + /// + public override void Build(BuildOptions options) + { + options.IntermediateFolder.Replace("/" + GetType().Name, "/Microsoft.VisualStudio.Setup.Configuration.Native"); + + // Get the source + var root = options.IntermediateFolder; + var packagePath = Path.Combine(root, $"package.zip"); + if (!File.Exists(packagePath)) + { + Downloader.DownloadFileFromUrlToPath("https://www.nuget.org/api/v2/package/Microsoft.VisualStudio.Setup.Configuration.Native/3.14.2075", packagePath); + } + var extractedPath = Path.Combine(root, "extracted"); + if (!Directory.Exists(extractedPath)) + { + using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) + archive.ExtractToDirectory(extractedPath); + } + root = extractedPath; + + foreach (var platform in options.Platforms) + { + foreach (var architecture in options.Architectures) + { + BuildStarted(platform, architecture); + switch (platform) + { + case TargetPlatform.Windows: + { + var bin = Path.Combine(root, "lib", "native", "v141", architecture.ToString().ToLower()); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + Utilities.FileCopy(Path.Combine(bin, "Microsoft.VisualStudio.Setup.Configuration.Native.lib"), Path.Combine(depsFolder, "Microsoft.VisualStudio.Setup.Configuration.Native.lib")); + + var include = Path.Combine(root, "lib", "native", "include"); + Utilities.FileCopy(Path.Combine(include, "Setup.Configuration.h"), Path.Combine(options.ThirdPartyFolder, "Microsoft.VisualStudio.Setup.Configuration.Native", "Setup.Configuration.h")); + break; + } + } + } + } + } + } +} diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs b/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs index 495de4734..58fb21b25 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/NewtonsoftJson.cs @@ -36,6 +36,24 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs b/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs index a4dbad307..99fd3ff21 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/NvCloth.cs @@ -1,5 +1,6 @@ // Copyright (c) Wojciech Figat. All rights reserved. 
+using System; using System.Collections.Generic; using System.IO; using System.Linq; @@ -16,40 +17,6 @@ namespace Flax.Deps.Dependencies { private string root, nvCloth; - /// - public override TargetPlatform[] Platforms - { - get - { - switch (BuildPlatform) - { - case TargetPlatform.Windows: - return new[] - { - TargetPlatform.Windows, - TargetPlatform.XboxOne, - TargetPlatform.XboxScarlett, - TargetPlatform.PS4, - TargetPlatform.PS5, - TargetPlatform.Switch, - TargetPlatform.Android, - }; - case TargetPlatform.Linux: - return new[] - { - TargetPlatform.Linux, - }; - case TargetPlatform.Mac: - return new[] - { - TargetPlatform.Mac, - TargetPlatform.iOS, - }; - default: return new TargetPlatform[0]; - } - } - } - /// public override void Build(BuildOptions options) { @@ -59,41 +26,51 @@ namespace Flax.Deps.Dependencies // Get the source CloneGitRepoSingleBranch(root, "https://github.com/FlaxEngine/NvCloth.git", "master"); + // Patch the CMakeLists.txt to support custom compilation flags + foreach (var os in new[] { "android", "ios", "linux", "mac", "windows", }) + { + var filePath = Path.Combine(nvCloth, "compiler", "cmake", os, "CMakeLists.txt"); + var appendLine = "SET(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${NVCLOTH_CXX_FLAGS}\")"; + if (!File.ReadAllText(filePath).Contains(appendLine)) + File.AppendAllText(filePath, Environment.NewLine + appendLine + Environment.NewLine); + } + foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - Build(options, platform, TargetArchitecture.x64); - Build(options, platform, TargetArchitecture.ARM64); - break; - case TargetPlatform.XboxOne: - case TargetPlatform.XboxScarlett: - Build(options, platform, TargetArchitecture.x64); - break; - case TargetPlatform.PS4: - case TargetPlatform.PS5: - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "NvCloth"), root, true, true); - Build(options, platform, TargetArchitecture.x64); - break; - case TargetPlatform.Switch: - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "NvCloth"), root, true, true); - Build(options, platform, TargetArchitecture.ARM64); - break; - case TargetPlatform.Android: - Build(options, platform, TargetArchitecture.ARM64); - break; - case TargetPlatform.Mac: - Build(options, platform, TargetArchitecture.x64); - Build(options, platform, TargetArchitecture.ARM64); - break; - case TargetPlatform.iOS: - Build(options, platform, TargetArchitecture.ARM64); - break; - case TargetPlatform.Linux: - Build(options, platform, TargetArchitecture.x64); - break; + BuildStarted(platform, architecture); + switch (platform) + { + case TargetPlatform.Windows: + Build(options, platform, architecture); + break; + case TargetPlatform.XboxOne: + case TargetPlatform.XboxScarlett: + Build(options, platform, TargetArchitecture.x64); + break; + case TargetPlatform.PS4: + case TargetPlatform.PS5: + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "NvCloth"), root, true, true); + Build(options, platform, TargetArchitecture.x64); + break; + case TargetPlatform.Switch: + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "NvCloth"), root, true, true); + Build(options, platform, TargetArchitecture.ARM64); + break; + case TargetPlatform.Android: + Build(options, platform, TargetArchitecture.ARM64); + break; + case TargetPlatform.Mac: + Build(options, platform, 
architecture); + break; + case TargetPlatform.iOS: + Build(options, platform, TargetArchitecture.ARM64); + break; + case TargetPlatform.Linux: + Build(options, platform, architecture); + break; + } } } @@ -110,7 +87,7 @@ namespace Flax.Deps.Dependencies // Peek options var binariesPrefix = string.Empty; var binariesPostfix = string.Empty; - var cmakeArgs = "-DNV_CLOTH_ENABLE_DX11=0 -DNV_CLOTH_ENABLE_CUDA=0 -DPX_GENERATE_GPU_PROJECTS=0"; + var cmakeArgs = "-DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DNV_CLOTH_ENABLE_DX11=0 -DNV_CLOTH_ENABLE_CUDA=0 -DPX_GENERATE_GPU_PROJECTS=0"; var cmakeName = string.Empty; var buildFolder = Path.Combine(nvCloth, "compiler", platform.ToString() + '_' + architecture.ToString()); var envVars = new Dictionary(); @@ -154,7 +131,7 @@ namespace Flax.Deps.Dependencies } break; case TargetPlatform.Mac: - cmakeArgs += " -DTARGET_BUILD_PLATFORM=mac"; + cmakeArgs += " -DTARGET_BUILD_PLATFORM=mac -DNVCLOTH_CXX_FLAGS=\"-Wno-error=poison-system-directories -Wno-error=missing-include-dirs\""; cmakeName = "mac"; binariesPrefix = "lib"; break; @@ -164,7 +141,7 @@ namespace Flax.Deps.Dependencies binariesPrefix = "lib"; break; case TargetPlatform.Linux: - cmakeArgs += " -DTARGET_BUILD_PLATFORM=linux"; + cmakeArgs += " -DTARGET_BUILD_PLATFORM=linux -DNVCLOTH_CXX_FLAGS=\"-Wno-error=poison-system-directories -Wno-error=missing-include-dirs\""; cmakeName = "linux"; binariesPrefix = "lib"; envVars.Add("CC", "clang-" + Configuration.LinuxClangMinVer); diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs b/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs index 319ad70b3..37e446ce1 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/OpenAL.cs @@ -1,5 +1,5 @@ // Copyright (c) Wojciech Figat. All rights reserved. 
- +//#define USE_GIT_REPOSITORY using System; using System.Collections.Generic; using System.IO; @@ -45,132 +45,75 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.Linux: + return new[] + { + TargetArchitecture.x64, + //TargetArchitecture.ARM64, + }; + case TargetPlatform.Mac: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.iOS: + return new[] + { + TargetArchitecture.ARM64, + }; + case TargetPlatform.Android: + return new[] + { + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { var root = options.IntermediateFolder; var version = "1.24.3"; var configuration = "Release"; + var cmakeArgs = "-DCMAKE_POLICY_VERSION_MINIMUM=3.5"; var dstIncludePath = Path.Combine(options.ThirdPartyFolder, "OpenAL"); var noSSL = true; // OpenAL Soft website has broken certs - foreach (var platform in options.Platforms) - { - BuildStarted(platform); - switch (platform) - { - case TargetPlatform.Windows: - { - var binariesToCopy = new[] - { - "OpenAL32.lib", - "OpenAL32.dll", - }; - - // Get the source - CloneGitRepo(root, "https://github.com/kcat/openal-soft.git"); - GitCheckout(root, "master", "dc7d7054a5b4f3bec1dc23a42fd616a0847af948"); // 1.24.3 - - // Build for Win64 and ARM64 - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) - { - var buildDir = Path.Combine(root, "build-" + architecture.ToString()); - var solutionPath = Path.Combine(buildDir, "OpenAL.sln"); - - RunCmake(root, platform, architecture, $"-B\"{buildDir}\" -DBUILD_SHARED_LIBS=OFF -DCMAKE_C_FLAGS=\"/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR /EHsc\" -DCMAKE_CXX_FLAGS=\"/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR /EHsc\""); - Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); - var depsFolder = GetThirdPartyFolder(options, platform, architecture); - foreach (var file in binariesToCopy) - Utilities.FileCopy(Path.Combine(buildDir, configuration, file), Path.Combine(depsFolder, Path.GetFileName(file))); - } - -#if false - // Get the binaries - var packagePath = Path.Combine(root, "package.zip"); - if (!File.Exists(packagePath)) - Downloader.DownloadFileFromUrlToPath("https://openal-soft.org/openal-binaries/openal-soft-" + version + "-bin.zip", packagePath, noSSL); - using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) - { - if (!Directory.Exists(root)) - archive.ExtractToDirectory(root); - root = Path.Combine(root, archive.Entries.First().FullName); - } - - // Deploy Win64 binaries - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - Utilities.FileCopy(Path.Combine(root, "bin", "Win64", "soft_oal.dll"), Path.Combine(depsFolder, "OpenAL32.dll")); - Utilities.FileCopy(Path.Combine(root, "libs", "Win64", "OpenAL32.lib"), Path.Combine(depsFolder, "OpenAL32.lib")); - - // Deploy license - Utilities.FileCopy(Path.Combine(root, "COPYING"), Path.Combine(dstIncludePath, "COPYING"), true); - - // Deploy header files - var files = Directory.GetFiles(Path.Combine(root, "include", "AL")); - foreach (var file in files) - { - Utilities.FileCopy(file, Path.Combine(dstIncludePath, Path.GetFileName(file))); - } +#if !USE_GIT_REPOSITORY + if 
(options.Platforms.Contains(TargetPlatform.Windows)) #endif - break; - } - case TargetPlatform.Linux: + { + // Get the source + CloneGitRepo(root, "https://github.com/kcat/openal-soft.git"); + GitCheckout(root, "master", "dc7d7054a5b4f3bec1dc23a42fd616a0847af948"); // 1.24.3 + } +#if !USE_GIT_REPOSITORY + else + { + // Get the source + var packagePath = Path.Combine(root, $"package-{version}.zip"); + if (!File.Exists(packagePath)) { - var binariesToCopy = new[] - { - "libopenal.a", - }; - var envVars = new Dictionary - { - { "CC", "clang-" + Configuration.LinuxClangMinVer }, - { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, - { "CXX", "clang++-" + Configuration.LinuxClangMinVer }, - { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, - }; - var config = $"-DALSOFT_REQUIRE_ALSA=ON " + - $"-DALSOFT_REQUIRE_OSS=ON " + - $"-DALSOFT_REQUIRE_PORTAUDIO=ON " + - $"-DALSOFT_REQUIRE_PULSEAUDIO=ON " + - $"-DALSOFT_REQUIRE_JACK=ON " + - $"-DALSOFT_REQUIRE_PIPEWIRE=ON " + - $"-DALSOFT_EMBED_HRTF_DATA=YES "; - - // Get the source - var packagePath = Path.Combine(root, "package.zip"); - File.Delete(packagePath); - Downloader.DownloadFileFromUrlToPath("https://openal-soft.org/openal-releases/openal-soft-" + version + ".tar.bz2", packagePath, noSSL); - Utilities.Run("tar", "xjf " + packagePath.Replace('\\', '/'), null, root, Utilities.RunOptions.ConsoleLogOutput); - - // Use separate build directory - root = Path.Combine(root, "openal-soft-" + version); - var buildDir = Path.Combine(root, "build"); - SetupDirectory(buildDir, true); - - // Build for Linux - Utilities.Run("cmake", $"-G \"Unix Makefiles\" -DCMAKE_BUILD_TYPE={configuration} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DLIBTYPE=STATIC {config} ..", null, buildDir, Utilities.RunOptions.ConsoleLogOutput, envVars); - BuildCmake(buildDir, configuration, envVars); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in binariesToCopy) - Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, file)); - break; - } - case TargetPlatform.Android: - { - var binariesToCopy = new[] - { - "libopenal.a", - }; - var envVars = new Dictionary - { - { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, - }; - var config = " -DALSOFT_REQUIRE_OBOE=OFF -DALSOFT_REQUIRE_OPENSL=ON -DALSOFT_EMBED_HRTF_DATA=YES"; - - // Get the source - var packagePath = Path.Combine(root, "package.zip"); - File.Delete(packagePath); Downloader.DownloadFileFromUrlToPath("https://openal-soft.org/openal-releases/openal-soft-" + version + ".tar.bz2", packagePath, noSSL); if (Platform.BuildTargetPlatform == TargetPlatform.Windows) { + // TODO: Maybe use PowerShell Expand-Archive instead? var sevenZip = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ProgramFiles), "7-Zip", "7z.exe"); Utilities.Run(sevenZip, "x package.zip", null, root); Utilities.Run(sevenZip, "x package", null, root); @@ -179,89 +122,167 @@ namespace Flax.Deps.Dependencies { Utilities.Run("tar", "xjf " + packagePath.Replace('\\', '/'), null, root, Utilities.RunOptions.ConsoleLogOutput); } - - // Use separate build directory - root = Path.Combine(root, "openal-soft-" + version); - var buildDir = Path.Combine(root, "build"); - SetupDirectory(buildDir, true); - - // Build - RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. 
-DLIBTYPE=STATIC -DCMAKE_BUILD_TYPE=" + configuration + config, envVars); - BuildCmake(buildDir, envVars); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - foreach (var file in binariesToCopy) - Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, file)); - break; } - case TargetPlatform.Mac: + } +#endif + + foreach (var platform in options.Platforms) + { + foreach (var architecture in options.Architectures) { - var binariesToCopy = new[] + BuildStarted(platform, architecture); + switch (platform) { - "libopenal.a", - }; - var envVars = new Dictionary + case TargetPlatform.Windows: { - { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, - }; - var config = " -DALSOFT_REQUIRE_COREAUDIO=ON -DALSOFT_EMBED_HRTF_DATA=YES"; + var binariesToCopy = new[] + { + "OpenAL32.lib", + "OpenAL32.dll", + }; - // Get the source - var packagePath = Path.Combine(root, "package.zip"); - File.Delete(packagePath); - Downloader.DownloadFileFromUrlToPath("https://openal-soft.org/openal-releases/openal-soft-" + version + ".tar.bz2", packagePath, noSSL); - Utilities.Run("tar", "xjf " + packagePath.Replace('\\', '/'), null, root, Utilities.RunOptions.ConsoleLogOutput); - - // Use separate build directory - root = Path.Combine(root, "openal-soft-" + version); - var buildDir = Path.Combine(root, "build"); - - // Build for Mac - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) - { + // Build for Windows + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + var solutionPath = Path.Combine(buildDir, "OpenAL.sln"); SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, architecture, ".. -DLIBTYPE=STATIC -DCMAKE_BUILD_TYPE=" + configuration + config, envVars); + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" -DBUILD_SHARED_LIBS=OFF -DCMAKE_C_FLAGS=\"/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR /EHsc\" -DCMAKE_CXX_FLAGS=\"/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR /EHsc\" " + cmakeArgs); + Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file in binariesToCopy) + Utilities.FileCopy(Path.Combine(buildDir, configuration, file), Path.Combine(depsFolder, Path.GetFileName(file))); + break; + } + case TargetPlatform.Linux: + { + var binariesToCopy = new[] + { + "libopenal.a", + }; + var envVars = new Dictionary + { + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, + { "CXX", "clang++-" + Configuration.LinuxClangMinVer }, + { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, + }; + var config = $"-DALSOFT_REQUIRE_ALSA=ON " + + $"-DALSOFT_REQUIRE_OSS=ON " + + $"-DALSOFT_REQUIRE_PORTAUDIO=ON " + + $"-DALSOFT_REQUIRE_PULSEAUDIO=ON " + + $"-DALSOFT_REQUIRE_JACK=ON " + + $"-DALSOFT_REQUIRE_PIPEWIRE=ON " + + $"-DALSOFT_EMBED_HRTF_DATA=YES " + + cmakeArgs; + + // Use separate build directory +#if !USE_GIT_REPOSITORY + root = Path.Combine(root, "openal-soft-" + version); +#endif + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + SetupDirectory(buildDir, true); + + // Build for Linux + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" -DLIBTYPE=STATIC -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=" + configuration + config, envVars); + BuildCmake(buildDir, configuration, envVars); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file 
in binariesToCopy) + Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, file)); + break; + } + case TargetPlatform.Android: + { + var binariesToCopy = new[] + { + "libopenal.a", + }; + var envVars = new Dictionary + { + { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, + }; + var config = "-DALSOFT_REQUIRE_OBOE=OFF -DALSOFT_REQUIRE_OPENSL=ON -DALSOFT_EMBED_HRTF_DATA=YES " + cmakeArgs; + + // Use separate build directory +#if !USE_GIT_REPOSITORY + root = Path.Combine(root, "openal-soft-" + version); +#endif + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + SetupDirectory(buildDir, true); + + // Build + RunCmake(root, platform, TargetArchitecture.ARM64, $"-B\"{buildDir}\" -DLIBTYPE=STATIC -DCMAKE_BUILD_TYPE=" + configuration + config, envVars); + BuildCmake(buildDir, envVars); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); + foreach (var file in binariesToCopy) + Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, file)); + break; + } + case TargetPlatform.Mac: + { + var binariesToCopy = new[] + { + "libopenal.a", + }; + var envVars = new Dictionary + { + { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, + }; + var config = " -DALSOFT_REQUIRE_COREAUDIO=ON -DALSOFT_EMBED_HRTF_DATA=YES " + cmakeArgs; + + // Use separate build directory +#if !USE_GIT_REPOSITORY + root = Path.Combine(root, "openal-soft-" + version); +#endif + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + SetupDirectory(buildDir, true); + + // Build for Mac + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" -DLIBTYPE=STATIC -DCMAKE_BUILD_TYPE=" + configuration + config, envVars); BuildCmake(buildDir, envVars); var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var file in binariesToCopy) Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, file)); + break; } - break; - } - case TargetPlatform.iOS: - { - var binariesToCopy = new[] + case TargetPlatform.iOS: { - "libopenal.a", - }; - var envVars = new Dictionary - { - { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, - }; - var config = " -DALSOFT_REQUIRE_COREAUDIO=ON -DALSOFT_EMBED_HRTF_DATA=YES"; + var binariesToCopy = new[] + { + "libopenal.a", + }; + var envVars = new Dictionary + { + { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, + }; + var config = " -DALSOFT_REQUIRE_COREAUDIO=ON -DALSOFT_EMBED_HRTF_DATA=YES " + cmakeArgs; - // Get the source - var packagePath = Path.Combine(root, "package.zip"); - if (!File.Exists(packagePath)) - { - Downloader.DownloadFileFromUrlToPath("https://openal-soft.org/openal-releases/openal-soft-" + version + ".tar.bz2", packagePath, noSSL); - Utilities.Run("tar", "xjf " + packagePath.Replace('\\', '/'), null, root, Utilities.RunOptions.ConsoleLogOutput); + // Use separate build directory +#if !USE_GIT_REPOSITORY + root = Path.Combine(root, "openal-soft-" + version); +#endif + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + SetupDirectory(buildDir, true); + + // Build for iOS + RunCmake(root, platform, TargetArchitecture.ARM64, $"-B\"{buildDir}\" -DCMAKE_SYSTEM_NAME=iOS -DALSOFT_OSX_FRAMEWORK=ON -DLIBTYPE=STATIC -DCMAKE_BUILD_TYPE=" + configuration + config, envVars); + BuildCmake(buildDir, envVars); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); + foreach (var file in binariesToCopy) + Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, file)); + break; + 
} } - - // Use separate build directory - root = Path.Combine(root, "openal-soft-" + version); - var buildDir = Path.Combine(root, "build"); - - // Build for iOS - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_SYSTEM_NAME=iOS -DALSOFT_OSX_FRAMEWORK=ON -DLIBTYPE=STATIC -DCMAKE_BUILD_TYPE=" + configuration + config, envVars); - BuildCmake(buildDir, envVars); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - foreach (var file in binariesToCopy) - Utilities.FileCopy(Path.Combine(buildDir, file), Path.Combine(depsFolder, file)); - break; - } } } + + // Deploy license + Utilities.FileCopy(Path.Combine(root, "COPYING"), Path.Combine(dstIncludePath, "COPYING"), true); + + // Deploy header files + var files = Directory.GetFiles(Path.Combine(root, "include", "AL")); + foreach (var file in files) + { + Utilities.FileCopy(file, Path.Combine(dstIncludePath, Path.GetFileName(file))); + } } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs b/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs index 39f7ad975..18bb4e69f 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/PhysX.cs @@ -17,40 +17,6 @@ namespace Flax.Deps.Dependencies /// class PhysX : Dependency { - /// - public override TargetPlatform[] Platforms - { - get - { - switch (BuildPlatform) - { - case TargetPlatform.Windows: - return new[] - { - TargetPlatform.Windows, - TargetPlatform.XboxOne, - TargetPlatform.PS4, - TargetPlatform.PS5, - TargetPlatform.XboxScarlett, - TargetPlatform.Android, - TargetPlatform.Switch, - }; - case TargetPlatform.Linux: - return new[] - { - TargetPlatform.Linux, - }; - case TargetPlatform.Mac: - return new[] - { - TargetPlatform.Mac, - TargetPlatform.iOS, - }; - default: return new TargetPlatform[0]; - } - } - } - private string root; private string projectGenDir; private string projectGenPath; @@ -65,8 +31,13 @@ namespace Flax.Deps.Dependencies if (cmakeSwitch.HasAttribute("name") && cmakeSwitch.Attributes["name"].Value == name) { cmakeSwitch.Attributes["value"].Value = value; + return; } } + var child = cmakeSwitches.OwnerDocument.CreateElement(cmakeSwitches.ChildNodes[0].Name); + child.SetAttribute("name", name); + child.SetAttribute("value", value); + cmakeSwitches.AppendChild(child); } private void Build(BuildOptions options, string preset, TargetPlatform targetPlatform, TargetArchitecture architecture) @@ -94,11 +65,14 @@ namespace Flax.Deps.Dependencies case TargetPlatform.Windows: if (architecture == TargetArchitecture.ARM64) { - // Windows ARM64 doesn't have GPU support, so avoid copying those DLLs around + // Windows ARM64 doesn't have precompiled files for GPU support, so avoid copying those DLLs around ConfigureCmakeSwitch(cmakeSwitches, "PX_COPY_EXTERNAL_DLL", "OFF"); ConfigureCmakeSwitch(cmakeParams, "PX_COPY_EXTERNAL_DLL", "OFF"); } break; + case TargetPlatform.Linux: + ConfigureCmakeSwitch(cmakeParams, "PHYSX_CXX_FLAGS", "\"-Wno-error=format -Wno-error=unused-but-set-variable -Wno-error=switch-default -Wno-error=invalid-offsetof -Wno-error=unsafe-buffer-usage -Wno-error=unsafe-buffer-usage-in-libc-call -Wno-error=missing-include-dirs\""); + break; case TargetPlatform.Android: ConfigureCmakeSwitch(cmakeParams, "CMAKE_INSTALL_PREFIX", $"install/android-{Configuration.AndroidPlatformApi}/PhysX"); ConfigureCmakeSwitch(cmakeParams, "ANDROID_NATIVE_API_LEVEL", $"android-{Configuration.AndroidPlatformApi}"); @@ -106,6 +80,7 @@ namespace 
Flax.Deps.Dependencies break; case TargetPlatform.Mac: ConfigureCmakeSwitch(cmakeParams, "CMAKE_OSX_DEPLOYMENT_TARGET", Configuration.MacOSXMinVer); + ConfigureCmakeSwitch(cmakeParams, "PHYSX_CXX_FLAGS", "\"-Wno-error=format -Wno-error=unused-but-set-variable -Wno-error=switch-default -Wno-error=invalid-offsetof -Wno-error=unsafe-buffer-usage -Wno-error=unsafe-buffer-usage-in-libc-call -Wno-error=missing-include-dirs\""); break; case TargetPlatform.iOS: ConfigureCmakeSwitch(cmakeParams, "CMAKE_OSX_DEPLOYMENT_TARGET", Configuration.iOSMinVer); @@ -122,10 +97,11 @@ namespace Flax.Deps.Dependencies string bits; string arch; string binariesSubDir; - string buildPlatform; + string buildPlatform = architecture == TargetArchitecture.x86 ? "Win32" : architecture.ToString(); bool suppressBitsPostfix = false; string binariesPrefix = string.Empty; var envVars = new Dictionary(); + envVars.Add("CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel); switch (architecture) { case TargetArchitecture.x86: @@ -146,15 +122,6 @@ namespace Flax.Deps.Dependencies break; default: throw new InvalidArchitectureException(architecture); } - switch (architecture) - { - case TargetArchitecture.x86: - buildPlatform = "Win32"; - break; - default: - buildPlatform = architecture.ToString(); - break; - } var msBuildProps = new Dictionary(); switch (targetPlatform) { @@ -385,60 +352,84 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - Build(options, "vc17win64", platform, TargetArchitecture.x64); - Build(options, "vc17win-arm64", platform, TargetArchitecture.ARM64); - break; - } - case TargetPlatform.Linux: - { - Build(options, "linux", platform, TargetArchitecture.x64); - break; - } - case TargetPlatform.PS4: - { - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "PhysX"), root, true, true); - Build(options, "ps4", platform, TargetArchitecture.x64); - break; - } - case TargetPlatform.PS5: - { - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "PhysX"), root, true, true); - Build(options, "ps5", platform, TargetArchitecture.x64); - break; - } - case TargetPlatform.XboxScarlett: - case TargetPlatform.XboxOne: - { - Build(options, "vc16win64", platform, TargetArchitecture.x64); - break; - } - case TargetPlatform.Android: - { - Build(options, "android", platform, TargetArchitecture.ARM64); - break; - } - case TargetPlatform.Switch: - { - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "PhysX"), root, true, true); - Build(options, "switch64", platform, TargetArchitecture.ARM64); - break; - } - case TargetPlatform.Mac: - { - Build(options, "mac64", platform, TargetArchitecture.x64); - Build(options, "mac-arm64", platform, TargetArchitecture.ARM64); - break; - } - case TargetPlatform.iOS: - { - Build(options, "ios64", platform, TargetArchitecture.ARM64); - break; - } + BuildStarted(platform, architecture); + switch (platform) + { + case TargetPlatform.Windows: + { + if (architecture == TargetArchitecture.x64 || architecture == TargetArchitecture.ARM64) + { + if (WindowsPlatform.GetToolsets().Any(x => x.Key == WindowsPlatformToolset.v145)) + { + try + { + Build(options, architecture == TargetArchitecture.x64 ? 
"vc18win64" : "vc18win-arm64", platform, architecture); + } + catch (Exception e) + { + Log.Warning($"Failed to generate VS2026 solution for PhysX, fallback to VS2022: {e.Message}"); + Build(options, architecture == TargetArchitecture.x64 ? "vc17win64" : "vc17win-arm64", platform, architecture); + } + } + else + Build(options, architecture == TargetArchitecture.x64 ? "vc17win64" : "vc17win-arm64", platform, architecture); + } + else + throw new InvalidArchitectureException(architecture); + break; + } + case TargetPlatform.Linux: + { + Build(options, "linux", platform, architecture); + break; + } + case TargetPlatform.PS4: + { + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "PhysX"), root, true, true); + Build(options, "ps4", platform, TargetArchitecture.x64); + break; + } + case TargetPlatform.PS5: + { + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "PhysX"), root, true, true); + Build(options, "ps5", platform, TargetArchitecture.x64); + break; + } + case TargetPlatform.XboxScarlett: + case TargetPlatform.XboxOne: + { + Build(options, "vc16win64", platform, TargetArchitecture.x64); + break; + } + case TargetPlatform.Android: + { + Build(options, "android", platform, TargetArchitecture.ARM64); + break; + } + case TargetPlatform.Switch: + { + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "PhysX"), root, true, true); + Build(options, "switch64", platform, TargetArchitecture.ARM64); + break; + } + case TargetPlatform.Mac: + { + if (architecture == TargetArchitecture.x64) + Build(options, "mac64", platform, architecture); + else if (architecture == TargetArchitecture.ARM64) + Build(options, "mac-arm64", platform, architecture); + else + throw new InvalidArchitectureException(architecture); + break; + } + case TargetPlatform.iOS: + { + Build(options, "ios64", platform, TargetArchitecture.ARM64); + break; + } + } } } @@ -446,7 +437,7 @@ namespace Flax.Deps.Dependencies var dstIncludePath = Path.Combine(options.ThirdPartyFolder, "PhysX"); Directory.GetFiles(dstIncludePath, "*.h", SearchOption.AllDirectories).ToList().ForEach(File.Delete); Utilities.FileCopy(Path.Combine(root, "LICENSE.md"), Path.Combine(dstIncludePath, "License.txt")); - Utilities.DirectoryCopy(Path.Combine(root, "physx", "include"), dstIncludePath); + Utilities.DirectoryCopy(Path.Combine(root, "physx", "include"), dstIncludePath, true, true); } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs b/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs index 617b82af0..19e314326 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/UVAtlas.cs @@ -29,6 +29,24 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { @@ -47,23 +65,23 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - // Build for Win64 - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + BuildStarted(platform, architecture); + switch (platform) { + case 
TargetPlatform.Windows: + { + // Build for Windows Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString(), new Dictionary() { { "RestorePackagesConfig", "true" } }); var depsFolder = GetThirdPartyFolder(options, TargetPlatform.Windows, architecture); foreach (var file in outputFileNames) { Utilities.FileCopy(Path.Combine(binFolder, architecture.ToString(), "Release", file), Path.Combine(depsFolder, file)); } + break; + } } - break; - } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/WinPixEventRuntime.cs b/Source/Tools/Flax.Build/Deps/Dependencies/WinPixEventRuntime.cs new file mode 100644 index 000000000..84a6f4f8b --- /dev/null +++ b/Source/Tools/Flax.Build/Deps/Dependencies/WinPixEventRuntime.cs @@ -0,0 +1,91 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +using System; +using System.IO; +using System.IO.Compression; +using System.Linq; +using Flax.Build; +using Flax.Build.Platforms; + +namespace Flax.Deps.Dependencies +{ + /// + /// WinPixEventRuntime. https://github.com/microsoft/PixEvents + /// + /// + class WinPixEventRuntime : Dependency + { + /// + public override TargetPlatform[] Platforms + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetPlatform.Windows, + }; + default: return new TargetPlatform[0]; + } + } + } + + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + + /// + public override void Build(BuildOptions options) + { + // Get the source + var root = options.IntermediateFolder; + var packagePath = Path.Combine(root, $"package.zip"); + if (!File.Exists(packagePath)) + { + Downloader.DownloadFileFromUrlToPath("https://www.nuget.org/api/v2/package/WinPixEventRuntime/1.0.240308001", packagePath); + } + var extractedPath = Path.Combine(root, "extracted"); + if (!Directory.Exists(extractedPath)) + { + using (ZipArchive archive = ZipFile.Open(packagePath, ZipArchiveMode.Read)) + archive.ExtractToDirectory(extractedPath); + } + root = extractedPath; + + foreach (var platform in options.Platforms) + { + foreach (var architecture in options.Architectures) + { + BuildStarted(platform, architecture); + switch (platform) + { + case TargetPlatform.Windows: + { + var bin = Path.Combine(root, "bin", architecture.ToString()); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + Utilities.FileCopy(Path.Combine(bin, "WinPixEventRuntime.dll"), Path.Combine(depsFolder, "WinPixEventRuntime.dll")); + Utilities.FileCopy(Path.Combine(bin, "WinPixEventRuntime.lib"), Path.Combine(depsFolder, "WinPixEventRuntime.lib")); + break; + } + } + } + } + } + } +} diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs b/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs index d5886810d..62a2b1097 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/astc.cs @@ -1,6 +1,5 @@ // Copyright (c) Wojciech Figat. All rights reserved. 
-using System.Collections.Generic; using System.IO; using Flax.Build; @@ -34,6 +33,30 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.Mac: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { @@ -45,14 +68,14 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - - foreach (var architecture in new []{ TargetArchitecture.x64, TargetArchitecture.ARM64 }) + BuildStarted(platform, architecture); + switch (platform) { - string buildDir = Path.Combine(root, "build-" + architecture.ToString()); + case TargetPlatform.Windows: + { + string buildDir = Path.Combine(root, "build-" + architecture); var isa = architecture == TargetArchitecture.ARM64 ? "-DASTCENC_ISA_NEON=ON" : "-DASTCENC_ISA_SSE2=ON"; var lib = architecture == TargetArchitecture.ARM64 ? "astcenc-neon-static.lib" : "astcenc-sse2-static.lib"; SetupDirectory(buildDir, true); @@ -60,12 +83,11 @@ namespace Flax.Deps.Dependencies BuildCmake(buildDir); var depsFolder = GetThirdPartyFolder(options, platform, architecture); Utilities.FileCopy(Path.Combine(buildDir, "Source/Release", lib), Path.Combine(depsFolder, "astcenc.lib")); - } - break; - case TargetPlatform.Mac: - foreach (var architecture in new []{ TargetArchitecture.x64, TargetArchitecture.ARM64 }) + break; + } + case TargetPlatform.Mac: { - string buildDir = Path.Combine(root, "build-" + architecture.ToString()); + string buildDir = Path.Combine(root, "build-" + architecture); var isa = architecture == TargetArchitecture.ARM64 ? "-DASTCENC_ISA_NEON=ON" : "-DASTCENC_ISA_SSE2=ON"; var lib = architecture == TargetArchitecture.ARM64 ? 
"libastcenc-neon-static.a" : "libastcenc-sse2-static.a"; SetupDirectory(buildDir, true); @@ -73,8 +95,9 @@ namespace Flax.Deps.Dependencies BuildCmake(buildDir); var depsFolder = GetThirdPartyFolder(options, platform, architecture); Utilities.FileCopy(Path.Combine(buildDir, "Source", lib), Path.Combine(depsFolder, "libastcenc.a")); + break; + } } - break; } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs b/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs index 447f573a7..2d25fed3d 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/curl.cs @@ -41,6 +41,36 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.Linux: + return new[] + { + TargetArchitecture.x64, + //TargetArchitecture.ARM64, + }; + case TargetPlatform.Mac: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { @@ -69,15 +99,15 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - // Build for Win64 and ARM64 - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + BuildStarted(platform, architecture); + switch (platform) { - var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + case TargetPlatform.Windows: + { + // Build for Windows + var buildDir = Path.Combine(root, "build-" + architecture); var solutionPath = Path.Combine(buildDir, "CURL.sln"); RunCmake(root, platform, architecture, $"-B\"{buildDir}\" -DBUILD_CURL_EXE=OFF -DBUILD_SHARED_LIBS=OFF -DCURL_STATIC_CRT=OFF"); @@ -85,57 +115,55 @@ namespace Flax.Deps.Dependencies var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var file in binariesToCopyWin) Utilities.FileCopy(Path.Combine(buildDir, "lib", configuration, file), Path.Combine(depsFolder, Path.GetFileName(file))); + break; } - break; - } - case TargetPlatform.Linux: - { - // Build for Linux - var settings = new[] + case TargetPlatform.Linux: { - "--without-librtmp", - "--without-ssl", - "--with-gnutls", - "--disable-ipv6", - "--disable-manual", - "--disable-verbose", - "--disable-shared", - "--enable-static", - "-disable-ldap --disable-sspi --disable-ftp --disable-file --disable-dict --disable-telnet --disable-tftp --disable-rtsp --disable-pop3 --disable-imap --disable-smtp --disable-gopher --disable-smb", - }; - var envVars = new Dictionary - { - { "CC", "clang-" + Configuration.LinuxClangMinVer }, - { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, - { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, - }; - var buildDir = Path.Combine(root, "build"); - SetupDirectory(buildDir, true); - Utilities.Run("chmod", "+x configure", null, root, Utilities.RunOptions.DefaultTool); - Utilities.Run(Path.Combine(root, "configure"), string.Join(" ", settings) + " --prefix=\"" + buildDir + "\"", null, root, Utilities.RunOptions.DefaultTool, envVars); - Utilities.Run("make", null, null, root, Utilities.RunOptions.DefaultTool); - Utilities.Run("make", "install", null, root, Utilities.RunOptions.DefaultTool); - var depsFolder = 
GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - var filename = "libcurl.a"; - Utilities.FileCopy(Path.Combine(buildDir, "lib", filename), Path.Combine(depsFolder, filename)); - break; - } - case TargetPlatform.Mac: - { - // Build for Mac - var settings = new[] - { - "--with-secure-transport", - "--without-librtmp", - "--disable-ipv6", - "--disable-manual", - "--disable-verbose", - "--disable-shared", - "--enable-static", - "-disable-ldap --disable-sspi --disable-ftp --disable-file --disable-dict --disable-telnet --disable-tftp --disable-rtsp --disable-pop3 --disable-imap --disable-smtp --disable-gopher --disable-smb", - }; - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + // Build for Linux + var settings = new[] + { + "--without-librtmp", + //"--without-ssl", + "--with-gnutls", + "--disable-ipv6", + "--disable-manual", + "--disable-verbose", + "--disable-shared", + "--enable-static", + "-disable-ldap --disable-sspi --disable-ftp --disable-file --disable-dict --disable-telnet --disable-tftp --disable-rtsp --disable-pop3 --disable-imap --disable-smtp --disable-gopher --disable-smb", + }; + var envVars = new Dictionary + { + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, + { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, + }; + var buildDir = Path.Combine(root, "build"); + SetupDirectory(buildDir, true); + Utilities.Run("chmod", "+x configure", null, root, Utilities.RunOptions.DefaultTool); + Utilities.Run(Path.Combine(root, "configure"), string.Join(" ", settings) + " --prefix=\"" + buildDir + "\"", null, root, Utilities.RunOptions.DefaultTool, envVars); + Utilities.Run("make", null, null, root, Utilities.RunOptions.DefaultTool); + Utilities.Run("make", "install", null, root, Utilities.RunOptions.DefaultTool); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + var filename = "libcurl.a"; + Utilities.FileCopy(Path.Combine(buildDir, "lib", filename), Path.Combine(depsFolder, filename)); + break; + } + case TargetPlatform.Mac: { + // Build for Mac + var settings = new[] + { + "--with-secure-transport", + "--without-librtmp", + "--disable-ipv6", + "--disable-manual", + "--disable-verbose", + "--disable-shared", + "--enable-static", + "-disable-ldap --disable-sspi --disable-ftp --disable-file --disable-dict --disable-telnet --disable-tftp --disable-rtsp --disable-pop3 --disable-imap --disable-smtp --disable-gopher --disable-smb", + }; + var arch = GetAppleArchName(architecture); var archName = arch + "-apple-darwin19"; if (architecture == TargetArchitecture.ARM64) @@ -162,9 +190,9 @@ namespace Flax.Deps.Dependencies var depsFolder = GetThirdPartyFolder(options, platform, architecture); var filename = "libcurl.a"; Utilities.FileCopy(Path.Combine(buildDir, "lib", filename), Path.Combine(depsFolder, filename)); + break; + } } - break; - } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs b/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs index 7017560fb..34fac56e0 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/dbghelp.cs @@ -30,27 +30,45 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + 
/// public override void Build(BuildOptions options) { foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - var sdk = WindowsPlatformBase.GetSDKs().Last(); - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + BuildStarted(platform, architecture); + switch (platform) { + case TargetPlatform.Windows: + { + var sdk = WindowsPlatformBase.GetSDKs().Last(); var depsFolder = GetThirdPartyFolder(options, platform, architecture); var libLocation = @$"{sdk.Value}Debuggers\lib\{architecture}\dbghelp.lib"; var dllLocation = @$"{sdk.Value}Debuggers\{architecture}\dbghelp.dll"; Utilities.FileCopy(libLocation, Path.Combine(depsFolder, Path.GetFileName(libLocation))); Utilities.FileCopy(dllLocation, Path.Combine(depsFolder, Path.GetFileName(dllLocation))); + break; + } } - break; - } } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs b/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs index 89ed09a72..d43c73770 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/freetype.cs @@ -15,40 +15,6 @@ namespace Flax.Deps.Dependencies /// class freetype : Dependency { - /// - public override TargetPlatform[] Platforms - { - get - { - switch (BuildPlatform) - { - case TargetPlatform.Windows: - return new[] - { - TargetPlatform.Windows, - TargetPlatform.XboxOne, - TargetPlatform.PS4, - TargetPlatform.PS5, - TargetPlatform.XboxScarlett, - TargetPlatform.Android, - TargetPlatform.Switch, - }; - case TargetPlatform.Linux: - return new[] - { - TargetPlatform.Linux, - }; - case TargetPlatform.Mac: - return new[] - { - TargetPlatform.Mac, - TargetPlatform.iOS, - }; - default: return new TargetPlatform[0]; - } - } - } - /// public override void Build(BuildOptions options) { @@ -94,171 +60,167 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - // Patch the RuntimeLibrary value - File.WriteAllText(vcxprojPath, vcxprojContents); - - // Build for Windows - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + BuildStarted(platform, architecture); + switch (platform) { + case TargetPlatform.Windows: + { + // Patch the RuntimeLibrary value + File.WriteAllText(vcxprojPath, vcxprojContents); + + // Build for Windows Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, architecture.ToString(), msvcProps); var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var filename in binariesToCopyMsvc) Utilities.FileCopy(Path.Combine(root, "objs", architecture.ToString(), configurationMsvc, filename), Path.Combine(depsFolder, filename)); + break; } - break; - } - case TargetPlatform.Linux: - { - var envVars = new Dictionary + case TargetPlatform.Linux: { - { "CC", "clang-" + Configuration.LinuxClangMinVer }, - { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, - { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, - }; + var envVars = new Dictionary + { + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, + { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, + }; - // Fix scripts - Utilities.Run("dos2unix", "autogen.sh", null, root, 
Utilities.RunOptions.ThrowExceptionOnError, envVars); - Utilities.Run("dos2unix", "configure", null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); - //Utilities.Run("sed", "-i -e \'s/\r$//\' autogen.sh", null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); - //Utilities.Run("sed", "-i -e \'s/\r$//\' configure", null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); - Utilities.Run("chmod", "+x autogen.sh", null, root, Utilities.RunOptions.ThrowExceptionOnError); - Utilities.Run("chmod", "+x configure", null, root, Utilities.RunOptions.ThrowExceptionOnError); + // Fix scripts + Utilities.Run("dos2unix", "autogen.sh", null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); + Utilities.Run("dos2unix", "configure", null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); + //Utilities.Run("sed", "-i -e \'s/\r$//\' autogen.sh", null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); + //Utilities.Run("sed", "-i -e \'s/\r$//\' configure", null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); + Utilities.Run("chmod", "+x autogen.sh", null, root, Utilities.RunOptions.ThrowExceptionOnError); + Utilities.Run("chmod", "+x configure", null, root, Utilities.RunOptions.ThrowExceptionOnError); - Utilities.Run(Path.Combine(root, "autogen.sh"), null, null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); + Utilities.Run(Path.Combine(root, "autogen.sh"), null, null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); - // Disable using libpng even if it's found on the system - var cmakeFile = Path.Combine(root, "CMakeLists.txt"); - File.WriteAllText(cmakeFile, - File.ReadAllText(cmakeFile) - .Replace("find_package(PNG)", "") - .Replace("find_package(ZLIB)", "") - .Replace("find_package(BZip2)", "") - ); + // Disable using libpng even if it's found on the system + var cmakeFile = Path.Combine(root, "CMakeLists.txt"); + File.WriteAllText(cmakeFile, + File.ReadAllText(cmakeFile) + .Replace("find_package(PNG)", "") + .Replace("find_package(ZLIB)", "") + .Replace("find_package(BZip2)", "") + ); - // Build for Linux - SetupDirectory(buildDir, true); - var toolchain = UnixToolchain.GetToolchainName(platform, TargetArchitecture.x64); - Utilities.Run("cmake", string.Format("-G \"Unix Makefiles\" -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DFT_WITH_BZIP2=OFF -DFT_WITH_ZLIB=OFF -DFT_WITH_PNG=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER_TARGET={0} ..", toolchain), null, buildDir, Utilities.RunOptions.DefaultTool, envVars); - Utilities.Run("cmake", "--build .", null, buildDir, Utilities.RunOptions.DefaultTool, envVars); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - - break; - } - case TargetPlatform.PS4: - { - // Get the build data files - Utilities.DirectoryCopy( - Path.Combine(GetBinariesFolder(options, platform), "Data", "freetype"), - Path.Combine(root, "builds", "PS4"), false, true); - - // Build for PS4 - var solutionPath = Path.Combine(root, "builds", "PS4", "freetype.sln"); - Deploy.VCEnvironment.BuildSolution(solutionPath, "Release", "ORBIS"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - Utilities.FileCopy(Path.Combine(root, "lib", "PS4", libraryFileName), Path.Combine(depsFolder, libraryFileName)); - - break; - } - case TargetPlatform.PS5: - { - // Get the build data files - Utilities.DirectoryCopy( - 
Path.Combine(GetBinariesFolder(options, platform), "Data", "freetype"), - Path.Combine(root, "builds", "PS5"), false, true); - Utilities.ReplaceInFile(Path.Combine(root, "include\\freetype\\config\\ftstdlib.h"), "#define ft_getenv getenv", "char* ft_getenv(const char* n);"); - - // Build for PS5 - var solutionPath = Path.Combine(root, "builds", "PS5", "freetype.sln"); - Deploy.VCEnvironment.BuildSolution(solutionPath, "Release", "PROSPERO"); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - Utilities.FileCopy(Path.Combine(root, "lib", "PS5", libraryFileName), Path.Combine(depsFolder, libraryFileName)); - - break; - } - case TargetPlatform.XboxOne: - { - // Build for Xbox One x64 - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64", msvcProps); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var filename in binariesToCopyMsvc) - Utilities.FileCopy(Path.Combine(root, "objs", "x64", configurationMsvc, filename), Path.Combine(depsFolder, filename)); - - break; - } - case TargetPlatform.XboxScarlett: - { - // Build for Xbox Scarlett - Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64", msvcProps); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var filename in binariesToCopyMsvc) - Utilities.FileCopy(Path.Combine(root, "objs", "x64", configurationMsvc, filename), Path.Combine(depsFolder, filename)); - - break; - } - case TargetPlatform.Android: - { - // Disable using libpng even if it's found on the system - var cmakeFile = Path.Combine(root, "CMakeLists.txt"); - File.WriteAllText(cmakeFile, - File.ReadAllText(cmakeFile) - .Replace("find_package(PNG)", "") - .Replace("find_package(ZLIB)", "") - .Replace("find_package(BZip2)", "") - ); - - // Build for Android - SetupDirectory(buildDir, true); - RunCmake(buildDir, TargetPlatform.Android, TargetArchitecture.ARM64, ".. -DFT_WITH_BZIP2=OFF -DFT_WITH_ZLIB=OFF -DFT_WITH_PNG=OFF -DCMAKE_BUILD_TYPE=Release"); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - break; - } - case TargetPlatform.Switch: - { - // Build for Switch - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release"); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - break; - } - case TargetPlatform.Mac: - { - // Build for Mac - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) - { + // Build for Linux SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, architecture, ".. 
-DCMAKE_BUILD_TYPE=Release"); + var toolchain = UnixToolchain.GetToolchainName(platform, architecture); + Utilities.Run("cmake", string.Format("-G \"Unix Makefiles\" -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DFT_WITH_BZIP2=OFF -DFT_WITH_ZLIB=OFF -DFT_WITH_PNG=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER_TARGET={0} ..", toolchain), null, buildDir, Utilities.RunOptions.DefaultTool, envVars); + Utilities.Run("cmake", "--build .", null, buildDir, Utilities.RunOptions.DefaultTool, envVars); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); + break; + } + case TargetPlatform.PS4: + { + // Get the build data files + Utilities.DirectoryCopy( + Path.Combine(GetBinariesFolder(options, platform), "Data", "freetype"), + Path.Combine(root, "builds", "PS4"), false, true); + + // Build for PS4 + var solutionPath = Path.Combine(root, "builds", "PS4", "freetype.sln"); + Deploy.VCEnvironment.BuildSolution(solutionPath, "Release", "ORBIS"); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + Utilities.FileCopy(Path.Combine(root, "lib", "PS4", libraryFileName), Path.Combine(depsFolder, libraryFileName)); + + break; + } + case TargetPlatform.PS5: + { + // Get the build data files + Utilities.DirectoryCopy( + Path.Combine(GetBinariesFolder(options, platform), "Data", "freetype"), + Path.Combine(root, "builds", "PS5"), false, true); + Utilities.ReplaceInFile(Path.Combine(root, "include\\freetype\\config\\ftstdlib.h"), "#define ft_getenv getenv", "char* ft_getenv(const char* n);"); + + // Build for PS5 + var solutionPath = Path.Combine(root, "builds", "PS5", "freetype.sln"); + Deploy.VCEnvironment.BuildSolution(solutionPath, "Release", "PROSPERO"); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + Utilities.FileCopy(Path.Combine(root, "lib", "PS5", libraryFileName), Path.Combine(depsFolder, libraryFileName)); + + break; + } + case TargetPlatform.XboxOne: + { + // Build for Xbox One x64 + Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64", msvcProps); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + foreach (var filename in binariesToCopyMsvc) + Utilities.FileCopy(Path.Combine(root, "objs", "x64", configurationMsvc, filename), Path.Combine(depsFolder, filename)); + + break; + } + case TargetPlatform.XboxScarlett: + { + // Build for Xbox Scarlett + Deploy.VCEnvironment.BuildSolution(vsSolutionPath, configurationMsvc, "x64", msvcProps); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); + foreach (var filename in binariesToCopyMsvc) + Utilities.FileCopy(Path.Combine(root, "objs", "x64", configurationMsvc, filename), Path.Combine(depsFolder, filename)); + + break; + } + case TargetPlatform.Android: + { + // Disable using libpng even if it's found on the system + var cmakeFile = Path.Combine(root, "CMakeLists.txt"); + File.WriteAllText(cmakeFile, + File.ReadAllText(cmakeFile) + .Replace("find_package(PNG)", "") + .Replace("find_package(ZLIB)", "") + .Replace("find_package(BZip2)", "") + ); + + // Build for Android + SetupDirectory(buildDir, true); + RunCmake(buildDir, TargetPlatform.Android, TargetArchitecture.ARM64, ".. 
-DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DFT_WITH_BZIP2=OFF -DFT_WITH_ZLIB=OFF -DFT_WITH_PNG=OFF -DCMAKE_BUILD_TYPE=Release"); + BuildCmake(buildDir); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); + Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); + break; + } + case TargetPlatform.Switch: + { + // Build for Switch + SetupDirectory(buildDir, true); + RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE=Release"); + BuildCmake(buildDir); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); + Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); + break; + } + case TargetPlatform.Mac: + { + // Build for Mac + SetupDirectory(buildDir, true); + RunCmake(buildDir, platform, architecture, ".. -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE=Release"); BuildCmake(buildDir); var depsFolder = GetThirdPartyFolder(options, platform, architecture); Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); + break; } - break; - } - case TargetPlatform.iOS: - { - // Fix archive creation issue due to missing ar tool - Utilities.ReplaceInFile(Path.Combine(root, "builds/cmake/iOS.cmake"), "set(CMAKE_SYSTEM_NAME Darwin)", "set(CMAKE_SYSTEM_NAME Darwin)\nset(CMAKE_AR ar CACHE FILEPATH \"\" FORCE)"); + case TargetPlatform.iOS: + { + // Fix archive creation issue due to missing ar tool + Utilities.ReplaceInFile(Path.Combine(root, "builds/cmake/iOS.cmake"), "set(CMAKE_SYSTEM_NAME Darwin)", "set(CMAKE_SYSTEM_NAME Darwin)\nset(CMAKE_AR ar CACHE FILEPATH \"\" FORCE)"); - // Fix freetype toolchain rejecting min iPhone version - Utilities.ReplaceInFile(Path.Combine(root, "builds/cmake/iOS.cmake"), "set(CMAKE_OSX_DEPLOYMENT_TARGET \"\"", "set(CMAKE_OSX_DEPLOYMENT_TARGET \"${CMAKE_OSX_DEPLOYMENT_TARGET}\""); + // Fix freetype toolchain rejecting min iPhone version + Utilities.ReplaceInFile(Path.Combine(root, "builds/cmake/iOS.cmake"), "set(CMAKE_OSX_DEPLOYMENT_TARGET \"\"", "set(CMAKE_OSX_DEPLOYMENT_TARGET \"${CMAKE_OSX_DEPLOYMENT_TARGET}\""); - // Build for iOS - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. -DIOS_PLATFORM=OS -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_BUILD_TYPE=Release -DFT_WITH_BZIP2=OFF -DFT_WITH_ZLIB=OFF -DFT_WITH_PNG=OFF"); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); - break; - } + // Build for iOS + SetupDirectory(buildDir, true); + RunCmake(buildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DIOS_PLATFORM=OS -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_BUILD_TYPE=Release -DFT_WITH_BZIP2=OFF -DFT_WITH_ZLIB=OFF -DFT_WITH_PNG=OFF"); + BuildCmake(buildDir); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); + Utilities.FileCopy(Path.Combine(buildDir, libraryFileName), Path.Combine(depsFolder, libraryFileName)); + break; + } + } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs b/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs index a876083f8..32c14a037 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/glslang.cs @@ -1,5 +1,6 @@ // Copyright (c) Wojciech Figat. 
All rights reserved. +using System; using System.IO; using Flax.Build; @@ -38,13 +39,43 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.Linux: + return new[] + { + TargetArchitecture.x64, + //TargetArchitecture.ARM64, + }; + case TargetPlatform.Mac: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { var root = options.IntermediateFolder; var installDir = Path.Combine(root, "install"); var configuration = "Release"; - var cmakeArgs = string.Format("-DCMAKE_INSTALL_PREFIX=\"{0}\" -DCMAKE_BUILD_TYPE={1} -DENABLE_RTTI=ON -DENABLE_CTEST=OFF -DENABLE_HLSL=ON -DENABLE_SPVREMAPPER=ON -DENABLE_GLSLANG_BINARIES=OFF", installDir, configuration); + var cmakeArgs = $"-DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_INSTALL_PREFIX=\"{installDir}\" -DCMAKE_BUILD_TYPE={configuration} -DENABLE_RTTI=ON -DENABLE_CTEST=OFF -DENABLE_HLSL=ON -DENABLE_SPVREMAPPER=ON -DENABLE_GLSLANG_BINARIES=OFF"; var libsRoot = Path.Combine(installDir, "lib"); // Get the source @@ -52,97 +83,93 @@ namespace Flax.Deps.Dependencies // Setup the external sources // Requires distutils (pip install setuptools) - Utilities.Run("python", "update_glslang_sources.py", null, root, Utilities.RunOptions.ConsoleLogOutput); + if (Utilities.Run(BuildPlatform != TargetPlatform.Mac ? "python" : "python3", "update_glslang_sources.py", null, root, Utilities.RunOptions.ConsoleLogOutput) != 0) + throw new Exception("Failed to update glslang sources, make sure setuptools python package is installed."); foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - var outputFiles = new[] - { - Path.Combine(libsRoot, "GenericCodeGen.lib"), - Path.Combine(libsRoot, "MachineIndependent.lib"), - Path.Combine(libsRoot, "HLSL.lib"), - Path.Combine(libsRoot, "OSDependent.lib"), - Path.Combine(libsRoot, "OGLCompiler.lib"), - Path.Combine(libsRoot, "SPIRV-Tools-opt.lib"), - Path.Combine(libsRoot, "SPIRV-Tools.lib"), - Path.Combine(libsRoot, "SPIRV.lib"), - Path.Combine(libsRoot, "glslang.lib"), - }; + BuildStarted(platform, architecture); - // Build for Windows - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + switch (platform) { - var buildDir = Path.Combine(root, "build-" + architecture.ToString()); + case TargetPlatform.Windows: + { + var outputFiles = new[] + { + Path.Combine(libsRoot, "GenericCodeGen.lib"), + Path.Combine(libsRoot, "MachineIndependent.lib"), + Path.Combine(libsRoot, "HLSL.lib"), + Path.Combine(libsRoot, "OSDependent.lib"), + Path.Combine(libsRoot, "OGLCompiler.lib"), + Path.Combine(libsRoot, "SPIRV-Tools-opt.lib"), + Path.Combine(libsRoot, "SPIRV-Tools.lib"), + Path.Combine(libsRoot, "SPIRV.lib"), + Path.Combine(libsRoot, "glslang.lib"), + }; + + // Build for Windows var solutionPath = Path.Combine(buildDir, "glslang.sln"); - SetupDirectory(buildDir, false); - RunCmake(root, platform, architecture, cmakeArgs + $" -B\"{buildDir}\""); - Utilities.Run("cmake", string.Format("--build . 
--config {0} --target install", configuration), null, buildDir, Utilities.RunOptions.ConsoleLogOutput); + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" " + cmakeArgs); Deploy.VCEnvironment.BuildSolution(solutionPath, configuration, architecture.ToString()); + Utilities.Run("cmake", $"--build \"{buildDir}\" --config {configuration} --target install", null, buildDir, Utilities.RunOptions.ConsoleLogOutput); var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var file in outputFiles) { Utilities.FileCopy(file, Path.Combine(depsFolder, Path.GetFileName(file))); } + break; } - break; - } - case TargetPlatform.Linux: - { - var outputFiles = new[] + case TargetPlatform.Linux: { - Path.Combine(libsRoot, "libGenericCodeGen.a"), - Path.Combine(libsRoot, "libMachineIndependent.a"), - Path.Combine(libsRoot, "libHLSL.a"), - Path.Combine(libsRoot, "libOSDependent.a"), - Path.Combine(libsRoot, "libOGLCompiler.a"), - Path.Combine(libsRoot, "libSPIRV-Tools-opt.a"), - Path.Combine(libsRoot, "libSPIRV-Tools.a"), - Path.Combine(libsRoot, "libSPIRV.a"), - Path.Combine(libsRoot, "libglslang.a"), - }; - var buildDir = root; + var outputFiles = new[] + { + Path.Combine(libsRoot, "libGenericCodeGen.a"), + Path.Combine(libsRoot, "libMachineIndependent.a"), + Path.Combine(libsRoot, "libHLSL.a"), + Path.Combine(libsRoot, "libOSDependent.a"), + Path.Combine(libsRoot, "libOGLCompiler.a"), + Path.Combine(libsRoot, "libSPIRV-Tools-opt.a"), + Path.Combine(libsRoot, "libSPIRV-Tools.a"), + Path.Combine(libsRoot, "libSPIRV.a"), + Path.Combine(libsRoot, "libglslang.a"), + }; - // Build for Linux - RunCmake(root, platform, TargetArchitecture.x64, cmakeArgs); - Utilities.Run("cmake", string.Format("--build . --config {0} --target install", configuration), null, buildDir, Utilities.RunOptions.ConsoleLogOutput); - Utilities.Run("make", null, null, root, Utilities.RunOptions.ConsoleLogOutput); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in outputFiles) - { - var dst = Path.Combine(depsFolder, Path.GetFileName(file)); - Utilities.FileCopy(file, dst); - //Utilities.Run("strip", string.Format("-s \"{0}\"", dst), null, null, Utilities.RunOptions.ConsoleLogOutput); + // Build for Linux + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" " + cmakeArgs); + Utilities.Run("make", null, null, buildDir, Utilities.RunOptions.ConsoleLogOutput); + Utilities.Run("cmake", $"--build \"{buildDir}\" --config {configuration} --target install", null, buildDir, Utilities.RunOptions.ConsoleLogOutput); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); + foreach (var file in outputFiles) + { + var dst = Path.Combine(depsFolder, Path.GetFileName(file)); + Utilities.FileCopy(file, dst); + //Utilities.Run("strip", string.Format("-s \"{0}\"", dst), null, null, Utilities.RunOptions.ConsoleLogOutput); + } + break; } - break; - } - case TargetPlatform.Mac: - { - var outputFiles = new[] + case TargetPlatform.Mac: { - Path.Combine(libsRoot, "libGenericCodeGen.a"), - Path.Combine(libsRoot, "libMachineIndependent.a"), - Path.Combine(libsRoot, "libHLSL.a"), - Path.Combine(libsRoot, "libOSDependent.a"), - Path.Combine(libsRoot, "libOGLCompiler.a"), - Path.Combine(libsRoot, "libSPIRV-Tools-opt.a"), - Path.Combine(libsRoot, "libSPIRV-Tools.a"), - Path.Combine(libsRoot, "libSPIRV.a"), - Path.Combine(libsRoot, "libglslang.a"), - }; - var buildDir = root; + var outputFiles = new[] + { + Path.Combine(libsRoot, "libGenericCodeGen.a"), + 
Path.Combine(libsRoot, "libMachineIndependent.a"), + Path.Combine(libsRoot, "libHLSL.a"), + Path.Combine(libsRoot, "libOSDependent.a"), + Path.Combine(libsRoot, "libOGLCompiler.a"), + Path.Combine(libsRoot, "libSPIRV-Tools-opt.a"), + Path.Combine(libsRoot, "libSPIRV-Tools.a"), + Path.Combine(libsRoot, "libSPIRV.a"), + Path.Combine(libsRoot, "libglslang.a"), + }; - // Build for Mac - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) - { - RunCmake(root, platform, architecture, cmakeArgs); - Utilities.Run("cmake", string.Format("--build . --config {0} --target install", configuration), null, buildDir, Utilities.RunOptions.ConsoleLogOutput); - Utilities.Run("make", null, null, root, Utilities.RunOptions.ConsoleLogOutput); + // Build for Mac + RunCmake(root, platform, architecture, $"-B\"{buildDir}\" " + cmakeArgs); + Utilities.Run("make", null, null, buildDir, Utilities.RunOptions.ConsoleLogOutput); + Utilities.Run("cmake", $"--build \"{buildDir}\" --config {configuration} --target install", null, buildDir, Utilities.RunOptions.ConsoleLogOutput); var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var file in outputFiles) { @@ -150,9 +177,9 @@ namespace Flax.Deps.Dependencies Utilities.FileCopy(file, dst); Utilities.Run("strip", string.Format("\"{0}\"", dst), null, null, Utilities.RunOptions.ConsoleLogOutput); } + break; + } } - break; - } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs b/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs index 57d2f74fe..a90d1c2a0 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/mono.cs @@ -53,6 +53,48 @@ namespace Flax.Deps.Dependencies } } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.Linux: + return new[] + { + TargetArchitecture.x64, + //TargetArchitecture.ARM64, + }; + case TargetPlatform.Mac: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.XboxOne: + case TargetPlatform.XboxScarlett: + return new[] + { + TargetArchitecture.x64, + }; + case TargetPlatform.Switch: + case TargetPlatform.Android: + return new[] + { + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } + private string root; private string monoPropsPath; private string monoPreprocesorDefines; diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs b/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs index 66909f6b9..f67244c9b 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/nethost.cs @@ -43,6 +43,9 @@ namespace Flax.Deps.Dependencies } } + /// + public override bool BuildByDefault => false; + private string root; private bool cleanArtifacts; @@ -349,24 +352,27 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - var platformData = Path.Combine(GetBinariesFolder(options, platform), "Data", "nethost"); - if (Directory.Exists(platformData)) - Utilities.DirectoryCopy(platformData, root, true, true); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.PS4: - case TargetPlatform.PS5: - case TargetPlatform.XboxOne: - case TargetPlatform.XboxScarlett: - Build(options, platform, TargetArchitecture.x64); + 
BuildStarted(platform, architecture); + var platformData = Path.Combine(GetBinariesFolder(options, platform), "Data", "nethost"); + if (Directory.Exists(platformData)) + Utilities.DirectoryCopy(platformData, root, true, true); + switch (platform) + { + case TargetPlatform.PS4: + case TargetPlatform.PS5: + case TargetPlatform.XboxOne: + case TargetPlatform.XboxScarlett: + Build(options, platform, TargetArchitecture.x64); break; - case TargetPlatform.Android: - Build(options, platform, TargetArchitecture.ARM64); + case TargetPlatform.Android: + Build(options, platform, TargetArchitecture.ARM64); break; - case TargetPlatform.Switch: - Build(options, platform, TargetArchitecture.ARM64); + case TargetPlatform.Switch: + Build(options, platform, TargetArchitecture.ARM64); break; + } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs b/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs index d1d94b4c1..6f18a9190 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/nvapi.cs @@ -18,6 +18,23 @@ namespace Flax.Deps.Dependencies get => new[] { TargetPlatform.Windows }; } + /// + public override TargetArchitecture[] Architectures + { + get + { + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64 + }; + default: return new TargetArchitecture[0]; + } + } + } + /// public override void Build(BuildOptions options) { @@ -30,7 +47,7 @@ namespace Flax.Deps.Dependencies // Copy files foreach (var platform in options.Platforms) { - BuildStarted(platform); + BuildStarted(platform, TargetArchitecture.x64); var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); Utilities.FileCopy(Path.Combine(root, "amd64/nvapi64.lib"), Path.Combine(depsFolder, "nvapi64.lib")); } diff --git a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs index d22f8696f..15ca415da 100644 --- a/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs +++ b/Source/Tools/Flax.Build/Deps/Dependencies/vorbis.cs @@ -15,56 +15,24 @@ namespace Flax.Deps.Dependencies /// class vorbis : Dependency { - /// - public override TargetPlatform[] Platforms - { - get - { - switch (BuildPlatform) - { - case TargetPlatform.Windows: - return new[] - { - TargetPlatform.Windows, - TargetPlatform.XboxOne, - TargetPlatform.PS4, - TargetPlatform.PS5, - TargetPlatform.XboxScarlett, - TargetPlatform.Android, - TargetPlatform.Switch, - }; - case TargetPlatform.Linux: - return new[] - { - TargetPlatform.Linux, - }; - case TargetPlatform.Mac: - return new[] - { - TargetPlatform.Mac, - TargetPlatform.iOS, - }; - default: return new TargetPlatform[0]; - } - } - } - private struct Binary { public string Filename; public string SrcFolder; + public string DstFilename; - public Binary(string filename, string srcFolder) + public Binary(string filename, string srcFolder, string dstFilename = null) { Filename = filename; SrcFolder = srcFolder; + DstFilename = dstFilename; } } private bool hasSourcesReady; private string root; private string rootMsvcLib; - private string configurationMsvc; + private string _configuration = "Release"; private List vcxprojContentsWindows; private string[] vcxprojPathsWindows; @@ -74,22 +42,6 @@ namespace Flax.Deps.Dependencies new Binary("libvorbisfile_static.lib", "libvorbisfile"), }; - private (string, string)[] vorbisBinariesToCopyWindowsCmake = - { - ("vorbis.lib", "libvorbis_static.lib"), - ("vorbisfile.lib", "libvorbisfile_static.lib"), - }; - - 
private Binary[] oggBinariesToCopyWindows = - { - new Binary("libogg_static.lib", "ogg"), - }; - - private (string, string)[] oggBinariesToCopyWindowsCmake = - { - ("ogg.lib", "libogg_static.lib"), - }; - private void PatchWindowsTargetPlatformVersion(string windowsTargetPlatformVersion, string platformToolset) { // Fix the MSVC project settings for Windows @@ -107,7 +59,6 @@ namespace Flax.Deps.Dependencies return; hasSourcesReady = true; - configurationMsvc = "Release"; string oggRoot = Path.Combine(root, "libogg"); string vorbisRoot = Path.Combine(root, "libvorbis"); @@ -197,7 +148,7 @@ namespace Flax.Deps.Dependencies break; default: throw new InvalidArchitectureException(architecture); } - binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); + binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, _configuration)))); break; } case TargetPlatform.PS4: @@ -216,7 +167,7 @@ namespace Flax.Deps.Dependencies buildDir, true, true); Utilities.FileCopy(Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg", "ogg", "config_types.h"), Path.Combine(root, "libogg", "include", "ogg", "config_types.h")); - binariesToCopy.AddRange(binariesToCopyVorbis.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); + binariesToCopy.AddRange(binariesToCopyVorbis.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, _configuration)))); break; } case TargetPlatform.PS5: @@ -237,7 +188,7 @@ namespace Flax.Deps.Dependencies Utilities.FileCopy( Path.Combine(GetBinariesFolder(options, platform), "Data", "ogg", "ogg", "config_types.h"), Path.Combine(root, "libogg", "include", "ogg", "config_types.h")); - binariesToCopy.AddRange(binariesToCopyVorbis.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); + binariesToCopy.AddRange(binariesToCopyVorbis.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, _configuration)))); break; } case TargetPlatform.XboxOne: @@ -245,21 +196,21 @@ namespace Flax.Deps.Dependencies vcxprojPaths = vcxprojPathsWindows; buildPlatform = "x64"; PatchWindowsTargetPlatformVersion("10.0", "v143"); - binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); + binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, _configuration)))); break; case TargetPlatform.XboxScarlett: buildDir = Path.Combine(rootMsvcLib, "win32", "VS2010"); vcxprojPaths = vcxprojPathsWindows; buildPlatform = "x64"; PatchWindowsTargetPlatformVersion("10.0", "v143"); - binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, configurationMsvc)))); + binariesToCopy.AddRange(vorbisBinariesToCopyWindows.Select(x => new Binary(x.Filename, Path.Combine(buildDir, x.SrcFolder, buildPlatform, _configuration)))); break; default: throw new InvalidPlatformException(platform); } // Build foreach (var vcxprojPath in vcxprojPaths) - Deploy.VCEnvironment.BuildSolution(vcxprojPath, configurationMsvc, buildPlatform); + Deploy.VCEnvironment.BuildSolution(vcxprojPath, _configuration, buildPlatform); // Copy binaries 
var depsFolder = GetThirdPartyFolder(options, platform, architecture); @@ -273,48 +224,109 @@ namespace Flax.Deps.Dependencies string oggRoot = Path.Combine(root, "libogg"); string vorbisRoot = Path.Combine(root, "libvorbis"); - var oggBuildDir = Path.Combine(oggRoot, "build-" + architecture.ToString()); var vorbisBuildDir = Path.Combine(vorbisRoot, "build-" + architecture.ToString()); + var installDir = Path.Combine(root, "install"); string ext; + string oggConfig = $"-DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE={_configuration} -DCMAKE_INSTALL_PREFIX=\"{installDir}\""; + string vorbisConfig = $"-DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE={_configuration} -DCMAKE_INSTALL_PREFIX=\"{installDir}\""; + string liboggFilename = "libogg"; + Dictionary envVars = new Dictionary(); + (string, string)[] oggBinariesToCopy; + Binary[] vorbisBinariesToCopy; switch (platform) { case TargetPlatform.Windows: case TargetPlatform.UWP: case TargetPlatform.XboxOne: + oggConfig += " -DBUILD_SHARED_LIBS=OFF"; + vorbisConfig += " -DBUILD_SHARED_LIBS=OFF"; ext = ".lib"; + liboggFilename = "ogg"; break; case TargetPlatform.Linux: + oggConfig += " -DCMAKE_POSITION_INDEPENDENT_CODE=ON"; + vorbisConfig += " -DCMAKE_POSITION_INDEPENDENT_CODE=ON"; + envVars = new Dictionary + { + { "CC", "clang-" + Configuration.LinuxClangMinVer }, + { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, + { "CXX", "clang++-" + Configuration.LinuxClangMinVer }, + { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, + }; + ext = ".a"; + break; + case TargetPlatform.Mac: + //oggConfig += $" -DOGG_INCLUDE_DIR=\"{oggRoot}/install/include\" -DOGG_LIBRARY=\"{oggRoot}/install/lib\""; ext = ".a"; break; default: throw new InvalidPlatformException(platform); } - var binariesToCopy = new List<(string, string)>(); - - // Build ogg + switch (platform) { - var solutionPath = Path.Combine(oggBuildDir, "ogg.sln"); - - RunCmake(oggRoot, platform, architecture, $"-B\"{oggBuildDir}\" -DBUILD_SHARED_LIBS=OFF"); - Deploy.VCEnvironment.BuildSolution(solutionPath, configurationMsvc, architecture.ToString()); - foreach (var file in oggBinariesToCopyWindowsCmake) - binariesToCopy.Add((Path.Combine(oggBuildDir, configurationMsvc, file.Item1), file.Item2)); + case TargetPlatform.Windows: + case TargetPlatform.UWP: + case TargetPlatform.XboxOne: + oggBinariesToCopy = + [ + ("ogg.lib", "libogg_static.lib") + ]; + vorbisBinariesToCopy = + [ + new Binary("vorbis.lib", "libvorbis", "libvorbis_static.lib"), + new Binary("vorbisfile.lib", "libvorbisfile", "libvorbisfile_static.lib") + ]; + break; + case TargetPlatform.Linux: + case TargetPlatform.Mac: + oggBinariesToCopy = + [ + ("libogg.a", "libogg.a") + ]; + vorbisBinariesToCopy = + [ + new Binary("libvorbis.a", "lib"), + new Binary("libvorbisenc.a", "lib"), + new Binary("libvorbisfile.a", "lib") + ]; + break; + default: throw new InvalidPlatformException(platform); } + vorbisConfig += $" -DOGG_INCLUDE_DIR=\"{Path.Combine(installDir, "include")}\" -DOGG_LIBRARY=\"{Path.Combine(installDir, "lib", liboggFilename + ext)}\""; + + var binariesToCopy = new List<(string, string)>(); + + SetupDirectory(installDir, true); + // Build ogg + { + SetupDirectory(oggBuildDir, true); + RunCmake(oggRoot, platform, architecture, $"-B\"{oggBuildDir}\" " + oggConfig, envVars); + if (platform == TargetPlatform.Windows) + Deploy.VCEnvironment.BuildSolution(Path.Combine(oggBuildDir, "ogg.sln"), _configuration, architecture.ToString()); + else + BuildCmake(oggBuildDir); + Utilities.Run("cmake", $"--build . 
--config {_configuration} --target install", null, oggBuildDir, Utilities.RunOptions.DefaultTool); + } // Build vorbis { - var oggLibraryPath = Path.Combine(oggBuildDir, configurationMsvc, "ogg" + ext); - var solutionPath = Path.Combine(vorbisBuildDir, "vorbis.sln"); - - RunCmake(vorbisRoot, platform, architecture, $"-B\"{vorbisBuildDir}\" -DOGG_INCLUDE_DIR=\"{Path.Combine(oggRoot, "include")}\" -DOGG_LIBRARY=\"{oggLibraryPath}\" -DBUILD_SHARED_LIBS=OFF"); - Deploy.VCEnvironment.BuildSolution(solutionPath, configurationMsvc, architecture.ToString()); - foreach (var file in vorbisBinariesToCopyWindowsCmake) - binariesToCopy.Add((Path.Combine(vorbisBuildDir, "lib", configurationMsvc, file.Item1), file.Item2)); + SetupDirectory(vorbisBuildDir, true); + RunCmake(vorbisRoot, platform, architecture, $"-B\"{vorbisBuildDir}\" " + vorbisConfig); + if (platform == TargetPlatform.Windows) + Deploy.VCEnvironment.BuildSolution(Path.Combine(vorbisBuildDir, "vorbis.sln"), _configuration, architecture.ToString()); + else + BuildCmake(vorbisBuildDir); + Utilities.Run("cmake", $"--build . --config {_configuration} --target install", null, vorbisBuildDir, Utilities.RunOptions.DefaultTool); } // Copy binaries + foreach (var file in oggBinariesToCopy) + binariesToCopy.Add((Path.Combine(installDir, "lib", file.Item1), file.Item2)); + foreach (var file in vorbisBinariesToCopy) + binariesToCopy.Add((Path.Combine(installDir, "lib", file.Filename), file.DstFilename ?? file.Filename)); + var depsFolder = GetThirdPartyFolder(options, platform, architecture); foreach (var file in binariesToCopy) Utilities.FileCopy(file.Item1, Path.Combine(depsFolder, file.Item2)); @@ -337,203 +349,140 @@ namespace Flax.Deps.Dependencies foreach (var platform in options.Platforms) { - BuildStarted(platform); - switch (platform) + foreach (var architecture in options.Architectures) { - case TargetPlatform.Windows: - { - BuildCmake(options, TargetPlatform.Windows, TargetArchitecture.x64); - BuildCmake(options, TargetPlatform.Windows, TargetArchitecture.ARM64); - break; - } - case TargetPlatform.UWP: - { - BuildMsbuild(options, TargetPlatform.UWP, TargetArchitecture.x64); - break; - } - case TargetPlatform.XboxOne: - { - BuildMsbuild(options, TargetPlatform.XboxOne, TargetArchitecture.x64); - break; - } - case TargetPlatform.Linux: - { - // Note: assumes the libogg-dev package is pre-installed on the system - - // Get the source - CloneGitRepoFast(root, "https://github.com/xiph/vorbis.git"); - - var envVars = new Dictionary + BuildStarted(platform, architecture); + switch (platform) { - { "CC", "clang-" + Configuration.LinuxClangMinVer }, - { "CC_FOR_BUILD", "clang-" + Configuration.LinuxClangMinVer }, - { "CXX", "clang++-" + Configuration.LinuxClangMinVer }, - { "CMAKE_BUILD_PARALLEL_LEVEL", CmakeBuildParallel }, - }; - var buildDir = Path.Combine(root, "build"); - - Utilities.Run(Path.Combine(root, "autogen.sh"), null, null, root, Utilities.RunOptions.DefaultTool, envVars); - - // Build for Linux - var toolchain = UnixToolchain.GetToolchainName(platform, TargetArchitecture.x64); - Utilities.Run(Path.Combine(root, "configure"), string.Format("--host={0}", toolchain), null, root, Utilities.RunOptions.ThrowExceptionOnError, envVars); - SetupDirectory(buildDir, true); - Utilities.Run("cmake", "-G \"Unix Makefiles\" -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=Release ..", null, buildDir, Utilities.RunOptions.ConsoleLogOutput, envVars); - Utilities.Run("cmake", "--build .", null, buildDir, Utilities.RunOptions.ConsoleLogOutput, 
envVars); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.x64); - foreach (var file in binariesToCopyUnix) - Utilities.FileCopy(Path.Combine(buildDir, file.SrcFolder, file.Filename), Path.Combine(depsFolder, file.Filename)); - break; - } - case TargetPlatform.PS4: - { - BuildMsbuild(options, TargetPlatform.PS4, TargetArchitecture.x64); - break; - } - case TargetPlatform.PS5: - { - BuildMsbuild(options, TargetPlatform.PS5, TargetArchitecture.x64); - break; - } - case TargetPlatform.XboxScarlett: - { - BuildMsbuild(options, TargetPlatform.XboxScarlett, TargetArchitecture.x64); - break; - } - case TargetPlatform.Android: - { - var oggRoot = Path.Combine(root, "ogg"); - var oggBuildDir = Path.Combine(oggRoot, "build"); - var buildDir = Path.Combine(root, "build"); - - // Get the source - CloneGitRepoFast(root, "https://github.com/xiph/vorbis.git"); - CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); - GitCheckout(oggRoot, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); - - // Build for Android - SetupDirectory(oggBuildDir, true); - RunCmake(oggBuildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\"../install\""); - Utilities.Run("cmake", "--build . --target install", null, oggBuildDir, Utilities.RunOptions.ConsoleLogOutput); - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, string.Format(".. -DCMAKE_BUILD_TYPE=Release -DOGG_INCLUDE_DIR=\"{0}/install/include\" -DOGG_LIBRARY=\"{0}/install/lib\"", oggRoot)); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - foreach (var file in binariesToCopyUnix) - Utilities.FileCopy(Path.Combine(buildDir, file.SrcFolder, file.Filename), Path.Combine(depsFolder, file.Filename)); - break; - } - case TargetPlatform.Switch: - { - var oggRoot = Path.Combine(root, "ogg"); - var oggBuildDir = Path.Combine(oggRoot, "build"); - var buildDir = Path.Combine(root, "build"); - - // Get the source - SetupDirectory(oggRoot, false); - CloneGitRepo(root, "https://github.com/xiph/vorbis.git"); - GitCheckout(root, "master", "98eddc72d36e3421519d54b101c09b57e4d4d10d"); - CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); - GitCheckout(oggRoot, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/ogg"), oggRoot, true, true); - Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/vorbis"), buildDir, true, true); - - // Build for Switch - SetupDirectory(oggBuildDir, true); - RunCmake(oggBuildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\"../install\""); - Utilities.Run("cmake", "--build . --target install", null, oggBuildDir, Utilities.RunOptions.ConsoleLogOutput); - Utilities.FileCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/ogg", "include", "ogg", "config_types.h"), Path.Combine(oggRoot, "install", "include", "ogg", "config_types.h")); - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, string.Format(".. 
-DCMAKE_BUILD_TYPE=Release -DOGG_INCLUDE_DIR=\"{0}/install/include\" -DOGG_LIBRARY=\"{0}/install/lib\"", oggRoot)); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - foreach (var file in binariesToCopyUnix) - Utilities.FileCopy(Path.Combine(buildDir, file.SrcFolder, file.Filename), Path.Combine(depsFolder, file.Filename)); - break; - } - case TargetPlatform.Mac: - { - var oggRoot = Path.Combine(root, "ogg"); - var oggBuildDir = Path.Combine(oggRoot, "build"); - var buildDir = Path.Combine(root, "build"); - - // Get the source - CloneGitRepoFast(root, "https://github.com/xiph/vorbis.git"); - CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); - GitCheckout(oggRoot, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); - - // Build for Mac - foreach (var architecture in new[] { TargetArchitecture.x64, TargetArchitecture.ARM64 }) + case TargetPlatform.Windows: { + BuildCmake(options, TargetPlatform.Windows, architecture); + break; + } + case TargetPlatform.UWP: + { + BuildMsbuild(options, TargetPlatform.UWP, architecture); + break; + } + case TargetPlatform.XboxOne: + { + BuildMsbuild(options, TargetPlatform.XboxOne, architecture); + break; + } + case TargetPlatform.Linux: + { + BuildCmake(options, TargetPlatform.Linux, architecture); + break; + } + case TargetPlatform.PS4: + { + BuildMsbuild(options, TargetPlatform.PS4, TargetArchitecture.x64); + break; + } + case TargetPlatform.PS5: + { + BuildMsbuild(options, TargetPlatform.PS5, TargetArchitecture.x64); + break; + } + case TargetPlatform.XboxScarlett: + { + BuildMsbuild(options, TargetPlatform.XboxScarlett, TargetArchitecture.x64); + break; + } + case TargetPlatform.Android: + { + var oggRoot = Path.Combine(root, "ogg"); + var oggBuildDir = Path.Combine(oggRoot, "build"); + var buildDir = Path.Combine(root, "build"); + + // Get the source + CloneGitRepoFast(root, "https://github.com/xiph/vorbis.git"); + CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); + GitCheckout(oggRoot, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); + + // Build for Android SetupDirectory(oggBuildDir, true); - RunCmake(oggBuildDir, platform, architecture, ".. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\"../install\""); - Utilities.Run("cmake", "--build . --target install", null, oggBuildDir, Utilities.RunOptions.ConsoleLogOutput); + RunCmake(oggBuildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\"../install\""); + Utilities.Run("cmake", "--build . --config Release --target install", null, oggBuildDir, Utilities.RunOptions.ConsoleLogOutput); SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, architecture, string.Format(".. -DCMAKE_BUILD_TYPE=Release -DOGG_INCLUDE_DIR=\"{0}/install/include\" -DOGG_LIBRARY=\"{0}/install/lib\"", oggRoot)); + RunCmake(buildDir, platform, TargetArchitecture.ARM64, string.Format(".. 
-DCMAKE_BUILD_TYPE=Release -DOGG_INCLUDE_DIR=\"{0}/install/include\" -DOGG_LIBRARY=\"{0}/install/lib\"", oggRoot)); BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, architecture); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); foreach (var file in binariesToCopyUnix) Utilities.FileCopy(Path.Combine(buildDir, file.SrcFolder, file.Filename), Path.Combine(depsFolder, file.Filename)); + break; } - break; - } - case TargetPlatform.iOS: - { - var oggRoot = Path.Combine(root, "ogg"); - var oggBuildDir = Path.Combine(oggRoot, "build"); - var buildDir = Path.Combine(root, "build"); + case TargetPlatform.Switch: + { + var oggRoot = Path.Combine(root, "ogg"); + var oggBuildDir = Path.Combine(oggRoot, "build"); + var buildDir = Path.Combine(root, "build"); - // Get the source - CloneGitRepoFast(root, "https://github.com/xiph/vorbis.git"); - CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); - GitCheckout(oggRoot, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); + // Get the source + SetupDirectory(oggRoot, false); + CloneGitRepo(root, "https://github.com/xiph/vorbis.git"); + GitCheckout(root, "master", "98eddc72d36e3421519d54b101c09b57e4d4d10d"); + CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); + GitCheckout(oggRoot, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/ogg"), oggRoot, true, true); + Utilities.DirectoryCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/vorbis"), buildDir, true, true); - // Build for Mac - SetupDirectory(oggBuildDir, true); - RunCmake(oggBuildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\"../install\""); - Utilities.Run("cmake", "--build . --target install", null, oggBuildDir, Utilities.RunOptions.ConsoleLogOutput); - SetupDirectory(buildDir, true); - RunCmake(buildDir, platform, TargetArchitecture.ARM64, string.Format(".. -DCMAKE_BUILD_TYPE=Release -DOGG_INCLUDE_DIR=\"{0}/install/include\" -DOGG_LIBRARY=\"{0}/install/lib\"", oggRoot)); - BuildCmake(buildDir); - var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); - foreach (var file in binariesToCopyUnix) - Utilities.FileCopy(Path.Combine(buildDir, file.SrcFolder, file.Filename), Path.Combine(depsFolder, file.Filename)); - break; - } + // Build for Switch + SetupDirectory(oggBuildDir, true); + RunCmake(oggBuildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\"../install\""); + Utilities.Run("cmake", "--build . --config Release --target install", null, oggBuildDir, Utilities.RunOptions.ConsoleLogOutput); + Utilities.FileCopy(Path.Combine(GetBinariesFolder(options, platform), "Data/ogg", "include", "ogg", "config_types.h"), Path.Combine(oggRoot, "install", "include", "ogg", "config_types.h")); + SetupDirectory(buildDir, true); + RunCmake(buildDir, platform, TargetArchitecture.ARM64, string.Format(".. 
-DCMAKE_BUILD_TYPE=Release -DOGG_INCLUDE_DIR=\"{0}/install/include\" -DOGG_LIBRARY=\"{0}/install/lib\"", oggRoot)); + BuildCmake(buildDir); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); + foreach (var file in binariesToCopyUnix) + Utilities.FileCopy(Path.Combine(buildDir, file.SrcFolder, file.Filename), Path.Combine(depsFolder, file.Filename)); + break; + } + case TargetPlatform.Mac: + { + BuildCmake(options, TargetPlatform.Mac, architecture); + break; + } + case TargetPlatform.iOS: + { + var oggRoot = Path.Combine(root, "ogg"); + var oggBuildDir = Path.Combine(oggRoot, "build"); + var buildDir = Path.Combine(root, "build"); + + // Get the source + CloneGitRepoFast(root, "https://github.com/xiph/vorbis.git"); + CloneGitRepo(oggRoot, "https://github.com/xiph/ogg.git"); + GitCheckout(oggRoot, "master", "4380566a44b8d5e85ad511c9c17eb04197863ec5"); + + // Build for Mac + SetupDirectory(oggBuildDir, true); + RunCmake(oggBuildDir, platform, TargetArchitecture.ARM64, ".. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\"../install\""); + Utilities.Run("cmake", "--build . --config Release --target install", null, oggBuildDir, Utilities.RunOptions.ConsoleLogOutput); + SetupDirectory(buildDir, true); + RunCmake(buildDir, platform, TargetArchitecture.ARM64, string.Format(".. -DCMAKE_BUILD_TYPE=Release -DOGG_INCLUDE_DIR=\"{0}/install/include\" -DOGG_LIBRARY=\"{0}/install/lib\"", oggRoot)); + BuildCmake(buildDir); + var depsFolder = GetThirdPartyFolder(options, platform, TargetArchitecture.ARM64); + foreach (var file in binariesToCopyUnix) + Utilities.FileCopy(Path.Combine(buildDir, file.SrcFolder, file.Filename), Path.Combine(depsFolder, file.Filename)); + break; + } + } } } - // Backup files - if (hasSourcesReady) - root = rootMsvcLib; - var srcIncludePath = Path.Combine(root, "include", "vorbis"); - var dstIncludePath = Path.Combine(options.ThirdPartyFolder, "vorbis"); - foreach (var filename in filesToKeep) - { - var src = Path.Combine(dstIncludePath, filename); - var dst = Path.Combine(options.IntermediateFolder, filename + ".tmp"); - Utilities.FileCopy(src, dst); - } + // Setup headers directory + var installDir = Path.Combine(root, "install"); + var oggOut = Path.Combine(options.ThirdPartyFolder, "ogg"); + var vorbisOut = Path.Combine(options.ThirdPartyFolder, "vorbis"); - try - { - // Setup headers directory - SetupDirectory(dstIncludePath, true); + // Deploy header files + Utilities.DirectoryCopy(Path.Combine(installDir, "include", "ogg"), oggOut, true, true); + Utilities.DirectoryCopy(Path.Combine(installDir, "include", "vorbis"), vorbisOut, true, true); - // Deploy header files and restore files - Directory.GetFiles(srcIncludePath, "Makefile*").ToList().ForEach(File.Delete); - Utilities.DirectoryCopy(srcIncludePath, dstIncludePath, true, true); - Utilities.FileCopy(Path.Combine(root, "COPYING"), Path.Combine(dstIncludePath, "COPYING")); - } - finally - { - foreach (var filename in filesToKeep) - { - var src = Path.Combine(options.IntermediateFolder, filename + ".tmp"); - var dst = Path.Combine(dstIncludePath, filename); - Utilities.FileCopy(src, dst); - } - } + Utilities.FileCopy(Path.Combine(root, "libogg", "COPYING"), Path.Combine(oggOut, "COPYING")); + Utilities.FileCopy(Path.Combine(root, "libvorbis", "COPYING"), Path.Combine(vorbisOut, "COPYING")); } } } diff --git a/Source/Tools/Flax.Build/Deps/Dependency.cs b/Source/Tools/Flax.Build/Deps/Dependency.cs index a52b78835..842a57b20 100644 --- a/Source/Tools/Flax.Build/Deps/Dependency.cs +++ 
b/Source/Tools/Flax.Build/Deps/Dependency.cs @@ -40,6 +40,11 @@ namespace Flax.Deps /// The target platforms to build dependency for (contains only platforms supported by the dependency itself). /// public TargetPlatform[] Platforms; + + /// + /// The target architectures to build dependency for (contains only platforms supported by the dependency itself). + /// + public TargetArchitecture[] Architectures; } /// @@ -47,7 +52,6 @@ namespace Flax.Deps /// protected static TargetPlatform BuildPlatform => Platform.BuildPlatform.Target; - private static Version? _cmakeVersion; protected static Version CMakeVersion { @@ -55,11 +59,19 @@ namespace Flax.Deps { if (_cmakeVersion == null) { - var versionOutput = Utilities.ReadProcessOutput("cmake", "--version"); - var versionStart = versionOutput.IndexOf("cmake version ") + "cmake version ".Length; - var versionEnd = versionOutput.IndexOfAny(['-', '\n', '\r'], versionStart); // End of line or dash before Git hash - var versionString = versionOutput.Substring(versionStart, versionEnd - versionStart); - _cmakeVersion = new Version(versionString); + try + { + var versionOutput = Utilities.ReadProcessOutput("cmake", "--version"); + var versionStart = versionOutput.IndexOf("cmake version ") + "cmake version ".Length; + var versionEnd = versionOutput.IndexOfAny(['-', '\n', '\r'], versionStart); // End of line or dash before Git hash + var versionString = versionOutput.Substring(versionStart, versionEnd - versionStart); + _cmakeVersion = new Version(versionString); + } + catch (Exception) + { + // Assume old version by default (in case of errors) + _cmakeVersion = new Version(3, 0); + } } return _cmakeVersion; } @@ -68,7 +80,95 @@ namespace Flax.Deps /// /// Gets the platforms list supported by this dependency to build on the current build platform (based on ). /// - public abstract TargetPlatform[] Platforms { get; } + public virtual TargetPlatform[] Platforms + { + get + { + // The most common build setup + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetPlatform.Windows, + TargetPlatform.XboxOne, + TargetPlatform.XboxScarlett, + TargetPlatform.PS4, + TargetPlatform.PS5, + TargetPlatform.Android, + TargetPlatform.Switch, + }; + case TargetPlatform.Linux: + return new[] + { + TargetPlatform.Linux, + }; + case TargetPlatform.Mac: + return new[] + { + TargetPlatform.Mac, + TargetPlatform.iOS, + }; + default: return new TargetPlatform[0]; + } + } + } + + /// + /// Gets the architectures list supported by this dependency to build on the current build platform (based on ). 
+ /// + public virtual TargetArchitecture[] Architectures + { + get + { + // Default value returns all supported architectures for all supported platforms + switch (BuildPlatform) + { + case TargetPlatform.Windows: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.Linux: + return new[] + { + TargetArchitecture.x64, + //TargetArchitecture.ARM64, + }; + case TargetPlatform.Mac: + return new[] + { + TargetArchitecture.x64, + TargetArchitecture.ARM64, + }; + case TargetPlatform.XboxOne: + case TargetPlatform.XboxScarlett: + case TargetPlatform.PS4: + case TargetPlatform.PS5: + return new[] + { + TargetArchitecture.x64, + }; + case TargetPlatform.Switch: + return new[] + { + TargetArchitecture.ARM64, + }; + case TargetPlatform.Android: + return new[] + { + TargetArchitecture.ARM64, + }; + case TargetPlatform.iOS: + return new[] + { + TargetArchitecture.ARM64, + }; + default: return new TargetArchitecture[0]; + } + } + } /// /// True if build dependency by default, otherwise only when explicitly specified via command line. @@ -85,9 +185,9 @@ namespace Flax.Deps /// Logs build process start. /// /// Target platform. - protected void BuildStarted(TargetPlatform platform) + protected void BuildStarted(TargetPlatform platform, TargetArchitecture architecture) { - Log.Info($"Building {GetType().Name} for {platform}"); + Log.Info($"Building {GetType().Name} for {platform}{(architecture != TargetArchitecture.AnyCPU ? $" ({architecture})" : "")}"); } /// diff --git a/Source/Tools/Flax.Build/Deps/DepsBuilder.cs b/Source/Tools/Flax.Build/Deps/DepsBuilder.cs index c43c39ea3..1b8389080 100644 --- a/Source/Tools/Flax.Build/Deps/DepsBuilder.cs +++ b/Source/Tools/Flax.Build/Deps/DepsBuilder.cs @@ -38,20 +38,21 @@ namespace Flax.Deps var platforms = Globals.AllPlatforms; if (Configuration.BuildPlatforms != null && Configuration.BuildPlatforms.Length != 0) platforms = Configuration.BuildPlatforms; - platforms = platforms.Where(x => buildPlatform.CanBuildPlatform(x)).ToArray(); - Log.Verbose("Building deps for platforms:"); + platforms = platforms.Where(buildPlatform.CanBuildPlatform).ToArray(); + var architectures = Globals.AllArchitectures; + if (Configuration.BuildArchitectures != null && Configuration.BuildArchitectures.Length != 0) + architectures = Configuration.BuildArchitectures; + architectures = architectures.Where(buildPlatform.CanBuildArchitecture).ToArray(); + Log.Verbose($"Building deps for platforms {string.Join(',', platforms)}, {string.Join(',', architectures)}:"); foreach (var platform in platforms) { - Log.Verbose(" - " + platform); + foreach (var architecture in architectures) + { + Log.Verbose($" - {platform} ({architecture})"); - if (Platform.IsPlatformSupported(platform, TargetArchitecture.x64)) - SetupDepsOutputFolder(options, platform, TargetArchitecture.x64); - if (Platform.IsPlatformSupported(platform, TargetArchitecture.x86)) - SetupDepsOutputFolder(options, platform, TargetArchitecture.x86); - if (Platform.IsPlatformSupported(platform, TargetArchitecture.ARM)) - SetupDepsOutputFolder(options, platform, TargetArchitecture.ARM); - if (Platform.IsPlatformSupported(platform, TargetArchitecture.ARM64)) - SetupDepsOutputFolder(options, platform, TargetArchitecture.ARM64); + if (Platform.IsPlatformSupported(platform, architecture)) + SetupDepsOutputFolder(options, platform, architecture); + } } // Get all deps @@ -80,6 +81,14 @@ namespace Flax.Deps continue; } + options.Architectures = 
architectures.Intersect(dependency.Architectures).ToArray(); + if (options.Architectures.Length == 0) + { + Log.Info(string.Format("Skipping {0} ({1}/{2})", name, i + 1, dependencies.Length)); + Log.Verbose("Architecture not used on any of the build platforms."); + continue; + } + Log.Info(string.Format("Building {0} ({1}/{2})", name, i + 1, dependencies.Length)); options.IntermediateFolder = Path.Combine(Environment.CurrentDirectory, "Cache", "Intermediate", "Deps", name).Replace('\\', '/'); diff --git a/Source/Tools/Flax.Build/Utilities/Utilities.cs b/Source/Tools/Flax.Build/Utilities/Utilities.cs index 049d1be2d..7571ea0a0 100644 --- a/Source/Tools/Flax.Build/Utilities/Utilities.cs +++ b/Source/Tools/Flax.Build/Utilities/Utilities.cs @@ -42,21 +42,14 @@ namespace Flax.Build } /// - /// Restores a targets nuget packages. + /// Gets the NuGet packages cache folder path. /// - /// The task graph. - /// The target. - /// The dotnet path. - public static void RestoreNugetPackages(Graph.TaskGraph graph, Target target) + /// The path. + public static string GetNugetPackagesPath() { - var dotNetPath = GetDotNetPath(); - var task = graph.Add(); - task.WorkingDirectory = target.FolderPath; - task.InfoMessage = $"Restoring Nuget Packages for {target.Name}"; - task.CommandPath = dotNetPath; - task.CommandArguments = $"restore"; + return Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), ".nuget", "packages"); } - + /// /// Gets the hash code for the string (the same for all platforms). Matches Engine algorithm for string hashing. ///
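Below is a minimal sketch, not part of the patch itself, showing how a dependency is expected to consume the per-architecture API introduced above in Dependency.cs and DepsBuilder.cs (options.Architectures, BuildStarted(platform, architecture), per-architecture deps folders). The "ExampleLib" name, its output file, and its CMake flags are hypothetical placeholders; the helpers it calls (BuildStarted, SetupDirectory, RunCmake, BuildCmake, GetThirdPartyFolder, Utilities.FileCopy) are the same ones used by the dependencies touched in this diff, assuming the signatures shown there.

// Illustrative sketch only (not part of this patch): a hypothetical dependency
// written against the new per-architecture build loop. Library name, output
// file and CMake options are placeholders.
using System.IO;
using Flax.Build;

namespace Flax.Deps.Dependencies
{
    class ExampleLib : Dependency
    {
        public override void Build(BuildOptions options)
        {
            var root = options.IntermediateFolder;
            foreach (var platform in options.Platforms)
            {
                // options.Architectures has already been intersected with this
                // dependency's Architectures property by DepsBuilder.
                foreach (var architecture in options.Architectures)
                {
                    BuildStarted(platform, architecture);

                    // Separate build folder per architecture so x64 and ARM64 outputs don't collide.
                    var buildDir = Path.Combine(root, "build-" + architecture.ToString());
                    SetupDirectory(buildDir, true);
                    RunCmake(buildDir, platform, architecture, ".. -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE=Release");
                    BuildCmake(buildDir);

                    // Deploy into the per-platform, per-architecture deps folder.
                    var depsFolder = GetThirdPartyFolder(options, platform, architecture);
                    Utilities.FileCopy(Path.Combine(buildDir, "libexample.a"), Path.Combine(depsFolder, "libexample.a"));
                }
            }
        }
    }
}

A dependency that only supports a subset of targets overrides Platforms and/or Architectures (as glslang, mono and nvapi do above), and DepsBuilder skips it when the intersection with the requested build architectures (Configuration.BuildArchitectures) is empty.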