From 2bec653b81a48a9043df3fbab1766ed9f4bdeba7 Mon Sep 17 00:00:00 2001 From: Edu Garcia <28616+Arcnor@users.noreply.github.com> Date: Sat, 7 Oct 2023 10:12:37 +0100 Subject: [PATCH 01/53] Add color picker option to accept changes on dismissal --- Source/Editor/GUI/Dialogs/ColorPickerDialog.cs | 5 ++++- Source/Editor/Options/InterfaceOptions.cs | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Source/Editor/GUI/Dialogs/ColorPickerDialog.cs b/Source/Editor/GUI/Dialogs/ColorPickerDialog.cs index 161b3f4ae..0995635b0 100644 --- a/Source/Editor/GUI/Dialogs/ColorPickerDialog.cs +++ b/Source/Editor/GUI/Dialogs/ColorPickerDialog.cs @@ -362,7 +362,10 @@ namespace FlaxEditor.GUI.Dialogs _disableEvents = true; // Restore color if modified - if (_useDynamicEditing && _initialValue != _value) + var options = Editor.Instance.Options.Options; + + if (!options.Interface.ColorPickerAlwaysChangesColor && + _useDynamicEditing && _initialValue != _value) { _onChanged?.Invoke(_initialValue, false); } diff --git a/Source/Editor/Options/InterfaceOptions.cs b/Source/Editor/Options/InterfaceOptions.cs index 95a273f19..6d362602f 100644 --- a/Source/Editor/Options/InterfaceOptions.cs +++ b/Source/Editor/Options/InterfaceOptions.cs @@ -148,6 +148,13 @@ namespace FlaxEditor.Options [EditorDisplay("Interface"), EditorOrder(280), Tooltip("Editor content window orientation.")] public FlaxEngine.GUI.Orientation ContentWindowOrientation { get; set; } = FlaxEngine.GUI.Orientation.Horizontal; + /// + /// Gets or sets a value indicating whether the color picker keeps the modified color when it is dismissed instead of restoring the initial color. + /// + [DefaultValue(false)] + [EditorDisplay("Interface"), EditorOrder(300), Tooltip("If checked, color pickers will always modify the color unless 'Cancel' if pressed, otherwise color won't change unless 'Ok' is pressed (default)")] + public bool ColorPickerAlwaysChangesColor { get; set; } = false; + /// /// Gets or sets the timestamps prefix mode for output log messages. 
/// From 5d61e45ecde7ced2294cdb8debd1bff077ea978f Mon Sep 17 00:00:00 2001 From: ruan Date: Sun, 4 Feb 2024 10:42:29 -0400 Subject: [PATCH 02/53] Add rename multiple actors --- .../Windows/SceneTreeWindow.ContextMenu.cs | 2 - .../Windows/SceneTreeWindow.RenameWindow.cs | 177 ++++++++++++++++++ Source/Editor/Windows/SceneTreeWindow.cs | 21 ++- 3 files changed, 194 insertions(+), 6 deletions(-) create mode 100644 Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs diff --git a/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs b/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs index 0c8e0f283..b37353f8f 100644 --- a/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs +++ b/Source/Editor/Windows/SceneTreeWindow.ContextMenu.cs @@ -55,8 +55,6 @@ namespace FlaxEditor.Windows // Basic editing options b = contextMenu.AddButton("Rename", inputOptions.Rename, Rename); - b.Enabled = isSingleActorSelected; - b = contextMenu.AddButton("Duplicate", inputOptions.Duplicate, Editor.SceneEditing.Duplicate); b.Enabled = hasSthSelected; diff --git a/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs b/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs new file mode 100644 index 000000000..fdef598b2 --- /dev/null +++ b/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs @@ -0,0 +1,177 @@ +using System.Text; +using FlaxEditor.CustomEditors; +using FlaxEditor.GUI; +using FlaxEditor.Windows.Assets; +using FlaxEngine; +using FlaxEngine.GUI; + +namespace FlaxEditor.Windows +{ + /// + /// A window used to rename multiple actors. + /// + public class RenameWindow : EditorWindow + { + /// + private class RenameUndoAction : IUndoAction + { + /// + /// The old actors name to use on action. + /// + public string[] OldNames; + + /// + /// The new actors name to use on action. + /// + public string[] NewNames; + + /// + /// All actors to rename. + /// + public Actor[] ActorsToRename; + + /// + /// Create a undo action. 
+ /// + /// + public RenameUndoAction(Actor[] nodes) + { + ActorsToRename = nodes; + OldNames = new string[nodes.Length]; + + for (int i = 0; i < nodes.Length; i++) + OldNames[i] = nodes[i].Name; + } + + /// + public void Do() + { + for (int i = 0; i < ActorsToRename.Length; i++) + ActorsToRename[i].Name = NewNames[i]; + } + + /// + public void Undo() + { + for (int i = 0; i < ActorsToRename.Length; i++) + ActorsToRename[i].Name = OldNames[i]; + } + + /// + public string ActionString => "Renaming actors."; + + /// + public void Dispose() { } + } + + /// + /// Rename options. + /// + private enum RenameOptions + { + OnlyName, + UsePrefix, + UseSufix + } + + private Label _label; + private TextBox _textBox; + private EnumComboBox _renameOptions; + private Button _renameButton; + + private Actor[] _actorsToRename; + + /// + /// Create an instance of the to rename actors. + /// + /// All actors to rename + /// The editor. + public RenameWindow(Actor[] actorsToRename, Editor editor) : base(editor, true, FlaxEngine.GUI.ScrollBars.None) + { + Title = "Rename"; + _actorsToRename = actorsToRename; + + var container = new VerticalPanel + { + Parent = this, + AnchorPreset = AnchorPresets.StretchAll, + Offset = Vector2.Zero, + + }; + + _label = new Label + { + Text = "New Name", + AnchorPreset = AnchorPresets.TopLeft, + Parent = container, + Size = new Float2(100, 25) + }; + + _textBox = new TextBox + { + Text = "Actor", + AnchorPreset = AnchorPresets.TopLeft, + Parent = container, + Size = new Float2(200, 25) + }; + + var renameOptionLabel = new Label + { + Text = "Rename Option", + AnchorPreset = AnchorPresets.TopLeft, + Parent = container, + Size = new Float2(100, 25) + }; + + _renameOptions = new EnumComboBox(typeof(RenameOptions)) + { + Parent = container, + Value = 0 + }; + + _renameButton = new Button + { + Text = "Rename", + AnchorPreset = AnchorPresets.TopLeft, + Parent = container, + Size = new Float2(200, 25), + }; + + _renameButton.Clicked += () => + { + var 
renameUndoAction = new RenameUndoAction(_actorsToRename); + Editor.Instance.SceneEditing.Undo.AddAction(renameUndoAction); + renameUndoAction.NewNames = new string[_actorsToRename.Length]; + for (int i = 0; i < _actorsToRename.Length; i++) + { + var actor = _actorsToRename[i]; + if (!actor) + continue; + var newName = new StringBuilder(_textBox.Text); + if (_renameOptions.Value == (int)RenameOptions.UsePrefix) + { + newName = new StringBuilder(); + newName.Append(i); + newName.Append(_textBox.Text); + } + else if (_renameOptions.Value == (int)RenameOptions.UseSufix) + newName.Append(i.ToString()); + var newNameStr = newName.ToString(); + actor.Name = newNameStr; + renameUndoAction.NewNames[i] = newNameStr; + } + Editor.Instance.Scene.MarkAllScenesEdited(); + Close(); + }; + } + + ~RenameWindow() + { + _actorsToRename = null; + _renameButton = null; + _label = null; + _textBox = null; + _renameOptions = null; + } + } +} diff --git a/Source/Editor/Windows/SceneTreeWindow.cs b/Source/Editor/Windows/SceneTreeWindow.cs index 63ba7b960..8ebd508a5 100644 --- a/Source/Editor/Windows/SceneTreeWindow.cs +++ b/Source/Editor/Windows/SceneTreeWindow.cs @@ -13,7 +13,6 @@ using FlaxEditor.Scripting; using FlaxEditor.States; using FlaxEngine; using FlaxEngine.GUI; -using static FlaxEditor.GUI.ItemsListContextMenu; namespace FlaxEditor.Windows { @@ -138,10 +137,24 @@ namespace FlaxEditor.Windows private void Rename() { var selection = Editor.SceneEditing.Selection; - if (selection.Count != 0 && selection[0] is ActorNode actor) + var selectionCount = selection.Count; + + // Show a window with options to rename multiple actors. 
+ if (selectionCount > 1) { - if (selection.Count != 0) - Editor.SceneEditing.Select(actor); + var selectedActors = new Actor[selectionCount]; + + for (int i = 0; i < selectionCount; i++) + if (selection[i] is ActorNode actorNode) + selectedActors[i] = actorNode.Actor; + + new RenameWindow(selectedActors, Editor).Show(); + return; + } + + if (selectionCount != 0 && selection[0] is ActorNode actor) + { + Editor.SceneEditing.Select(actor); actor.TreeNode.StartRenaming(this, _sceneTreePanel); } } From c6515da8c9d2cbec8394d92cbe198732fd3a98fe Mon Sep 17 00:00:00 2001 From: ruan Date: Sat, 10 Feb 2024 20:36:42 -0400 Subject: [PATCH 03/53] Improve rename window style and refactor --- .../Windows/SceneTreeWindow.RenameWindow.cs | 195 ++++++++++++------ Source/Editor/Windows/SceneTreeWindow.cs | 2 +- 2 files changed, 133 insertions(+), 64 deletions(-) diff --git a/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs b/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs index fdef598b2..597efeb80 100644 --- a/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs +++ b/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs @@ -1,9 +1,7 @@ using System.Text; -using FlaxEditor.CustomEditors; -using FlaxEditor.GUI; -using FlaxEditor.Windows.Assets; using FlaxEngine; using FlaxEngine.GUI; +using FlaxEditor.GUI; namespace FlaxEditor.Windows { @@ -74,21 +72,19 @@ namespace FlaxEditor.Windows UseSufix } - private Label _label; - private TextBox _textBox; - private EnumComboBox _renameOptions; - private Button _renameButton; - + private string _newActorsName; + private RenameOptions _renameOption; private Actor[] _actorsToRename; - /// - /// Create an instance of the to rename actors. - /// - /// All actors to rename - /// The editor. 
- public RenameWindow(Actor[] actorsToRename, Editor editor) : base(editor, true, FlaxEngine.GUI.ScrollBars.None) + private static RenameWindow _currentOpenedWindow; + + private RenameWindow(Actor[] actorsToRename, Editor editor) : base(editor, true, FlaxEngine.GUI.ScrollBars.None) { Title = "Rename"; + Size = new Float2(300, 110); + + _newActorsName = "Actor "; + _renameOption = RenameOptions.UseSufix; _actorsToRename = actorsToRename; var container = new VerticalPanel @@ -96,82 +92,155 @@ namespace FlaxEditor.Windows Parent = this, AnchorPreset = AnchorPresets.StretchAll, Offset = Vector2.Zero, - + AutoSize = false, + Bounds = Rectangle.Empty }; - _label = new Label + var nameContainer = new HorizontalPanel + { + Parent = container, + AnchorPreset = AnchorPresets.TopLeft, + Bounds = new Rectangle(0, 0, 300, 22), + Offset = Vector2.Zero, + AutoSize = false, + Spacing = 2, + CullChildren = false, + ClipChildren = false, + }; + + var optionsContainer = new HorizontalPanel + { + Parent = container, + AnchorPreset = AnchorPresets.TopLeft, + Bounds = new Rectangle(0, 22, 300, 22), + Offset = Vector2.Zero, + AutoSize = false, + Spacing = 2, + CullChildren = false, + ClipChildren = false, + }; + + var renameLabel = new Label { Text = "New Name", - AnchorPreset = AnchorPresets.TopLeft, - Parent = container, - Size = new Float2(100, 25) + AnchorPreset = AnchorPresets.Custom, + AnchorMin = Float2.Zero, + AnchorMax = new Float2(0.5f, 0), + Parent = nameContainer, + HorizontalAlignment = TextAlignment.Near, + Size = new Float2(150, 22), + Offsets = Margin.Zero, }; - _textBox = new TextBox + var newNameTextBox = new TextBox { - Text = "Actor", - AnchorPreset = AnchorPresets.TopLeft, - Parent = container, - Size = new Float2(200, 25) + Text = _newActorsName, + AnchorPreset = AnchorPresets.Custom, + AnchorMin = new Float2(0.5f, 0), + AnchorMax = new Float2(1, 0), + Parent = nameContainer, + Size = new Float2(150, 22), + Offsets = Margin.Zero, }; - var renameOptionLabel = new 
Label + var optionNameLabel = new Label { Text = "Rename Option", - AnchorPreset = AnchorPresets.TopLeft, - Parent = container, - Size = new Float2(100, 25) + HorizontalAlignment = TextAlignment.Near, + AnchorPreset = AnchorPresets.Custom, + AnchorMin = Float2.Zero, + AnchorMax = new Float2(0.5f, 0), + Parent = optionsContainer, + Size = new Float2(150, 22), + Offsets = Margin.Zero, }; - _renameOptions = new EnumComboBox(typeof(RenameOptions)) + var renameOptions = new EnumComboBox(typeof(RenameOptions)) { - Parent = container, - Value = 0 + Parent = optionsContainer, + Value = (int)_renameOption, + AnchorPreset = AnchorPresets.Custom, + AnchorMin = new Float2(0.5f, 0f), + AnchorMax = new Float2(1, 0), + Size = new Float2(150, 22), + Offsets = Margin.Zero, }; - _renameButton = new Button + var renameButton = new Button { Text = "Rename", AnchorPreset = AnchorPresets.TopLeft, Parent = container, - Size = new Float2(200, 25), }; - _renameButton.Clicked += () => + newNameTextBox.TextBoxEditEnd += textBox => { - var renameUndoAction = new RenameUndoAction(_actorsToRename); - Editor.Instance.SceneEditing.Undo.AddAction(renameUndoAction); - renameUndoAction.NewNames = new string[_actorsToRename.Length]; - for (int i = 0; i < _actorsToRename.Length; i++) - { - var actor = _actorsToRename[i]; - if (!actor) - continue; - var newName = new StringBuilder(_textBox.Text); - if (_renameOptions.Value == (int)RenameOptions.UsePrefix) - { - newName = new StringBuilder(); - newName.Append(i); - newName.Append(_textBox.Text); - } - else if (_renameOptions.Value == (int)RenameOptions.UseSufix) - newName.Append(i.ToString()); - var newNameStr = newName.ToString(); - actor.Name = newNameStr; - renameUndoAction.NewNames[i] = newNameStr; - } - Editor.Instance.Scene.MarkAllScenesEdited(); - Close(); + _newActorsName = textBox.Text; }; + + renameOptions.EnumValueChanged += combo => + { + _renameOption = (RenameOptions)combo.Value; + }; + + newNameTextBox.Focus(); + newNameTextBox.KeyDown += 
k => + { + if (k == KeyboardKeys.Return) + { + _newActorsName = newNameTextBox.Text; + RenameActors(); + } + }; + + renameButton.Clicked += RenameActors; } - ~RenameWindow() + private void RenameActors() { - _actorsToRename = null; - _renameButton = null; - _label = null; - _textBox = null; - _renameOptions = null; + var renameUndoAction = new RenameUndoAction(_actorsToRename); + Editor.Instance.SceneEditing.Undo.AddAction(renameUndoAction); + renameUndoAction.NewNames = new string[_actorsToRename.Length]; + for (int i = 0; i < _actorsToRename.Length; i++) + { + var actor = _actorsToRename[i]; + if (!actor) + continue; + var newName = new StringBuilder(_newActorsName); + if (_renameOption == RenameOptions.UsePrefix) + { + newName = new StringBuilder(); + newName.Append(i); + newName.Append(_newActorsName); + } + else if (_renameOption == RenameOptions.UseSufix) + newName.Append(i.ToString()); + + var newNameStr = newName.ToString(); + actor.Name = newNameStr; + renameUndoAction.NewNames[i] = newNameStr; + } + Editor.Instance.Scene.MarkAllScenesEdited(); + Close(); + } + + /// + /// Creates an instance of the rename window for the given actors and shows it. + /// + /// All actors to rename + /// The editor. + public static void Show(Actor[] actorsToRename, Editor editor) + { + // Only one rename window can be open at a time. 
+ if (_currentOpenedWindow != null) + _currentOpenedWindow.Close(ClosingReason.CloseEvent); + + _currentOpenedWindow = new RenameWindow(actorsToRename, editor); + _currentOpenedWindow.ShowFloating(new Float2(300, 110)); + _currentOpenedWindow.RootWindow.Window.Closed += () => + { + _currentOpenedWindow = null; + }; } } } diff --git a/Source/Editor/Windows/SceneTreeWindow.cs b/Source/Editor/Windows/SceneTreeWindow.cs index 8ebd508a5..b44b838cf 100644 --- a/Source/Editor/Windows/SceneTreeWindow.cs +++ b/Source/Editor/Windows/SceneTreeWindow.cs @@ -148,7 +148,7 @@ namespace FlaxEditor.Windows if (selection[i] is ActorNode actorNode) selectedActors[i] = actorNode.Actor; - new RenameWindow(selectedActors, Editor).Show(); + RenameWindow.Show(selectedActors, Editor); return; } From 203f5d06d192ad72ce46043ee429d16471f5e1ee Mon Sep 17 00:00:00 2001 From: ruan Date: Sat, 10 Feb 2024 20:46:05 -0400 Subject: [PATCH 04/53] Fix typo --- Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs b/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs index 597efeb80..6278a5f3b 100644 --- a/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs +++ b/Source/Editor/Windows/SceneTreeWindow.RenameWindow.cs @@ -69,7 +69,7 @@ namespace FlaxEditor.Windows { OnlyName, UsePrefix, - UseSufix + UseSuffix } private string _newActorsName; @@ -84,7 +84,7 @@ namespace FlaxEditor.Windows Size = new Float2(300, 110); _newActorsName = "Actor "; - _renameOption = RenameOptions.UseSufix; + _renameOption = RenameOptions.UseSuffix; _actorsToRename = actorsToRename; var container = new VerticalPanel @@ -213,7 +213,7 @@ namespace FlaxEditor.Windows newName.Append(i); newName.Append(_newActorsName); } - else if (_renameOption == RenameOptions.UseSufix) + else if (_renameOption == RenameOptions.UseSuffix) newName.Append(i.ToString()); var newNameStr = newName.ToString(); From 
dd7739f95e581f41a28360083f2e83e2542d212a Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 16 Mar 2024 13:55:54 +0200 Subject: [PATCH 05/53] Avoid deserializing clipboard content in Custom Editor paste checks --- Source/Editor/CustomEditors/CustomEditor.cs | 15 +---------- .../CustomEditors/Editors/GenericEditor.cs | 27 +------------------ .../Surface/VisjectSurface.CopyPaste.cs | 20 +++++++------- 3 files changed, 12 insertions(+), 50 deletions(-) diff --git a/Source/Editor/CustomEditors/CustomEditor.cs b/Source/Editor/CustomEditors/CustomEditor.cs index ef6302e8e..3b7ccae5f 100644 --- a/Source/Editor/CustomEditors/CustomEditor.cs +++ b/Source/Editor/CustomEditors/CustomEditor.cs @@ -662,20 +662,7 @@ namespace FlaxEditor.CustomEditors /// /// Gets a value indicating whether can paste value from the system clipboard to the property value container. /// - public bool CanPaste - { - get - { - try - { - return GetClipboardObject(out _, false); - } - catch - { - return false; - } - } - } + public bool CanPaste => !string.IsNullOrEmpty(Clipboard.Text); /// /// Sets the value from the system clipboard. 
diff --git a/Source/Editor/CustomEditors/Editors/GenericEditor.cs b/Source/Editor/CustomEditors/Editors/GenericEditor.cs index 489e6aaba..0f4c70309 100644 --- a/Source/Editor/CustomEditors/Editors/GenericEditor.cs +++ b/Source/Editor/CustomEditors/Editors/GenericEditor.cs @@ -474,32 +474,7 @@ namespace FlaxEditor.CustomEditors.Editors } if (layout.Editors.Count != 0) { - var sb = Clipboard.Text; - if (!string.IsNullOrEmpty(sb)) - { - try - { - var data = JsonSerializer.Deserialize(sb); - if (data == null || data.Length != layout.Editors.Count) - return false; - for (var i = 0; i < layout.Editors.Count; i++) - { - Clipboard.Text = data[i]; - if (!layout.Editors[i].CanPaste) - return false; - } - return true; - } - catch - { - return false; - } - finally - { - Clipboard.Text = sb; - } - } - return false; + return !string.IsNullOrEmpty(Clipboard.Text); } if (layout.Children.Any(x => x is LayoutElementsContainer)) { diff --git a/Source/Editor/Surface/VisjectSurface.CopyPaste.cs b/Source/Editor/Surface/VisjectSurface.CopyPaste.cs index 158492460..94d41a624 100644 --- a/Source/Editor/Surface/VisjectSurface.CopyPaste.cs +++ b/Source/Editor/Surface/VisjectSurface.CopyPaste.cs @@ -190,15 +190,7 @@ namespace FlaxEditor.Surface if (data == null || data.Length < 2) return false; - try - { - var model = JsonConvert.DeserializeObject(data); - return model?.Nodes != null && model.Nodes.Length != 0; - } - catch (Exception) - { - return false; - } + return true; } /// @@ -215,7 +207,15 @@ namespace FlaxEditor.Surface try { // Load Mr Json - var model = FlaxEngine.Json.JsonSerializer.Deserialize(data); + DataModel model; + try + { + model = FlaxEngine.Json.JsonSerializer.Deserialize(data); + } + catch + { + return; + } if (model.Nodes == null) model.Nodes = new DataModelNode[0]; From 7aa4ae17825cb19d5c6b6c2893f1bc2216420f90 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 16 Mar 2024 14:10:57 +0200 Subject: [PATCH 06/53] Fix assigning null values into value types in Custom 
Editor Resets back to previous value instead of setting the editor value to empty. --- Source/Editor/CustomEditors/CustomEditor.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Editor/CustomEditors/CustomEditor.cs b/Source/Editor/CustomEditors/CustomEditor.cs index 3b7ccae5f..5289a72af 100644 --- a/Source/Editor/CustomEditors/CustomEditor.cs +++ b/Source/Editor/CustomEditors/CustomEditor.cs @@ -650,7 +650,7 @@ namespace FlaxEditor.CustomEditors } } - if (obj == null || Values.Type.IsInstanceOfType(obj)) + if ((obj == null && !Values.Type.IsValueType) || Values.Type.IsInstanceOfType(obj)) { result = obj; return true; From 7454e9abd21a684f8ca217d052a4064ff097f73f Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 25 May 2024 11:30:48 +0300 Subject: [PATCH 07/53] Update OpenFBX Updated to commit 365f52c1edad6bd283c8a645f1d8d2347dbd1e35 --- .../Tools/ModelTool/ModelTool.OpenFBX.cpp | 141 +- Source/ThirdParty/OpenFBX/libdeflate.cpp | 4193 +++++++++++++++++ Source/ThirdParty/OpenFBX/libdeflate.h | 411 ++ Source/ThirdParty/OpenFBX/ofbx.cpp | 2924 +++++++----- Source/ThirdParty/OpenFBX/ofbx.h | 447 +- 5 files changed, 6693 insertions(+), 1423 deletions(-) create mode 100644 Source/ThirdParty/OpenFBX/libdeflate.cpp create mode 100644 Source/ThirdParty/OpenFBX/libdeflate.h diff --git a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp index cf124f977..253bb6669 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp @@ -49,7 +49,7 @@ Quaternion ToQuaternion(const ofbx::Quat& v) return Quaternion((float)v.x, (float)v.y, (float)v.z, (float)v.w); } -Matrix ToMatrix(const ofbx::Matrix& mat) +Matrix ToMatrix(const ofbx::DMatrix& mat) { Matrix result; for (int32 i = 0; i < 16; i++) @@ -445,7 +445,7 @@ Matrix GetOffsetMatrix(OpenFbxImporterData& data, const ofbx::Mesh* mesh, const bool IsMeshInvalid(const ofbx::Mesh* aMesh) { - 
return aMesh->getGeometry()->getVertexCount() == 0; + return aMesh->getGeometry()->getGeometryData().getPositions().count == 0; } bool ImportBones(OpenFbxImporterData& data, String& errorMsg) @@ -524,56 +524,56 @@ bool ImportBones(OpenFbxImporterData& data, String& errorMsg) return false; } -bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* aMesh, MeshData& mesh, String& errorMsg, int32 triangleStart, int32 triangleEnd) +bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* aMesh, MeshData& mesh, String& errorMsg, int partitionIndex) { PROFILE_CPU(); mesh.Name = aMesh->name; ZoneText(*mesh.Name, mesh.Name.Length()); - const int32 firstVertexOffset = triangleStart * 3; - const int32 lastVertexOffset = triangleEnd * 3; const ofbx::Geometry* aGeometry = aMesh->getGeometry(); - const int vertexCount = lastVertexOffset - firstVertexOffset + 3; - ASSERT(firstVertexOffset + vertexCount <= aGeometry->getVertexCount()); - const ofbx::Vec3* vertices = aGeometry->getVertices(); - const ofbx::Vec3* normals = aGeometry->getNormals(); - const ofbx::Vec3* tangents = aGeometry->getTangents(); - const ofbx::Vec4* colors = aGeometry->getColors(); - const ofbx::Vec2* uvs = aGeometry->getUVs(); + const ofbx::GeometryData& geometryData = aMesh->getGeometryData(); + const ofbx::GeometryPartition& partition = geometryData.getPartition(partitionIndex); + const int vertexCount = partition.triangles_count * 3; + const ofbx::Vec3Attributes& positions = geometryData.getPositions(); + const ofbx::Vec2Attributes& uvs = geometryData.getUVs(); + const ofbx::Vec3Attributes& normals = geometryData.getNormals(); + const ofbx::Vec3Attributes& tangents = geometryData.getTangents(); + const ofbx::Vec4Attributes& colors = geometryData.getColors(); const ofbx::Skin* skin = aGeometry->getSkin(); const ofbx::BlendShape* blendShape = aGeometry->getBlendShape(); + static Array triangulatedIndices; + triangulatedIndices.Resize(vertexCount, false); + // 
Properties const ofbx::Material* aMaterial = nullptr; if (aMesh->getMaterialCount() > 0) - { - if (aGeometry->getMaterials()) - aMaterial = aMesh->getMaterial(aGeometry->getMaterials()[triangleStart]); - else - aMaterial = aMesh->getMaterial(0); - } + aMaterial = aMesh->getMaterial(partitionIndex); mesh.MaterialSlotIndex = data.AddMaterial(result, aMaterial); // Vertex positions mesh.Positions.Resize(vertexCount, false); - for (int i = 0; i < vertexCount; i++) - mesh.Positions.Get()[i] = ToFloat3(vertices[i + firstVertexOffset]); + { + int numVertsProcessed = 0; + for (int i = 0; i < partition.polygon_count; i++) + { + int numVerts = ofbx::triangulate(geometryData, partition.polygons[i], &triangulatedIndices[numVertsProcessed]); + for (int j = numVertsProcessed; j < numVertsProcessed + numVerts; j++) + mesh.Positions.Get()[j] = ToFloat3(positions.get(triangulatedIndices[j])); + numVertsProcessed += numVerts; + } + } // Indices (dummy index buffer) - if (vertexCount % 3 != 0) - { - errorMsg = TEXT("Invalid vertex count. 
It must be multiple of 3."); - return true; - } mesh.Indices.Resize(vertexCount, false); for (int i = 0; i < vertexCount; i++) mesh.Indices.Get()[i] = i; // Texture coordinates - if (uvs) + if (uvs.values) { mesh.UVs.Resize(vertexCount, false); for (int i = 0; i < vertexCount; i++) - mesh.UVs.Get()[i] = ToFloat2(uvs[i + firstVertexOffset]); + mesh.UVs.Get()[i] = ToFloat2(uvs.get(triangulatedIndices[i])); if (data.ConvertRH) { for (int32 v = 0; v < vertexCount; v++) @@ -582,7 +582,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* } // Normals - if (data.Options.CalculateNormals || !normals) + if (data.Options.CalculateNormals || !normals.values) { if (mesh.GenerateNormals(data.Options.SmoothingNormalsAngle)) { @@ -590,11 +590,11 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* return true; } } - else if (normals) + else if (normals.values) { mesh.Normals.Resize(vertexCount, false); for (int i = 0; i < vertexCount; i++) - mesh.Normals.Get()[i] = ToFloat3(normals[i + firstVertexOffset]); + mesh.Normals.Get()[i] = ToFloat3(normals.get(triangulatedIndices[i])); if (data.ConvertRH) { // Mirror normals along the Z axis @@ -604,15 +604,15 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* } // Tangents - if ((data.Options.CalculateTangents || !tangents) && mesh.UVs.HasItems()) + if ((data.Options.CalculateTangents || !tangents.values) && mesh.UVs.HasItems()) { // Generated after full mesh data conversion } - else if (tangents) + else if (tangents.values) { mesh.Tangents.Resize(vertexCount, false); for (int i = 0; i < vertexCount; i++) - mesh.Tangents.Get()[i] = ToFloat3(tangents[i + firstVertexOffset]); + mesh.Tangents.Get()[i] = ToFloat3(tangents.get(triangulatedIndices[i])); if (data.ConvertRH) { // Mirror tangents along the Z axis @@ -658,12 +658,12 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* } // Check if has that channel 
texcoords - const auto lightmapUVs = aGeometry->getUVs(inputChannelIndex); - if (lightmapUVs) + const auto lightmapUVs = geometryData.getUVs(inputChannelIndex); + if (lightmapUVs.values) { mesh.LightmapUVs.Resize(vertexCount, false); for (int i = 0; i < vertexCount; i++) - mesh.LightmapUVs.Get()[i] = ToFloat2(lightmapUVs[i + firstVertexOffset]); + mesh.LightmapUVs.Get()[i] = ToFloat2(lightmapUVs.get(triangulatedIndices[i])); if (data.ConvertRH) { for (int32 v = 0; v < vertexCount; v++) @@ -677,11 +677,11 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* } // Vertex Colors - if (data.Options.ImportVertexColors && colors) + if (data.Options.ImportVertexColors && colors.values) { mesh.Colors.Resize(vertexCount, false); for (int i = 0; i < vertexCount; i++) - mesh.Colors.Get()[i] = ToColor(colors[i + firstVertexOffset]); + mesh.Colors.Get()[i] = ToColor(colors.get(triangulatedIndices[i])); } // Blend Indices and Blend Weights @@ -718,7 +718,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* const double* clusterWeights = cluster->getWeights(); for (int j = 0; j < cluster->getIndicesCount(); j++) { - int vtxIndex = clusterIndices[j] - firstVertexOffset; + int vtxIndex = clusterIndices[j]; float vtxWeight = (float)clusterWeights[j]; if (vtxWeight <= 0 || vtxIndex < 0 || vtxIndex >= vertexCount) continue; @@ -762,9 +762,9 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* continue; const ofbx::Shape* shape = channel->getShape(targetShapeCount - 1); - if (shape->getVertexCount() != aGeometry->getVertexCount()) + if (shape->getVertexCount() != vertexCount) { - LOG(Error, "Blend shape '{0}' in mesh '{1}' has different amount of vertices ({2}) than mesh ({3})", String(shape->name), mesh.Name, shape->getVertexCount(), aGeometry->getVertexCount()); + LOG(Error, "Blend shape '{0}' in mesh '{1}' has different amount of vertices ({2}) than mesh ({3})", String(shape->name), 
mesh.Name, shape->getVertexCount(), vertexCount); continue; } @@ -779,14 +779,14 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* auto shapeVertices = shape->getVertices(); for (int32 i = 0; i < blendShapeData.Vertices.Count(); i++) { - auto delta = ToFloat3(shapeVertices[i + firstVertexOffset]) - mesh.Positions.Get()[i]; + auto delta = ToFloat3(shapeVertices[i]) - mesh.Positions.Get()[i]; blendShapeData.Vertices.Get()[i].PositionDelta = delta; } auto shapeNormals = shape->getNormals(); for (int32 i = 0; i < blendShapeData.Vertices.Count(); i++) { - auto delta = ToFloat3(shapeNormals[i + firstVertexOffset]); + auto delta = ToFloat3(shapeNormals[i]); if (data.ConvertRH) { // Mirror normals along the Z axis @@ -820,7 +820,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* Swap(mesh.Indices.Get()[i], mesh.Indices.Get()[i + 2]); } - if ((data.Options.CalculateTangents || !tangents) && mesh.UVs.HasItems()) + if ((data.Options.CalculateTangents || !tangents.values) && mesh.UVs.HasItems()) { if (mesh.GenerateTangents(data.Options.SmoothingTangentsAngle)) { @@ -858,7 +858,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* return false; } -bool ImportMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* aMesh, String& errorMsg, int32 triangleStart, int32 triangleEnd) +bool ImportMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* aMesh, String& errorMsg, int partitionIndex) { PROFILE_CPU(); @@ -899,7 +899,7 @@ bool ImportMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* // Import mesh data MeshData* meshData = New(); - if (ProcessMesh(result, data, aMesh, *meshData, errorMsg, triangleStart, triangleEnd)) + if (ProcessMesh(result, data, aMesh, *meshData, errorMsg, partitionIndex)) return true; // Link mesh @@ -917,35 +917,17 @@ bool ImportMesh(int32 index, ModelData& result, OpenFbxImporterData& data, Strin { const auto 
aMesh = data.Scene->getMesh(index); const auto aGeometry = aMesh->getGeometry(); - const auto trianglesCount = aGeometry->getVertexCount() / 3; if (IsMeshInvalid(aMesh)) return false; - if (aMesh->getMaterialCount() < 2 || !aGeometry->getMaterials()) + const auto& geomData = aMesh->getGeometryData(); + for (int i = 0; i < geomData.getPartitionCount(); i++) { - // Fast path if mesh is using single material for all triangles - if (ImportMesh(result, data, aMesh, errorMsg, 0, trianglesCount - 1)) - return true; - } - else - { - // Create mesh for each sequence of triangles that share the same material - const auto materials = aGeometry->getMaterials(); - int32 rangeStart = 0; - int32 rangeStartVal = materials[rangeStart]; - for (int32 triangleIndex = 1; triangleIndex < trianglesCount; triangleIndex++) - { - if (rangeStartVal != materials[triangleIndex]) - { - if (ImportMesh(result, data, aMesh, errorMsg, rangeStart, triangleIndex - 1)) - return true; + const auto& partition = geomData.getPartition(i); + if (partition.polygon_count == 0) + continue; - // Start a new range - rangeStart = triangleIndex; - rangeStartVal = materials[triangleIndex]; - } - } - if (ImportMesh(result, data, aMesh, errorMsg, rangeStart, trianglesCount - 1)) + if (ImportMesh(result, data, aMesh, errorMsg, i)) return true; } return false; @@ -962,24 +944,24 @@ struct AnimInfo struct Frame { - ofbx::Vec3 Translation; - ofbx::Vec3 Rotation; - ofbx::Vec3 Scaling; + ofbx::DVec3 Translation; + ofbx::DVec3 Rotation; + ofbx::DVec3 Scaling; }; -void ExtractKeyframePosition(const ofbx::Object* bone, ofbx::Vec3& trans, const Frame& localFrame, Float3& keyframe) +void ExtractKeyframePosition(const ofbx::Object* bone, ofbx::DVec3& trans, const Frame& localFrame, Float3& keyframe) { const Matrix frameTrans = ToMatrix(bone->evalLocal(trans, localFrame.Rotation, localFrame.Scaling)); keyframe = frameTrans.GetTranslation(); } -void ExtractKeyframeRotation(const ofbx::Object* bone, ofbx::Vec3& trans, const Frame& 
localFrame, Quaternion& keyframe) +void ExtractKeyframeRotation(const ofbx::Object* bone, ofbx::DVec3& trans, const Frame& localFrame, Quaternion& keyframe) { const Matrix frameTrans = ToMatrix(bone->evalLocal(localFrame.Translation, trans, { 1.0, 1.0, 1.0 })); Quaternion::RotationMatrix(frameTrans, keyframe); } -void ExtractKeyframeScale(const ofbx::Object* bone, ofbx::Vec3& trans, const Frame& localFrame, Float3& keyframe) +void ExtractKeyframeScale(const ofbx::Object* bone, ofbx::DVec3& trans, const Frame& localFrame, Float3& keyframe) { // Fix empty scale case if (Math::IsZero(trans.x) && Math::IsZero(trans.y) && Math::IsZero(trans.z)) @@ -990,7 +972,7 @@ void ExtractKeyframeScale(const ofbx::Object* bone, ofbx::Vec3& trans, const Fra } template -void ImportCurve(const ofbx::AnimationCurveNode* curveNode, LinearCurve& curve, AnimInfo& info, void (*ExtractKeyframe)(const ofbx::Object*, ofbx::Vec3&, const Frame&, T&)) +void ImportCurve(const ofbx::AnimationCurveNode* curveNode, LinearCurve& curve, AnimInfo& info, void (*ExtractKeyframe)(const ofbx::Object*, ofbx::DVec3&, const Frame&, T&)) { if (curveNode == nullptr) return; @@ -1008,7 +990,7 @@ void ImportCurve(const ofbx::AnimationCurveNode* curveNode, LinearCurve& curv key.Time = (float)i; - ofbx::Vec3 trans = curveNode->getNodeLocalTransform(t); + ofbx::DVec3 trans = curveNode->getNodeLocalTransform(t); ExtractKeyframe(bone, trans, localFrame, key.Value); } } @@ -1125,10 +1107,9 @@ bool ModelTool::ImportDataOpenFBX(const String& path, ModelData& data, Options& errorMsg = TEXT("Cannot load file."); return true; } - ofbx::u64 loadFlags = 0; + ofbx::u16 loadFlags = 0; if (EnumHasAnyFlags(options.ImportTypes, ImportDataTypes::Geometry)) { - loadFlags |= (ofbx::u64)ofbx::LoadFlags::TRIANGULATE; if (!options.ImportBlendShapes) loadFlags |= (ofbx::u64)ofbx::LoadFlags::IGNORE_BLEND_SHAPES; } diff --git a/Source/ThirdParty/OpenFBX/libdeflate.cpp b/Source/ThirdParty/OpenFBX/libdeflate.cpp new file mode 100644 index 
000000000..e421d7911 --- /dev/null +++ b/Source/ThirdParty/OpenFBX/libdeflate.cpp @@ -0,0 +1,4193 @@ +// ofbx changes : removed unused code, single .h and .c +/* + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * --------------------------------------------------------------------------- + * + * This is a highly optimized DEFLATE decompressor. It is much faster than + * vanilla zlib, typically well over twice as fast, though results vary by CPU. 
+ * + * Why this is faster than vanilla zlib: + * + * - Word accesses rather than byte accesses when reading input + * - Word accesses rather than byte accesses when copying matches + * - Faster Huffman decoding combined with various DEFLATE-specific tricks + * - Larger bitbuffer variable that doesn't need to be refilled as often + * - Other optimizations to remove unnecessary branches + * - Only full-buffer decompression is supported, so the code doesn't need to + * support stopping and resuming decompression. + * - On x86_64, a version of the decompression routine is compiled with BMI2 + * instructions enabled and is used automatically at runtime when supported. + */ + +/* + * lib_common.h - internal header included by all library code + */ + +#ifndef LIB_LIB_COMMON_H +#define LIB_LIB_COMMON_H + +#ifdef LIBDEFLATE_H + /* + * When building the library, LIBDEFLATEAPI needs to be defined properly before + * including libdeflate.h. + */ +# error "lib_common.h must always be included before libdeflate.h" +#endif + +#if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__)) +# define LIBDEFLATE_EXPORT_SYM __declspec(dllexport) +#elif defined(__GNUC__) +# define LIBDEFLATE_EXPORT_SYM __attribute__((visibility("default"))) +#else +# define LIBDEFLATE_EXPORT_SYM +#endif + +/* + * On i386, gcc assumes that the stack is 16-byte aligned at function entry. + * However, some compilers (e.g. MSVC) and programming languages (e.g. Delphi) + * only guarantee 4-byte alignment when calling functions. This is mainly an + * issue on Windows, but it has been seen on Linux too. Work around this ABI + * incompatibility by realigning the stack pointer when entering libdeflate. + * This prevents crashes in SSE/AVX code. 
+ */ +#if defined(__GNUC__) && defined(__i386__) +# define LIBDEFLATE_ALIGN_STACK __attribute__((force_align_arg_pointer)) +#else +# define LIBDEFLATE_ALIGN_STACK +#endif + +#define LIBDEFLATEAPI LIBDEFLATE_EXPORT_SYM LIBDEFLATE_ALIGN_STACK + +/* + * common_defs.h + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef COMMON_DEFS_H +#define COMMON_DEFS_H + +#include "libdeflate.h" + +#include +#include /* for size_t */ +#include +#ifdef _MSC_VER +# include /* for _BitScan*() and other intrinsics */ +# include /* for _byteswap_*() */ + /* Disable MSVC warnings that are expected. 
*/ + /* /W2 */ +# pragma warning(disable : 4146) /* unary minus on unsigned type */ + /* /W3 */ +# pragma warning(disable : 4018) /* signed/unsigned mismatch */ +# pragma warning(disable : 4244) /* possible loss of data */ +# pragma warning(disable : 4267) /* possible loss of precision */ +# pragma warning(disable : 4310) /* cast truncates constant value */ + /* /W4 */ +# pragma warning(disable : 4100) /* unreferenced formal parameter */ +# pragma warning(disable : 4127) /* conditional expression is constant */ +# pragma warning(disable : 4189) /* local variable initialized but not referenced */ +# pragma warning(disable : 4232) /* nonstandard extension used */ +# pragma warning(disable : 4245) /* conversion from 'int' to 'unsigned int' */ +# pragma warning(disable : 4295) /* array too small to include terminating null */ +#endif +#ifndef FREESTANDING +# include /* for memcpy() */ +#endif + +/* ========================================================================== */ +/* Target architecture */ +/* ========================================================================== */ + +/* If possible, define a compiler-independent ARCH_* macro. 
*/ +#undef ARCH_X86_64 +#undef ARCH_X86_32 +#undef ARCH_ARM64 +#undef ARCH_ARM32 +#ifdef _MSC_VER +# if defined(_M_X64) +# define ARCH_X86_64 +# elif defined(_M_IX86) +# define ARCH_X86_32 +# elif defined(_M_ARM64) +# define ARCH_ARM64 +# elif defined(_M_ARM) +# define ARCH_ARM32 +# endif +#else +# if defined(__x86_64__) +# define ARCH_X86_64 +# elif defined(__i386__) +# define ARCH_X86_32 +# elif defined(__aarch64__) +# define ARCH_ARM64 +# elif defined(__arm__) +# define ARCH_ARM32 +# endif +#endif + +/* ========================================================================== */ +/* Type definitions */ +/* ========================================================================== */ + +/* Fixed-width integer types */ +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +/* ssize_t, if not available in <sys/types.h> */ +#ifdef _MSC_VER +# ifdef _WIN64 + typedef long long ssize_t; +# else + typedef long ssize_t; +# endif +#endif + +/* + * Word type of the target architecture. Use 'size_t' instead of + * 'unsigned long' to account for platforms such as Windows that use 32-bit + * 'unsigned long' on 64-bit architectures. + */ +typedef size_t machine_word_t; + +/* Number of bytes in a word */ +#define WORDBYTES ((int)sizeof(machine_word_t)) + +/* Number of bits in a word */ +#define WORDBITS (8 * WORDBYTES) + +/* ========================================================================== */ +/* Optional compiler features */ +/* ========================================================================== */ + +/* Compiler version checks. Only use when absolutely necessary.
*/ +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +# define GCC_PREREQ(major, minor) \ + (__GNUC__ > (major) || \ + (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) +#else +# define GCC_PREREQ(major, minor) 0 +#endif +#ifdef __clang__ +# ifdef __apple_build_version__ +# define CLANG_PREREQ(major, minor, apple_version) \ + (__apple_build_version__ >= (apple_version)) +# else +# define CLANG_PREREQ(major, minor, apple_version) \ + (__clang_major__ > (major) || \ + (__clang_major__ == (major) && __clang_minor__ >= (minor))) +# endif +#else +# define CLANG_PREREQ(major, minor, apple_version) 0 +#endif + +/* + * Macros to check for compiler support for attributes and builtins. clang + * implements these macros, but gcc doesn't, so generally any use of one of + * these macros must also be combined with a gcc version check. + */ +#ifndef __has_attribute +# define __has_attribute(attribute) 0 +#endif +#ifndef __has_builtin +# define __has_builtin(builtin) 0 +#endif + +/* inline - suggest that a function be inlined */ +#ifdef _MSC_VER +# define inline __inline +#endif /* else assume 'inline' is usable as-is */ + +/* forceinline - force a function to be inlined, if possible */ +#if defined(__GNUC__) || __has_attribute(always_inline) +# define forceinline inline __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define forceinline __forceinline +#else +# define forceinline inline +#endif + +/* MAYBE_UNUSED - mark a function or variable as maybe unused */ +#if defined(__GNUC__) || __has_attribute(unused) +# define MAYBE_UNUSED __attribute__((unused)) +#else +# define MAYBE_UNUSED +#endif + +/* + * restrict - hint that writes only occur through the given pointer. + * + * Don't use MSVC's __restrict, since it has nonstandard behavior. + * Standard restrict is okay, if it is supported. 
+ */ +#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 201112L) +# if defined(__GNUC__) || defined(__clang__) +# define restrict __restrict__ +# else +# define restrict +# endif +#endif /* else assume 'restrict' is usable as-is */ + +/* likely(expr) - hint that an expression is usually true */ +#if defined(__GNUC__) || __has_builtin(__builtin_expect) +# define likely(expr) __builtin_expect(!!(expr), 1) +#else +# define likely(expr) (expr) +#endif + +/* unlikely(expr) - hint that an expression is usually false */ +#if defined(__GNUC__) || __has_builtin(__builtin_expect) +# define unlikely(expr) __builtin_expect(!!(expr), 0) +#else +# define unlikely(expr) (expr) +#endif + +/* prefetchr(addr) - prefetch into L1 cache for read */ +#undef prefetchr +#if defined(__GNUC__) || __has_builtin(__builtin_prefetch) +# define prefetchr(addr) __builtin_prefetch((addr), 0) +#elif defined(_MSC_VER) +# if defined(ARCH_X86_32) || defined(ARCH_X86_64) +# define prefetchr(addr) _mm_prefetch((addr), _MM_HINT_T0) +# elif defined(ARCH_ARM64) +# define prefetchr(addr) __prefetch2((addr), 0x00 /* prfop=PLDL1KEEP */) +# elif defined(ARCH_ARM32) +# define prefetchr(addr) __prefetch(addr) +# endif +#endif +#ifndef prefetchr +# define prefetchr(addr) +#endif + +/* prefetchw(addr) - prefetch into L1 cache for write */ +#undef prefetchw +#if defined(__GNUC__) || __has_builtin(__builtin_prefetch) +# define prefetchw(addr) __builtin_prefetch((addr), 1) +#elif defined(_MSC_VER) +# if defined(ARCH_X86_32) || defined(ARCH_X86_64) +# define prefetchw(addr) _m_prefetchw(addr) +# elif defined(ARCH_ARM64) +# define prefetchw(addr) __prefetch2((addr), 0x10 /* prfop=PSTL1KEEP */) +# elif defined(ARCH_ARM32) +# define prefetchw(addr) __prefetchw(addr) +# endif +#endif +#ifndef prefetchw +# define prefetchw(addr) +#endif + +/* + * _aligned_attribute(n) - declare that the annotated variable, or variables of + * the annotated type, must be aligned on n-byte boundaries. 
+ */ +#undef _aligned_attribute +#if defined(__GNUC__) || __has_attribute(aligned) +# define _aligned_attribute(n) __attribute__((aligned(n))) +#elif defined(_MSC_VER) +# define _aligned_attribute(n) __declspec(align(n)) +#endif + +/* + * _target_attribute(attrs) - override the compilation target for a function. + * + * This accepts one or more comma-separated suffixes to the -m prefix jointly + * forming the name of a machine-dependent option. On gcc-like compilers, this + * enables codegen for the given targets, including arbitrary compiler-generated + * code as well as the corresponding intrinsics. On other compilers this macro + * expands to nothing, though MSVC allows intrinsics to be used anywhere anyway. + */ +#if GCC_PREREQ(4, 4) || __has_attribute(target) +# define _target_attribute(attrs) __attribute__((target(attrs))) +# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 1 +#else +# define _target_attribute(attrs) +# define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0 +#endif + +/* ========================================================================== */ +/* Miscellaneous macros */ +/* ========================================================================== */ + +#define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0])) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) +#define MAX(a, b) ((a) >= (b) ? (a) : (b)) +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) +#define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1)) +#define ROUND_UP(n, d) ((d) * DIV_ROUND_UP((n), (d))) + +/* ========================================================================== */ +/* Endianness handling */ +/* ========================================================================== */ + +/* + * CPU_IS_LITTLE_ENDIAN() - 1 if the CPU is little endian, or 0 if it is big + * endian. When possible this is a compile-time macro that can be used in + * preprocessor conditionals. 
As a fallback, a generic method is used that + * can't be used in preprocessor conditionals but should still be optimized out. + */ +#if defined(__BYTE_ORDER__) /* gcc v4.6+ and clang */ +# define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#elif defined(_MSC_VER) +# define CPU_IS_LITTLE_ENDIAN() true +#else +static forceinline bool CPU_IS_LITTLE_ENDIAN(void) +{ + union { + u32 w; + u8 b; + } u; + + u.w = 1; + return u.b; +} +#endif + +/* bswap16(v) - swap the bytes of a 16-bit integer */ +static forceinline u16 bswap16(u16 v) +{ +#if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) + return __builtin_bswap16(v); +#elif defined(_MSC_VER) + return _byteswap_ushort(v); +#else + return (v << 8) | (v >> 8); +#endif +} + +/* bswap32(v) - swap the bytes of a 32-bit integer */ +static forceinline u32 bswap32(u32 v) +{ +#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32) + return __builtin_bswap32(v); +#elif defined(_MSC_VER) + return _byteswap_ulong(v); +#else + return ((v & 0x000000FF) << 24) | + ((v & 0x0000FF00) << 8) | + ((v & 0x00FF0000) >> 8) | + ((v & 0xFF000000) >> 24); +#endif +} + +/* bswap64(v) - swap the bytes of a 64-bit integer */ +static forceinline u64 bswap64(u64 v) +{ +#if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64) + return __builtin_bswap64(v); +#elif defined(_MSC_VER) + return _byteswap_uint64(v); +#else + return ((v & 0x00000000000000FF) << 56) | + ((v & 0x000000000000FF00) << 40) | + ((v & 0x0000000000FF0000) << 24) | + ((v & 0x00000000FF000000) << 8) | + ((v & 0x000000FF00000000) >> 8) | + ((v & 0x0000FF0000000000) >> 24) | + ((v & 0x00FF000000000000) >> 40) | + ((v & 0xFF00000000000000) >> 56); +#endif +} + +#define le16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap16(v)) +#define le32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap32(v)) +#define le64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? (v) : bswap64(v)) +#define be16_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? 
bswap16(v) : (v)) +#define be32_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap32(v) : (v)) +#define be64_bswap(v) (CPU_IS_LITTLE_ENDIAN() ? bswap64(v) : (v)) + +/* ========================================================================== */ +/* Unaligned memory accesses */ +/* ========================================================================== */ + +/* + * UNALIGNED_ACCESS_IS_FAST() - 1 if unaligned memory accesses can be performed + * efficiently on the target platform, otherwise 0. + */ +#if (defined(__GNUC__) || defined(__clang__)) && \ + (defined(ARCH_X86_64) || defined(ARCH_X86_32) || \ + defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \ + /* + * For all compilation purposes, WebAssembly behaves like any other CPU + * instruction set. Even though WebAssembly engine might be running on + * top of different actual CPU architectures, the WebAssembly spec + * itself permits unaligned access and it will be fast on most of those + * platforms, and simulated at the engine level on others, so it's + * worth treating it as a CPU architecture with fast unaligned access. + */ defined(__wasm__)) +# define UNALIGNED_ACCESS_IS_FAST 1 +#elif defined(_MSC_VER) +# define UNALIGNED_ACCESS_IS_FAST 1 +#else +# define UNALIGNED_ACCESS_IS_FAST 0 +#endif + +/* + * Implementing unaligned memory accesses using memcpy() is portable, and it + * usually gets optimized appropriately by modern compilers. I.e., each + * memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled to a load or store + * instruction, not to an actual function call. + * + * We no longer use the "packed struct" approach to unaligned accesses, as that + * is nonstandard, has unclear semantics, and doesn't receive enough testing + * (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994). + * + * arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception + * where memcpy() generates inefficient code + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366). 
However, we no longer + * consider that one case important enough to maintain different code for. + * If you run into it, please just use a newer version of gcc (or use clang). + */ + +#ifdef FREESTANDING +# define MEMCOPY __builtin_memcpy +#else +# define MEMCOPY memcpy +#endif + +/* Unaligned loads and stores without endianness conversion */ + +#define DEFINE_UNALIGNED_TYPE(type) \ +static forceinline type \ +load_##type##_unaligned(const void *p) \ +{ \ + type v; \ + \ + MEMCOPY(&v, p, sizeof(v)); \ + return v; \ +} \ + \ +static forceinline void \ +store_##type##_unaligned(type v, void *p) \ +{ \ + MEMCOPY(p, &v, sizeof(v)); \ +} + +DEFINE_UNALIGNED_TYPE(u16) +DEFINE_UNALIGNED_TYPE(u32) +DEFINE_UNALIGNED_TYPE(u64) +DEFINE_UNALIGNED_TYPE(machine_word_t) + +#undef MEMCOPY + +#define load_word_unaligned load_machine_word_t_unaligned +#define store_word_unaligned store_machine_word_t_unaligned + +/* Unaligned loads with endianness conversion */ + +static forceinline u16 +get_unaligned_le16(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return le16_bswap(load_u16_unaligned(p)); + else + return ((u16)p[1] << 8) | p[0]; +} + +static forceinline u16 +get_unaligned_be16(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return be16_bswap(load_u16_unaligned(p)); + else + return ((u16)p[0] << 8) | p[1]; +} + +static forceinline u32 +get_unaligned_le32(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return le32_bswap(load_u32_unaligned(p)); + else + return ((u32)p[3] << 24) | ((u32)p[2] << 16) | + ((u32)p[1] << 8) | p[0]; +} + +static forceinline u32 +get_unaligned_be32(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return be32_bswap(load_u32_unaligned(p)); + else + return ((u32)p[0] << 24) | ((u32)p[1] << 16) | + ((u32)p[2] << 8) | p[3]; +} + +static forceinline u64 +get_unaligned_le64(const u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) + return le64_bswap(load_u64_unaligned(p)); + else + return ((u64)p[7] << 56) | ((u64)p[6] << 48) | + ((u64)p[5] << 40) | ((u64)p[4] 
<< 32) | + ((u64)p[3] << 24) | ((u64)p[2] << 16) | + ((u64)p[1] << 8) | p[0]; +} + +static forceinline machine_word_t +get_unaligned_leword(const u8 *p) +{ + STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); + if (WORDBITS == 32) + return get_unaligned_le32(p); + else + return get_unaligned_le64(p); +} + +/* Unaligned stores with endianness conversion */ + +static forceinline void +put_unaligned_le16(u16 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u16_unaligned(le16_bswap(v), p); + } else { + p[0] = (u8)(v >> 0); + p[1] = (u8)(v >> 8); + } +} + +static forceinline void +put_unaligned_be16(u16 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u16_unaligned(be16_bswap(v), p); + } else { + p[0] = (u8)(v >> 8); + p[1] = (u8)(v >> 0); + } +} + +static forceinline void +put_unaligned_le32(u32 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u32_unaligned(le32_bswap(v), p); + } else { + p[0] = (u8)(v >> 0); + p[1] = (u8)(v >> 8); + p[2] = (u8)(v >> 16); + p[3] = (u8)(v >> 24); + } +} + +static forceinline void +put_unaligned_be32(u32 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u32_unaligned(be32_bswap(v), p); + } else { + p[0] = (u8)(v >> 24); + p[1] = (u8)(v >> 16); + p[2] = (u8)(v >> 8); + p[3] = (u8)(v >> 0); + } +} + +static forceinline void +put_unaligned_le64(u64 v, u8 *p) +{ + if (UNALIGNED_ACCESS_IS_FAST) { + store_u64_unaligned(le64_bswap(v), p); + } else { + p[0] = (u8)(v >> 0); + p[1] = (u8)(v >> 8); + p[2] = (u8)(v >> 16); + p[3] = (u8)(v >> 24); + p[4] = (u8)(v >> 32); + p[5] = (u8)(v >> 40); + p[6] = (u8)(v >> 48); + p[7] = (u8)(v >> 56); + } +} + +static forceinline void +put_unaligned_leword(machine_word_t v, u8 *p) +{ + STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); + if (WORDBITS == 32) + put_unaligned_le32(v, p); + else + put_unaligned_le64(v, p); +} + +/* ========================================================================== */ +/* Bit manipulation functions */ +/* 
========================================================================== */ + +/* + * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least + * significant end) of the *most* significant 1 bit in the input value. The + * input value must be nonzero! + */ + +static forceinline unsigned +bsr32(u32 v) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_clz) + return 31 - __builtin_clz(v); +#elif defined(_MSC_VER) + unsigned long i; + + _BitScanReverse(&i, v); + return i; +#else + unsigned i = 0; + + while ((v >>= 1) != 0) + i++; + return i; +#endif +} + +static forceinline unsigned +bsr64(u64 v) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_clzll) + return 63 - __builtin_clzll(v); +#elif defined(_MSC_VER) && defined(_WIN64) + unsigned long i; + + _BitScanReverse64(&i, v); + return i; +#else + unsigned i = 0; + + while ((v >>= 1) != 0) + i++; + return i; +#endif +} + +static forceinline unsigned +bsrw(machine_word_t v) +{ + STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); + if (WORDBITS == 32) + return bsr32(v); + else + return bsr64(v); +} + +/* + * Bit Scan Forward (BSF) - find the 0-based index (relative to the least + * significant end) of the *least* significant 1 bit in the input value. The + * input value must be nonzero! 
+ */ + +static forceinline unsigned +bsf32(u32 v) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_ctz) + return __builtin_ctz(v); +#elif defined(_MSC_VER) + unsigned long i; + + _BitScanForward(&i, v); + return i; +#else + unsigned i = 0; + + for (; (v & 1) == 0; v >>= 1) + i++; + return i; +#endif +} + +static forceinline unsigned +bsf64(u64 v) +{ +#if defined(__GNUC__) || __has_builtin(__builtin_ctzll) + return __builtin_ctzll(v); +#elif defined(_MSC_VER) && defined(_WIN64) + unsigned long i; + + _BitScanForward64(&i, v); + return i; +#else + unsigned i = 0; + + for (; (v & 1) == 0; v >>= 1) + i++; + return i; +#endif +} + +static forceinline unsigned +bsfw(machine_word_t v) +{ + STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); + if (WORDBITS == 32) + return bsf32(v); + else + return bsf64(v); +} + +/* + * rbit32(v): reverse the bits in a 32-bit integer. This doesn't have a + * fallback implementation; use '#ifdef rbit32' to check if this is available. + */ +#undef rbit32 +#if (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM32) && \ + (__ARM_ARCH >= 7 || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6T2__))) +static forceinline u32 +rbit32(u32 v) +{ + __asm__("rbit %0, %1" : "=r" (v) : "r" (v)); + return v; +} +#define rbit32 rbit32 +#elif (defined(__GNUC__) || defined(__clang__)) && defined(ARCH_ARM64) +static forceinline u32 +rbit32(u32 v) +{ + __asm__("rbit %w0, %w1" : "=r" (v) : "r" (v)); + return v; +} +#define rbit32 rbit32 +#endif + +#endif /* COMMON_DEFS_H */ + + +typedef void *(*malloc_func_t)(size_t); +typedef void (*free_func_t)(void *); + +extern malloc_func_t libdeflate_default_malloc_func; +extern free_func_t libdeflate_default_free_func; + +void *libdeflate_aligned_malloc(malloc_func_t malloc_func, + size_t alignment, size_t size); +void libdeflate_aligned_free(free_func_t free_func, void *ptr); + +#ifdef FREESTANDING +/* + * With -ffreestanding, <string.h> may be missing, and we must provide + * implementations of memset(), memcpy(), memmove(),
and memcmp(). + * See https://gcc.gnu.org/onlinedocs/gcc/Standards.html + * + * Also, -ffreestanding disables interpreting calls to these functions as + * built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call, + * not be optimized to a single load instruction. For performance reasons we + * don't want that. So, declare these functions as macros that expand to the + * corresponding built-ins. This approach is recommended in the gcc man page. + * We still need the actual function definitions in case gcc calls them. + */ +void *memset(void *s, int c, size_t n); +#define memset(s, c, n) __builtin_memset((s), (c), (n)) + +void *memcpy(void *dest, const void *src, size_t n); +#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n)) + +void *memmove(void *dest, const void *src, size_t n); +#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n)) + +int memcmp(const void *s1, const void *s2, size_t n); +#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n)) + +#undef LIBDEFLATE_ENABLE_ASSERTIONS +#else +#include +#endif + +/* + * Runtime assertion support. Don't enable this in production builds; it may + * hurt performance significantly. 
+ */ +#ifdef LIBDEFLATE_ENABLE_ASSERTIONS +void libdeflate_assertion_failed(const char *expr, const char *file, int line); +#define ASSERT(expr) { if (unlikely(!(expr))) \ + libdeflate_assertion_failed(#expr, __FILE__, __LINE__); } +#else +#define ASSERT(expr) (void)(expr) +#endif + +#define CONCAT_IMPL(a, b) a##b +#define CONCAT(a, b) CONCAT_IMPL(a, b) +#define ADD_SUFFIX(name) CONCAT(name, SUFFIX) + +#endif /* LIB_LIB_COMMON_H */ + +/* + * deflate_constants.h - constants for the DEFLATE compression format + */ + +#ifndef LIB_DEFLATE_CONSTANTS_H +#define LIB_DEFLATE_CONSTANTS_H + +/* Valid block types */ +#define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0 +#define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1 +#define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2 + +/* Minimum and maximum supported match lengths (in bytes) */ +#define DEFLATE_MIN_MATCH_LEN 3 +#define DEFLATE_MAX_MATCH_LEN 258 + +/* Maximum supported match offset (in bytes) */ +#define DEFLATE_MAX_MATCH_OFFSET 32768 + +/* log2 of DEFLATE_MAX_MATCH_OFFSET */ +#define DEFLATE_WINDOW_ORDER 15 + +/* Number of symbols in each Huffman code. Note: for the literal/length + * and offset codes, these are actually the maximum values; a given block + * might use fewer symbols. 
*/ +#define DEFLATE_NUM_PRECODE_SYMS 19 +#define DEFLATE_NUM_LITLEN_SYMS 288 +#define DEFLATE_NUM_OFFSET_SYMS 32 + +/* The maximum number of symbols across all codes */ +#define DEFLATE_MAX_NUM_SYMS 288 + +/* Division of symbols in the literal/length code */ +#define DEFLATE_NUM_LITERALS 256 +#define DEFLATE_END_OF_BLOCK 256 +#define DEFLATE_FIRST_LEN_SYM 257 + +/* Maximum codeword length, in bits, within each Huffman code */ +#define DEFLATE_MAX_PRE_CODEWORD_LEN 7 +#define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15 +#define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15 + +/* The maximum codeword length across all codes */ +#define DEFLATE_MAX_CODEWORD_LEN 15 + +/* Maximum possible overrun when decoding codeword lengths */ +#define DEFLATE_MAX_LENS_OVERRUN 137 + +/* + * Maximum number of extra bits that may be required to represent a match + * length or offset. + */ +#define DEFLATE_MAX_EXTRA_LENGTH_BITS 5 +#define DEFLATE_MAX_EXTRA_OFFSET_BITS 13 + +#endif /* LIB_DEFLATE_CONSTANTS_H */ + +/* + * cpu_features_common.h - code shared by all lib/$arch/cpu_features.c + * + * Copyright 2020 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIB_CPU_FEATURES_COMMON_H +#define LIB_CPU_FEATURES_COMMON_H + +#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING) + /* for strdup() and strtok_r() */ +# undef _ANSI_SOURCE +# ifndef __APPLE__ +# undef _GNU_SOURCE +# define _GNU_SOURCE +# endif +# include +# include +# include +#endif + +struct cpu_feature { + u32 bit; + const char *name; +}; + +#if defined(TEST_SUPPORT__DO_NOT_USE) && !defined(FREESTANDING) +/* Disable any features that are listed in $LIBDEFLATE_DISABLE_CPU_FEATURES. */ +static inline void +disable_cpu_features_for_testing(u32 *features, + const struct cpu_feature *feature_table, + size_t feature_table_length) +{ + char *env_value, *strbuf, *p, *saveptr = NULL; + size_t i; + + env_value = getenv("LIBDEFLATE_DISABLE_CPU_FEATURES"); + if (!env_value) + return; + strbuf = strdup(env_value); + if (!strbuf) + abort(); + p = strtok_r(strbuf, ",", &saveptr); + while (p) { + for (i = 0; i < feature_table_length; i++) { + if (strcmp(p, feature_table[i].name) == 0) { + *features &= ~feature_table[i].bit; + break; + } + } + if (i == feature_table_length) { + fprintf(stderr, + "unrecognized feature in LIBDEFLATE_DISABLE_CPU_FEATURES: \"%s\"\n", + p); + abort(); + } + p = strtok_r(NULL, ",", &saveptr); + } + free(strbuf); +} +#else /* TEST_SUPPORT__DO_NOT_USE */ +static inline void +disable_cpu_features_for_testing(u32 *features, + const struct cpu_feature *feature_table, + size_t feature_table_length) +{ +} +#endif /* !TEST_SUPPORT__DO_NOT_USE */ + +#endif /* LIB_CPU_FEATURES_COMMON_H */ + +/* + * x86/cpu_features.h - feature detection for x86 CPUs + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * 
obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef LIB_X86_CPU_FEATURES_H
+#define LIB_X86_CPU_FEATURES_H
+
+#define HAVE_DYNAMIC_X86_CPU_FEATURES 0 /* default; redefined to 1 below when supported */
+
+#if defined(ARCH_X86_32) || defined(ARCH_X86_64)
+
+#if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE || defined(_MSC_VER)
+# undef HAVE_DYNAMIC_X86_CPU_FEATURES
+# define HAVE_DYNAMIC_X86_CPU_FEATURES 1
+#endif
+/* Bit flags of the x86 CPU feature mask, one bit per instruction set extension. */
+#define X86_CPU_FEATURE_SSE2 0x00000001
+#define X86_CPU_FEATURE_PCLMUL 0x00000002
+#define X86_CPU_FEATURE_AVX 0x00000004
+#define X86_CPU_FEATURE_AVX2 0x00000008
+#define X86_CPU_FEATURE_BMI2 0x00000010
+/*
+ * HAVE_<X>(features) is nonzero if <X> may be used: either HAVE_<X>_NATIVE is
+ * set, or the matching X86_CPU_FEATURE_<X> bit is set in 'features'.
+ */
+#define HAVE_SSE2(features) (HAVE_SSE2_NATIVE || ((features) & X86_CPU_FEATURE_SSE2))
+#define HAVE_PCLMUL(features) (HAVE_PCLMUL_NATIVE || ((features) & X86_CPU_FEATURE_PCLMUL))
+#define HAVE_AVX(features) (HAVE_AVX_NATIVE || ((features) & X86_CPU_FEATURE_AVX))
+#define HAVE_AVX2(features) (HAVE_AVX2_NATIVE || ((features) & X86_CPU_FEATURE_AVX2))
+#define HAVE_BMI2(features) (HAVE_BMI2_NATIVE || ((features) & X86_CPU_FEATURE_BMI2))
+
+#if HAVE_DYNAMIC_X86_CPU_FEATURES
+#define X86_CPU_FEATURES_KNOWN 0x80000000 /* NOTE(review): unused in this header; presumably OR'ed in by
+ * libdeflate_init_x86_cpu_features() so that the cached mask is nonzero
+ * once detection has run -- confirm in the .c file */
+extern volatile u32 libdeflate_x86_cpu_features; /* cached mask; 0 is treated below as "not initialized yet" */
+
+void libdeflate_init_x86_cpu_features(void);
+/*
+ * Return the CPU feature mask stored in libdeflate_x86_cpu_features. If the
+ * variable still reads 0, libdeflate_init_x86_cpu_features() is called first
+ * and the variable is read again for the return value.
+ *
+ * NOTE(review): avoiding a detection run on every call relies on the init
+ * function leaving a nonzero value behind -- confirm.
+ */
+static inline u32 get_x86_cpu_features(void)
+{
+    if (libdeflate_x86_cpu_features == 0)
+        libdeflate_init_x86_cpu_features();
+    return libdeflate_x86_cpu_features;
+}
+#else /* HAVE_DYNAMIC_X86_CPU_FEATURES */
+static inline u32 get_x86_cpu_features(void) { return 0; } /* no dynamic detection: report no feature bits */
+#endif /* !HAVE_DYNAMIC_X86_CPU_FEATURES */
+
+/*
+ * Prior to gcc 4.9 (r200349) and clang 3.8 (r239883), x86 intrinsics not
+ * available in the main target couldn't be used in 'target' attribute
+ * functions. Unfortunately clang has no feature test macro for this, so we
+ * have to check its version.
+ */ +#if HAVE_DYNAMIC_X86_CPU_FEATURES && \ + (GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000) || defined(_MSC_VER)) +# define HAVE_TARGET_INTRINSICS 1 +#else +# define HAVE_TARGET_INTRINSICS 0 +#endif + +/* SSE2 */ +#if defined(__SSE2__) || \ + (defined(_MSC_VER) && \ + (defined(ARCH_X86_64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2))) +# define HAVE_SSE2_NATIVE 1 +#else +# define HAVE_SSE2_NATIVE 0 +#endif +#define HAVE_SSE2_INTRIN (HAVE_SSE2_NATIVE || HAVE_TARGET_INTRINSICS) + +/* PCLMUL */ +#if defined(__PCLMUL__) || (defined(_MSC_VER) && defined(__AVX2__)) +# define HAVE_PCLMUL_NATIVE 1 +#else +# define HAVE_PCLMUL_NATIVE 0 +#endif +#if HAVE_PCLMUL_NATIVE || (HAVE_TARGET_INTRINSICS && \ + (GCC_PREREQ(4, 4) || CLANG_PREREQ(3, 2, 0) || \ + defined(_MSC_VER))) +# define HAVE_PCLMUL_INTRIN 1 +#else +# define HAVE_PCLMUL_INTRIN 0 +#endif + +/* AVX */ +#ifdef __AVX__ +# define HAVE_AVX_NATIVE 1 +#else +# define HAVE_AVX_NATIVE 0 +#endif +#if HAVE_AVX_NATIVE || (HAVE_TARGET_INTRINSICS && \ + (GCC_PREREQ(4, 6) || CLANG_PREREQ(3, 0, 0) || \ + defined(_MSC_VER))) +# define HAVE_AVX_INTRIN 1 +#else +# define HAVE_AVX_INTRIN 0 +#endif + +/* AVX2 */ +#ifdef __AVX2__ +# define HAVE_AVX2_NATIVE 1 +#else +# define HAVE_AVX2_NATIVE 0 +#endif +#if HAVE_AVX2_NATIVE || (HAVE_TARGET_INTRINSICS && \ + (GCC_PREREQ(4, 7) || CLANG_PREREQ(3, 1, 0) || \ + defined(_MSC_VER))) +# define HAVE_AVX2_INTRIN 1 +#else +# define HAVE_AVX2_INTRIN 0 +#endif + +/* BMI2 */ +#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) +# define HAVE_BMI2_NATIVE 1 +#else +# define HAVE_BMI2_NATIVE 0 +#endif +#if HAVE_BMI2_NATIVE || (HAVE_TARGET_INTRINSICS && \ + (GCC_PREREQ(4, 7) || CLANG_PREREQ(3, 1, 0) || \ + defined(_MSC_VER))) +# define HAVE_BMI2_INTRIN 1 +#else +# define HAVE_BMI2_INTRIN 0 +#endif + +#endif /* ARCH_X86_32 || ARCH_X86_64 */ + +#endif /* LIB_X86_CPU_FEATURES_H */ + + +/* + * If the expression passed to SAFETY_CHECK() evaluates to false, then the + * decompression routine 
immediately returns LIBDEFLATE_BAD_DATA, indicating the
+ * compressed data is invalid.
+ *
+ * Theoretically, these checks could be disabled for specialized applications
+ * where all input to the decompressor will be trusted.
+ */
+#if 0 /* change to 1 only to compile the checks out, as discussed above */
+# pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
+# define SAFETY_CHECK(expr) (void)(expr) /* unsafe variant: 'expr' is still evaluated, but its result is ignored */
+#else
+# define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA /*
+ * NOTE: this expands to a bare 'if (...) return ...' with no do/while(0)
+ * wrapper and no trailing ';'. Use it only as a complete statement, inside
+ * a function that can return LIBDEFLATE_BAD_DATA, and never directly
+ * before an 'else' (the 'else' would bind to the macro's 'if').
+ */
+#endif
+
+/*****************************************************************************
+ * Input bitstream *
+ *****************************************************************************/
+
+/*
+ * The state of the "input bitstream" consists of the following variables:
+ *
+ * - in_next: a pointer to the next unread byte in the input buffer
+ *
+ * - in_end: a pointer to just past the end of the input buffer
+ *
+ * - bitbuf: a word-sized variable containing bits that have been read from
+ * the input buffer or from the implicit appended zero bytes
+ *
+ * - bitsleft: the number of bits in 'bitbuf' available to be consumed.
+ * After REFILL_BITS_BRANCHLESS(), 'bitbuf' can actually
+ * contain more bits than this. However, only the bits counted
+ * by 'bitsleft' can actually be consumed; the rest can only be
+ * used for preloading.
+ *
+ * As a micro-optimization, we allow bits 8 and higher of
+ * 'bitsleft' to contain garbage. When consuming the bits
+ * associated with a decode table entry, this allows us to do
+ * 'bitsleft -= entry' instead of 'bitsleft -= (u8)entry'.
+ * On some CPUs, this helps reduce instruction dependencies.
+ * This does have the disadvantage that 'bitsleft' sometimes
+ * needs to be cast to 'u8', such as when it's used as a shift
+ * amount in REFILL_BITS_BRANCHLESS(). But that one happens
+ * for free since most CPUs ignore high bits in shift amounts.
+ * + * - overread_count: the total number of implicit appended zero bytes that + * have been loaded into the bitbuffer, including any + * counted by 'bitsleft' and any already consumed + */ + +/* + * The type for the bitbuffer variable ('bitbuf' described above). For best + * performance, this should have size equal to a machine word. + * + * 64-bit platforms have a significant advantage: they get a bigger bitbuffer + * which they don't have to refill as often. + */ +typedef machine_word_t bitbuf_t; +#define BITBUF_NBITS (8 * (int)sizeof(bitbuf_t)) + +/* BITMASK(n) returns a bitmask of length 'n'. */ +#define BITMASK(n) (((bitbuf_t)1 << (n)) - 1) + +/* + * MAX_BITSLEFT is the maximum number of consumable bits, i.e. the maximum value + * of '(u8)bitsleft'. This is the size of the bitbuffer variable, minus 1 if + * the branchless refill method is being used (see REFILL_BITS_BRANCHLESS()). + */ +#define MAX_BITSLEFT \ + (UNALIGNED_ACCESS_IS_FAST ? BITBUF_NBITS - 1 : BITBUF_NBITS) + +/* + * CONSUMABLE_NBITS is the minimum number of bits that are guaranteed to be + * consumable (counted in 'bitsleft') immediately after refilling the bitbuffer. + * Since only whole bytes can be added to 'bitsleft', the worst case is + * 'MAX_BITSLEFT - 7': the smallest amount where another byte doesn't fit. + */ +#define CONSUMABLE_NBITS (MAX_BITSLEFT - 7) + +/* + * FASTLOOP_PRELOADABLE_NBITS is the minimum number of bits that are guaranteed + * to be preloadable immediately after REFILL_BITS_IN_FASTLOOP(). (It is *not* + * guaranteed after REFILL_BITS(), since REFILL_BITS() falls back to a + * byte-at-a-time refill method near the end of input.) This may exceed the + * number of consumable bits (counted by 'bitsleft'). Any bits not counted in + * 'bitsleft' can only be used for precomputation and cannot be consumed. + */ +#define FASTLOOP_PRELOADABLE_NBITS \ + (UNALIGNED_ACCESS_IS_FAST ? 
BITBUF_NBITS : CONSUMABLE_NBITS) + +/* + * PRELOAD_SLACK is the minimum number of bits that are guaranteed to be + * preloadable but not consumable, following REFILL_BITS_IN_FASTLOOP() and any + * subsequent consumptions. This is 1 bit if the branchless refill method is + * being used, and 0 bits otherwise. + */ +#define PRELOAD_SLACK MAX(0, FASTLOOP_PRELOADABLE_NBITS - MAX_BITSLEFT) + +/* + * CAN_CONSUME(n) is true if it's guaranteed that if the bitbuffer has just been + * refilled, then it's always possible to consume 'n' bits from it. 'n' should + * be a compile-time constant, to enable compile-time evaluation. + */ +#define CAN_CONSUME(n) (CONSUMABLE_NBITS >= (n)) + +/* + * CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) is true if it's + * guaranteed that after REFILL_BITS_IN_FASTLOOP(), it's always possible to + * consume 'consume_nbits' bits, then preload 'preload_nbits' bits. The + * arguments should be compile-time constants to enable compile-time evaluation. + */ +#define CAN_CONSUME_AND_THEN_PRELOAD(consume_nbits, preload_nbits) \ + (CONSUMABLE_NBITS >= (consume_nbits) && \ + FASTLOOP_PRELOADABLE_NBITS >= (consume_nbits) + (preload_nbits)) + +/* + * REFILL_BITS_BRANCHLESS() branchlessly refills the bitbuffer variable by + * reading the next word from the input buffer and updating 'in_next' and + * 'bitsleft' based on how many bits were refilled -- counting whole bytes only. + * This is much faster than reading a byte at a time, at least if the CPU is + * little endian and supports fast unaligned memory accesses. + * + * The simplest way of branchlessly updating 'bitsleft' would be: + * + * bitsleft += (MAX_BITSLEFT - bitsleft) & ~7; + * + * To make it faster, we define MAX_BITSLEFT to be 'WORDBITS - 1' rather than + * WORDBITS, so that in binary it looks like 111111 or 11111. 
Then, we update + * 'bitsleft' by just setting the bits above the low 3 bits: + * + * bitsleft |= MAX_BITSLEFT & ~7; + * + * That compiles down to a single instruction like 'or $0x38, %rbp'. Using + * 'MAX_BITSLEFT == WORDBITS - 1' also has the advantage that refills can be + * done when 'bitsleft == MAX_BITSLEFT' without invoking undefined behavior. + * + * The simplest way of branchlessly updating 'in_next' would be: + * + * in_next += (MAX_BITSLEFT - bitsleft) >> 3; + * + * With 'MAX_BITSLEFT == WORDBITS - 1' we could use an XOR instead, though this + * isn't really better: + * + * in_next += (MAX_BITSLEFT ^ bitsleft) >> 3; + * + * An alternative which can be marginally better is the following: + * + * in_next += sizeof(bitbuf_t) - 1; + * in_next -= (bitsleft >> 3) & 0x7; + * + * It seems this would increase the number of CPU instructions from 3 (sub, shr, + * add) to 4 (add, shr, and, sub). However, if the CPU has a bitfield + * extraction instruction (e.g. arm's ubfx), it stays at 3, and is potentially + * more efficient because the length of the longest dependency chain decreases + * from 3 to 2. This alternative also has the advantage that it ignores the + * high bits in 'bitsleft', so it is compatible with the micro-optimization we + * use where we let the high bits of 'bitsleft' contain garbage. + */ +#define REFILL_BITS_BRANCHLESS() \ +do { \ + bitbuf |= get_unaligned_leword(in_next) << (u8)bitsleft; \ + in_next += sizeof(bitbuf_t) - 1; \ + in_next -= (bitsleft >> 3) & 0x7; \ + bitsleft |= MAX_BITSLEFT & ~7; \ +} while (0) + +/* + * REFILL_BITS() loads bits from the input buffer until the bitbuffer variable + * contains at least CONSUMABLE_NBITS consumable bits. + * + * This checks for the end of input, and it doesn't guarantee + * FASTLOOP_PRELOADABLE_NBITS, so it can't be used in the fastloop. + * + * If we would overread the input buffer, we just don't read anything, leaving + * the bits zeroed but marking them filled. 
This simplifies the decompressor + * because it removes the need to always be able to distinguish between real + * overreads and overreads caused only by the decompressor's own lookahead. + * + * We do still keep track of the number of bytes that have been overread, for + * two reasons. First, it allows us to determine the exact number of bytes that + * were consumed once the stream ends or an uncompressed block is reached. + * Second, it allows us to stop early if the overread amount gets so large (more + * than sizeof bitbuf) that it can only be caused by a real overread. (The + * second part is arguably unneeded, since libdeflate is buffer-based; given + * infinite zeroes, it will eventually either completely fill the output buffer + * or return an error. However, we do it to be slightly more friendly to the + * not-recommended use case of decompressing with an unknown output size.) + */ +#define REFILL_BITS() \ +do { \ + if (UNALIGNED_ACCESS_IS_FAST && \ + likely(in_end - in_next >= sizeof(bitbuf_t))) { \ + REFILL_BITS_BRANCHLESS(); \ + } else { \ + while ((u8)bitsleft < CONSUMABLE_NBITS) { \ + if (likely(in_next != in_end)) { \ + bitbuf |= (bitbuf_t)*in_next++ << \ + (u8)bitsleft; \ + } else { \ + overread_count++; \ + SAFETY_CHECK(overread_count <= \ + sizeof(bitbuf_t)); \ + } \ + bitsleft += 8; \ + } \ + } \ +} while (0) + +/* + * REFILL_BITS_IN_FASTLOOP() is like REFILL_BITS(), but it doesn't check for the + * end of the input. It can only be used in the fastloop. + */ +#define REFILL_BITS_IN_FASTLOOP() \ +do { \ + STATIC_ASSERT(UNALIGNED_ACCESS_IS_FAST || \ + FASTLOOP_PRELOADABLE_NBITS == CONSUMABLE_NBITS); \ + if (UNALIGNED_ACCESS_IS_FAST) { \ + REFILL_BITS_BRANCHLESS(); \ + } else { \ + while ((u8)bitsleft < CONSUMABLE_NBITS) { \ + bitbuf |= (bitbuf_t)*in_next++ << (u8)bitsleft; \ + bitsleft += 8; \ + } \ + } \ +} while (0) + +/* + * This is the worst-case maximum number of output bytes that are written to + * during each iteration of the fastloop. 
The worst case is 2 literals, then a + * match of length DEFLATE_MAX_MATCH_LEN. Additionally, some slack space must + * be included for the intentional overrun in the match copy implementation. + */ +#define FASTLOOP_MAX_BYTES_WRITTEN \ + (2 + DEFLATE_MAX_MATCH_LEN + (5 * WORDBYTES) - 1) + +/* + * This is the worst-case maximum number of input bytes that are read during + * each iteration of the fastloop. To get this value, we first compute the + * greatest number of bits that can be refilled during a loop iteration. The + * refill at the beginning can add at most MAX_BITSLEFT, and the amount that can + * be refilled later is no more than the maximum amount that can be consumed by + * 2 literals that don't need a subtable, then a match. We convert this value + * to bytes, rounding up; this gives the maximum number of bytes that 'in_next' + * can be advanced. Finally, we add sizeof(bitbuf_t) to account for + * REFILL_BITS_BRANCHLESS() reading a word past 'in_next'. + */ +#define FASTLOOP_MAX_BYTES_READ \ + (DIV_ROUND_UP(MAX_BITSLEFT + (2 * LITLEN_TABLEBITS) + \ + LENGTH_MAXBITS + OFFSET_MAXBITS, 8) + \ + sizeof(bitbuf_t)) + +/***************************************************************************** + * Huffman decoding * + *****************************************************************************/ + +/* + * The fastest way to decode Huffman-encoded data is basically to use a decode + * table that maps the next TABLEBITS bits of data to their symbol. Each entry + * decode_table[i] maps to the symbol whose codeword is a prefix of 'i'. A + * symbol with codeword length 'n' has '2**(TABLEBITS-n)' entries in the table. + * + * Ideally, TABLEBITS and the maximum codeword length would be the same; some + * compression formats are designed with this goal in mind. Unfortunately, in + * DEFLATE, the maximum litlen and offset codeword lengths are 15 bits, which is + * too large for a practical TABLEBITS. 
It's not *that* much larger, though, so + * the workaround is to use a single level of subtables. In the main table, + * entries for prefixes of codewords longer than TABLEBITS contain a "pointer" + * to the appropriate subtable along with the number of bits it is indexed with. + * + * The most efficient way to allocate subtables is to allocate them dynamically + * after the main table. The worst-case number of table entries needed, + * including subtables, is precomputable; see the ENOUGH constants below. + * + * A useful optimization is to store the codeword lengths in the decode table so + * that they don't have to be looked up by indexing a separate table that maps + * symbols to their codeword lengths. We basically do this; however, for the + * litlen and offset codes we also implement some DEFLATE-specific optimizations + * that build in the consideration of the "extra bits" and the + * literal/length/end-of-block division. For the exact decode table entry + * format we use, see the definitions of the *_decode_results[] arrays below. + */ + + +/* + * These are the TABLEBITS values we use for each of the DEFLATE Huffman codes, + * along with their corresponding ENOUGH values. + * + * For the precode, we use PRECODE_TABLEBITS == 7 since this is the maximum + * precode codeword length. This avoids ever needing subtables. + * + * For the litlen and offset codes, we cannot realistically avoid ever needing + * subtables, since litlen and offset codewords can be up to 15 bits. A higher + * TABLEBITS reduces the number of lookups that need a subtable, which increases + * performance; however, it increases memory usage and makes building the table + * take longer, which decreases performance. We choose values that work well in + * practice, making subtables rarely needed without making the tables too large. + * + * Our choice of OFFSET_TABLEBITS == 8 is a bit low; without any special + * considerations, 9 would fit the trade-off curve better. 
However, there is a + * performance benefit to using exactly 8 bits when it is a compile-time + * constant, as many CPUs can take the low byte more easily than the low 9 bits. + * + * zlib treats its equivalents of TABLEBITS as maximum values; whenever it + * builds a table, it caps the actual table_bits to the longest codeword. This + * makes sense in theory, as there's no need for the table to be any larger than + * needed to support the longest codeword. However, having the table bits be a + * compile-time constant is beneficial to the performance of the decode loop, so + * there is a trade-off. libdeflate currently uses the dynamic table_bits + * strategy for the litlen table only, due to its larger maximum size. + * PRECODE_TABLEBITS and OFFSET_TABLEBITS are smaller, so going dynamic there + * isn't as useful, and OFFSET_TABLEBITS=8 is useful as mentioned above. + * + * Each TABLEBITS value has a corresponding ENOUGH value that gives the + * worst-case maximum number of decode table entries, including the main table + * and all subtables. The ENOUGH value depends on three parameters: + * + * (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMS) + * (2) the maximum number of main table bits (*_TABLEBITS) + * (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN) + * + * The ENOUGH values were computed using the utility program 'enough' from zlib. + */ +#define PRECODE_TABLEBITS 7 +#define PRECODE_ENOUGH 128 /* enough 19 7 7 */ +#define LITLEN_TABLEBITS 11 +#define LITLEN_ENOUGH 2342 /* enough 288 11 15 */ +#define OFFSET_TABLEBITS 8 +#define OFFSET_ENOUGH 402 /* enough 32 8 15 */ + +/* + * make_decode_table_entry() creates a decode table entry for the given symbol + * by combining the static part 'decode_results[sym]' with the dynamic part + * 'len', which is the remaining codeword length (the codeword length for main + * table entries, or the codeword length minus TABLEBITS for subtable entries). 
+ *
+ * In all cases, we add 'len' to each of the two low-order bytes to create the
+ * appropriately-formatted decode table entry. See the definitions of the
+ * *_decode_results[] arrays below, where the entry format is described.
+ */
+static forceinline u32
+make_decode_table_entry(const u32 decode_results[], u32 sym, u32 len)
+{
+    return decode_results[sym] + (len << 8) + len; /* e.g. len == 7 adds 0x0707; addition (not OR) so that
+ * a low byte that already holds an extra-bits count ends up
+ * holding 'len + extra bits' */
+}
+
+/*
+ * Here is the format of our precode decode table entries. Bits not explicitly
+ * described contain zeroes:
+ *
+ * Bit 20-16: presym
+ * Bit 10-8: codeword length [not used]
+ * Bit 2-0: codeword length
+ *
+ * The precode decode table never has subtables, since we use
+ * PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN.
+ *
+ * precode_decode_results[] contains the static part of the entry for each
+ * symbol. make_decode_table_entry() produces the final entries.
+ */
+static const u32 precode_decode_results[] = { /* indexed by precode symbol; 19 entries (0..18) */
+#define ENTRY(presym) ((u32)presym << 16) /* NOTE(review): 'presym' is not parenthesized; harmless for the plain literals used below */
+ ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) ,
+ ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) ,
+ ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) ,
+ ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) ,
+ ENTRY(16) , ENTRY(17) , ENTRY(18) ,
+#undef ENTRY
+};
+
+/* Litlen and offset decode table entry flags */
+
+/* Indicates a literal entry in the litlen decode table */
+#define HUFFDEC_LITERAL 0x80000000
+
+/* Indicates that HUFFDEC_SUBTABLE_POINTER or HUFFDEC_END_OF_BLOCK is set */
+#define HUFFDEC_EXCEPTIONAL 0x00008000
+
+/* Indicates a subtable pointer entry in the litlen or offset decode table */
+#define HUFFDEC_SUBTABLE_POINTER 0x00004000
+
+/* Indicates an end-of-block entry in the litlen decode table */
+#define HUFFDEC_END_OF_BLOCK 0x00002000
+
+/* Maximum number of bits that can be consumed by decoding a match length */
+#define LENGTH_MAXBITS (DEFLATE_MAX_LITLEN_CODEWORD_LEN + \
+ DEFLATE_MAX_EXTRA_LENGTH_BITS)
+#define LENGTH_MAXFASTBITS (LITLEN_TABLEBITS /* no subtable needed */ + \
+ DEFLATE_MAX_EXTRA_LENGTH_BITS) /* same bound when the codeword is resolved by the main litlen table alone */
+
+/* + * Here is the format of our litlen decode table entries. Bits not explicitly + * described contain zeroes: + * + * Literals: + * Bit 31: 1 (HUFFDEC_LITERAL) + * Bit 23-16: literal value + * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL) + * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER) + * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK) + * Bit 11-8: remaining codeword length [not used] + * Bit 3-0: remaining codeword length + * Lengths: + * Bit 31: 0 (!HUFFDEC_LITERAL) + * Bit 24-16: length base value + * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL) + * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER) + * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK) + * Bit 11-8: remaining codeword length + * Bit 4-0: remaining codeword length + number of extra bits + * End of block: + * Bit 31: 0 (!HUFFDEC_LITERAL) + * Bit 15: 1 (HUFFDEC_EXCEPTIONAL) + * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER) + * Bit 13: 1 (HUFFDEC_END_OF_BLOCK) + * Bit 11-8: remaining codeword length [not used] + * Bit 3-0: remaining codeword length + * Subtable pointer: + * Bit 31: 0 (!HUFFDEC_LITERAL) + * Bit 30-16: index of start of subtable + * Bit 15: 1 (HUFFDEC_EXCEPTIONAL) + * Bit 14: 1 (HUFFDEC_SUBTABLE_POINTER) + * Bit 13: 0 (!HUFFDEC_END_OF_BLOCK) + * Bit 11-8: number of subtable bits + * Bit 3-0: number of main table bits + * + * This format has several desirable properties: + * + * - The codeword length, length slot base, and number of extra length bits + * are all built in. This eliminates the need to separately look up this + * information by indexing separate arrays by symbol or length slot. + * + * - The HUFFDEC_* flags enable easily distinguishing between the different + * types of entries. The HUFFDEC_LITERAL flag enables a fast path for + * literals; the high bit is used for this, as some CPUs can test the + * high bit more easily than other bits. The HUFFDEC_EXCEPTIONAL flag + * makes it possible to detect the two unlikely cases (subtable pointer + * and end of block) in a single bit flag test. 
+ * + * - The low byte is the number of bits that need to be removed from the + * bitstream; this makes this value easily accessible, and it enables the + * micro-optimization of doing 'bitsleft -= entry' instead of + * 'bitsleft -= (u8)entry'. It also includes the number of extra bits, + * so they don't need to be removed separately. + * + * - The flags in bits 15-13 are arranged to be 0 when the + * "remaining codeword length" in bits 11-8 is needed, making this value + * fairly easily accessible as well via a shift and downcast. + * + * - Similarly, bits 13-12 are 0 when the "subtable bits" in bits 11-8 are + * needed, making it possible to extract this value with '& 0x3F' rather + * than '& 0xF'. This value is only used as a shift amount, so this can + * save an 'and' instruction as the masking by 0x3F happens implicitly. + * + * litlen_decode_results[] contains the static part of the entry for each + * symbol. make_decode_table_entry() produces the final entries. + */ +static const u32 litlen_decode_results[] = { + + /* Literals */ +#define ENTRY(literal) (HUFFDEC_LITERAL | ((u32)literal << 16)) + ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) , + ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) , + ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) , + ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) , + ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) , + ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) , + ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) , + ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) , + ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) , + ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) , + ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) , + ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) , + ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) , + ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) , + ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) , + ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) , + ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) , + ENTRY(68) , ENTRY(69) , ENTRY(70) , 
ENTRY(71) , + ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) , + ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) , + ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) , + ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) , + ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) , + ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) , + ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) , + ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) , + ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) , + ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) , + ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) , + ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) , + ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) , + ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) , + ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) , + ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) , + ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) , + ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) , + ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) , + ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) , + ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) , + ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) , + ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) , + ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) , + ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) , + ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) , + ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) , + ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) , + ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) , + ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) , + ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) , + ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) , + ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) , + ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) , + ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) , + ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) , + ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) , + ENTRY(220) , 
ENTRY(221) , ENTRY(222) , ENTRY(223) , + ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) , + ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) , + ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) , + ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) , + ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) , + ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) , + ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) , + ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) , +#undef ENTRY + + /* End of block */ + HUFFDEC_EXCEPTIONAL | HUFFDEC_END_OF_BLOCK, + + /* Lengths */ +#define ENTRY(length_base, num_extra_bits) \ + (((u32)(length_base) << 16) | (num_extra_bits)) + ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0), + ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0), + ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1), + ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2), + ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3), + ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4), + ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5), + ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) , +#undef ENTRY +}; + +/* Maximum number of bits that can be consumed by decoding a match offset */ +#define OFFSET_MAXBITS (DEFLATE_MAX_OFFSET_CODEWORD_LEN + \ + DEFLATE_MAX_EXTRA_OFFSET_BITS) +#define OFFSET_MAXFASTBITS (OFFSET_TABLEBITS /* no subtable needed */ + \ + DEFLATE_MAX_EXTRA_OFFSET_BITS) + +/* + * Here is the format of our offset decode table entries. 
Bits not explicitly
+ * described contain zeroes:
+ *
+ * Offsets:
+ * Bit 31-16: offset base value
+ * Bit 15: 0 (!HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 0 (!HUFFDEC_SUBTABLE_POINTER)
+ * Bit 11-8: remaining codeword length
+ * Bit 4-0: remaining codeword length + number of extra bits
+ * Subtable pointer:
+ * Bit 31-16: index of start of subtable
+ * Bit 15: 1 (HUFFDEC_EXCEPTIONAL)
+ * Bit 14: 1 (HUFFDEC_SUBTABLE_POINTER)
+ * Bit 11-8: number of subtable bits
+ * Bit 3-0: number of main table bits
+ *
+ * These work the same way as the length entries and subtable pointer entries in
+ * the litlen decode table; see litlen_decode_results[] above.
+ */
+static const u32 offset_decode_results[] = { /* indexed by offset symbol; 32 entries (0..31) */
+#define ENTRY(offset_base, num_extra_bits) \
+ (((u32)(offset_base) << 16) | (num_extra_bits))
+ ENTRY(1 , 0) , ENTRY(2 , 0) , ENTRY(3 , 0) , ENTRY(4 , 0) ,
+ ENTRY(5 , 1) , ENTRY(7 , 1) , ENTRY(9 , 2) , ENTRY(13 , 2) ,
+ ENTRY(17 , 3) , ENTRY(25 , 3) , ENTRY(33 , 4) , ENTRY(49 , 4) ,
+ ENTRY(65 , 5) , ENTRY(97 , 5) , ENTRY(129 , 6) , ENTRY(193 , 6) ,
+ ENTRY(257 , 7) , ENTRY(385 , 7) , ENTRY(513 , 8) , ENTRY(769 , 8) ,
+ ENTRY(1025 , 9) , ENTRY(1537 , 9) , ENTRY(2049 , 10) , ENTRY(3073 , 10) ,
+ ENTRY(4097 , 11) , ENTRY(6145 , 11) , ENTRY(8193 , 12) , ENTRY(12289 , 12) ,
+ ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) , ENTRY(24577 , 13) , /* the last two entries (symbols 30
+ * and 31) repeat symbol 29's entry. NOTE(review): presumably
+ * placeholders, since DEFLATE defines offset codes 0..29 only --
+ * confirm how the decoder treats symbols 30-31 */
+#undef ENTRY
+};
+
+/*
+ * The main DEFLATE decompressor structure. Since libdeflate only supports
+ * full-buffer decompression, this structure doesn't store the entire
+ * decompression state, most of which is in stack variables. Instead, this
+ * struct just contains the decode tables and some temporary arrays used for
+ * building them, as these are too large to comfortably allocate on the stack.
+ *
+ * Storing the decode tables in the decompressor struct also allows the decode
+ * tables for the static codes to be reused whenever two static Huffman blocks
+ * are decoded without an intervening dynamic block, even across streams.
+ */
+struct libdeflate_decompressor {
+
+    /*
+ * The arrays aren't all needed at the same time. 'precode_lens' and
+ * 'precode_decode_table' are unneeded after 'lens' has been filled.
+ * Furthermore, 'lens' need not be retained after building the litlen
+ * and offset decode tables. In fact, 'lens' can be in union with
+ * 'litlen_decode_table' provided that 'offset_decode_table' is separate
+ * and is built first.
+ */
+
+    union {
+        u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS]; /* one codeword length per precode symbol */
+
+        struct {
+            u8 lens[DEFLATE_NUM_LITLEN_SYMS +
+                DEFLATE_NUM_OFFSET_SYMS +
+                DEFLATE_MAX_LENS_OVERRUN]; /* room for every litlen and offset codeword length, plus
+ * DEFLATE_MAX_LENS_OVERRUN bytes of slack for overrun
+ * while the lengths are being decoded */
+
+            u32 precode_decode_table[PRECODE_ENOUGH]; /* decode table for the precode */
+        } l;
+
+        u32 litlen_decode_table[LITLEN_ENOUGH]; /* overlays 'precode_lens' and 'l' (same union) */
+    } u;
+
+    u32 offset_decode_table[OFFSET_ENOUGH]; /* kept outside the union, per the comment above */
+
+    /* used only during build_decode_table() */
+    u16 sorted_syms[DEFLATE_MAX_NUM_SYMS];
+
+    bool static_codes_loaded; /* NOTE(review): presumably set while the tables hold the static
+ * Huffman codes, enabling the reuse described in the struct
+ * comment -- confirm at the use sites */
+    unsigned litlen_tablebits; /* NOTE(review): presumably the table_bits actually used for
+ * litlen_decode_table (build_decode_table() can report a
+ * reduced value through table_bits_ret) -- confirm */
+
+    /* The free() function for this struct, chosen at allocation time */
+    free_func_t free_func;
+};
+
+/*
+ * Build a table for fast decoding of symbols from a Huffman code. As input,
+ * this function takes the codeword length of each symbol which may be used in
+ * the code. As output, it produces a decode table for the canonical Huffman
+ * code described by the codeword lengths. The decode table is built with the
+ * assumption that it will be indexed with "bit-reversed" codewords, where the
+ * low-order bit is the first bit of the codeword. This format is used for all
+ * Huffman codes in DEFLATE.
+ *
+ * @decode_table
+ * The array in which the decode table will be generated. This array must
+ * have sufficient length; see the definition of the ENOUGH numbers.
+ * @lens
+ *	An array which provides, for each symbol, the length of the
+ *	corresponding codeword in bits, or 0 if the symbol is unused.  This may
+ *	alias @decode_table, since nothing is written to @decode_table until all
+ *	@lens have been consumed.  All codeword lengths are assumed to be <=
+ *	@max_codeword_len but are otherwise considered untrusted.  If they do
+ *	not form a valid Huffman code, then the decode table is not built and
+ *	%false is returned.
+ * @num_syms
+ *	The number of symbols in the code, including all unused symbols.
+ * @decode_results
+ *	An array which gives the incomplete decode result for each symbol.  The
+ *	needed values in this array will be combined with codeword lengths to
+ *	make the final decode table entries using make_decode_table_entry().
+ * @table_bits
+ *	The log base-2 of the number of main table entries to use.
+ *	If @table_bits_ret != NULL, then @table_bits is treated as a maximum
+ *	value and it will be decreased if a smaller table would be sufficient.
+ * @max_codeword_len
+ *	The maximum allowed codeword length for this Huffman code.
+ *	Must be <= DEFLATE_MAX_CODEWORD_LEN.
+ * @sorted_syms
+ *	A temporary array of length @num_syms.
+ * @table_bits_ret
+ *	If non-NULL, then the dynamic table_bits is enabled, and the actual
+ *	table_bits value will be returned here.
+ *
+ * Returns %true if successful; %false if the codeword lengths do not form a
+ * valid Huffman code.
+ *
+ * Overview of the steps below: count the codewords of each length, sort the
+ * symbols by (length, symbol value), validate how much of the codespace is
+ * used, fill the main-table entries for codewords with len <= table_bits, and
+ * finally build subtables for any longer codewords.
+ */
+static bool
+build_decode_table(u32 decode_table[],
+		   const u8 lens[],
+		   const unsigned num_syms,
+		   const u32 decode_results[],
+		   unsigned table_bits,
+		   unsigned max_codeword_len,
+		   u16 *sorted_syms,
+		   unsigned *table_bits_ret)
+{
+	unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1];
+	unsigned offsets[DEFLATE_MAX_CODEWORD_LEN + 1];
+	unsigned sym;		/* current symbol */
+	unsigned codeword;	/* current codeword, bit-reversed */
+	unsigned len;		/* current codeword length in bits */
+	unsigned count;		/* num codewords remaining with this length */
+	u32 codespace_used;	/* codespace used out of '2^max_codeword_len' */
+	unsigned cur_table_end; /* end index of current table */
+	unsigned subtable_prefix; /* codeword prefix of current subtable */
+	unsigned subtable_start;  /* start index of current subtable */
+	unsigned subtable_bits;   /* log2 of current subtable length */
+
+	/* Count how many codewords have each length, including 0. */
+	for (len = 0; len <= max_codeword_len; len++)
+		len_counts[len] = 0;
+	for (sym = 0; sym < num_syms; sym++)
+		len_counts[lens[sym]]++;
+
+	/*
+	 * Determine the actual maximum codeword length that was used, and
+	 * decrease table_bits to it if allowed.
+	 */
+	while (max_codeword_len > 1 && len_counts[max_codeword_len] == 0)
+		max_codeword_len--;
+	if (table_bits_ret != NULL) {
+		table_bits = MIN(table_bits, max_codeword_len);
+		*table_bits_ret = table_bits;
+	}
+
+	/*
+	 * Sort the symbols primarily by increasing codeword length and
+	 * secondarily by increasing symbol value; or equivalently by their
+	 * codewords in lexicographic order, since a canonical code is assumed.
+	 *
+	 * For efficiency, also compute 'codespace_used' in the same pass over
+	 * 'len_counts[]' used to build 'offsets[]' for sorting.
+	 */
+
+	/* Ensure that 'codespace_used' cannot overflow. */
+	STATIC_ASSERT(sizeof(codespace_used) == 4);
+	STATIC_ASSERT(UINT32_MAX / (1U << (DEFLATE_MAX_CODEWORD_LEN - 1)) >=
+		      DEFLATE_MAX_NUM_SYMS);
+
+	offsets[0] = 0;
+	offsets[1] = len_counts[0];
+	codespace_used = 0;
+	for (len = 1; len < max_codeword_len; len++) {
+		offsets[len + 1] = offsets[len] + len_counts[len];
+		codespace_used = (codespace_used << 1) + len_counts[len];
+	}
+	codespace_used = (codespace_used << 1) + len_counts[len]; /* final length: the loop above stops one short of it */
+
+	for (sym = 0; sym < num_syms; sym++)
+		sorted_syms[offsets[lens[sym]]++] = sym;
+
+	sorted_syms += offsets[0]; /* Skip unused symbols: offsets[0] has advanced to the count of zero-length symbols */
+
+	/* lens[] is done being used, so we can write to decode_table[] now. */
+
+	/*
+	 * Check whether the lengths form a complete code (exactly fills the
+	 * codespace), an incomplete code (doesn't fill the codespace), or an
+	 * overfull code (overflows the codespace).  A codeword of length 'n'
+	 * uses proportion '1/(2^n)' of the codespace.  An overfull code is
+	 * nonsensical, so is considered invalid.  An incomplete code is
+	 * considered valid only in two specific cases; see below.
+	 */
+
+	/* overfull code? */
+	if (unlikely(codespace_used > (1U << max_codeword_len)))
+		return false;
+
+	/* incomplete code? */
+	if (unlikely(codespace_used < (1U << max_codeword_len))) {
+		u32 entry;
+		unsigned i;
+
+		if (codespace_used == 0) {
+			/*
+			 * An empty code is allowed.  This can happen for the
+			 * offset code in DEFLATE, since a dynamic Huffman block
+			 * need not contain any matches.
+			 */
+
+			/* sym=0, len=1 (arbitrary) */
+			entry = make_decode_table_entry(decode_results, 0, 1);
+		} else {
+			/*
+			 * Allow codes with a single used symbol, with codeword
+			 * length 1.  The DEFLATE RFC is unclear regarding this
+			 * case.  What zlib's decompressor does is permit this
+			 * for the litlen and offset codes and assume the
+			 * codeword is '0' rather than '1'.  We do the same
+			 * except we allow this for precodes too, since there's
+			 * no convincing reason to treat the codes differently.
+			 * We also assign both codewords '0' and '1' to the
+			 * symbol to avoid having to handle '1' specially.
+			 */
+			if (codespace_used != (1U << (max_codeword_len - 1)) ||
+			    len_counts[1] != 1)
+				return false;
+			entry = make_decode_table_entry(decode_results,
+							*sorted_syms, 1);
+		}
+		/*
+		 * Note: the decode table still must be fully initialized, in
+		 * case the stream is malformed and contains bits from the part
+		 * of the codespace the incomplete code doesn't use.
+		 */
+		for (i = 0; i < (1U << table_bits); i++)
+			decode_table[i] = entry;
+		return true;
+	}
+
+	/*
+	 * The lengths form a complete code.  Now, enumerate the codewords in
+	 * lexicographic order and fill the decode table entries for each one.
+	 *
+	 * First, process all codewords with len <= table_bits.  Each one gets
+	 * '2^(table_bits-len)' direct entries in the table.
+	 *
+	 * Since DEFLATE uses bit-reversed codewords, these entries aren't
+	 * consecutive but rather are spaced '2^len' entries apart.  This makes
+	 * filling them naively somewhat awkward and inefficient, since strided
+	 * stores are less cache-friendly and preclude the use of word or
+	 * vector-at-a-time stores to fill multiple entries per instruction.
+	 *
+	 * To optimize this, we incrementally double the table size.  When
+	 * processing codewords with length 'len', the table is treated as
+	 * having only '2^len' entries, so each codeword uses just one entry.
+	 * Then, each time 'len' is incremented, the table size is doubled and
+	 * the first half is copied to the second half.  This significantly
+	 * improves performance over naively doing strided stores.
+	 *
+	 * Note that some entries copied for each table doubling may not have
+	 * been initialized yet, but it doesn't matter since they're guaranteed
+	 * to be initialized later (because the Huffman code is complete).
+	 */
+	codeword = 0;
+	len = 1;
+	while ((count = len_counts[len]) == 0)
+		len++;
+	cur_table_end = 1U << len;
+	while (len <= table_bits) {
+		/* Process all 'count' codewords with length 'len' bits. */
+		do {
+			unsigned bit;
+
+			/* Fill the first entry for the current codeword. */
+			decode_table[codeword] =
+				make_decode_table_entry(decode_results,
+							*sorted_syms++, len);
+
+			if (codeword == cur_table_end - 1) {
+				/*
+				 * Last codeword (all 1's): finish doubling the
+				 * table up to '2^table_bits' entries, then done.
+				 */
+				for (; len < table_bits; len++) {
+					memcpy(&decode_table[cur_table_end],
+					       decode_table,
+					       cur_table_end *
+						sizeof(decode_table[0]));
+					cur_table_end <<= 1;
+				}
+				return true;
+			}
+			/*
+			 * To advance to the lexicographically next codeword in
+			 * the canonical code, the codeword must be incremented,
+			 * then 0's must be appended to the codeword as needed
+			 * to match the next codeword's length.
+			 *
+			 * Since the codeword is bit-reversed, appending 0's is
+			 * a no-op.  However, incrementing it is nontrivial.  To
+			 * do so efficiently, use the 'bsr' instruction to find
+			 * the last (highest order) 0 bit in the codeword, set
+			 * it, and clear any later (higher order) 1 bits.  But
+			 * 'bsr' actually finds the highest order 1 bit, so to
+			 * use it first flip all bits in the codeword by XOR'ing
+			 * it with (1U << len) - 1 == cur_table_end - 1.
+			 */
+			bit = 1U << bsr32(codeword ^ (cur_table_end - 1));
+			codeword &= bit - 1;
+			codeword |= bit;
+		} while (--count);
+
+		/* Advance to the next codeword length. */
+		do {
+			if (++len <= table_bits) {
+				memcpy(&decode_table[cur_table_end],
+				       decode_table,
+				       cur_table_end * sizeof(decode_table[0]));
+				cur_table_end <<= 1;
+			}
+		} while ((count = len_counts[len]) == 0);
+	}
+
+	/* Process codewords with len > table_bits.  These require subtables. */
+	cur_table_end = 1U << table_bits;
+	subtable_prefix = -1; /* all-ones sentinel: differs from any masked prefix, so the first codeword opens a subtable */
+	subtable_start = 0;
+	for (;;) {
+		u32 entry;
+		unsigned i;
+		unsigned stride;
+		unsigned bit;
+
+		/*
+		 * Start a new subtable if the first 'table_bits' bits of the
+		 * codeword don't match the prefix of the current subtable.
+		 */
+		if ((codeword & ((1U << table_bits) - 1)) != subtable_prefix) {
+			subtable_prefix = (codeword & ((1U << table_bits) - 1));
+			subtable_start = cur_table_end;
+			/*
+			 * Calculate the subtable length.  If the codeword has
+			 * length 'table_bits + n', then the subtable needs
+			 * '2^n' entries.  But it may need more; if fewer than
+			 * '2^n' codewords of length 'table_bits + n' remain,
+			 * then the length will need to be incremented to bring
+			 * in longer codewords until the subtable can be
+			 * completely filled.  Note that because the Huffman
+			 * code is complete, it will always be possible to fill
+			 * the subtable eventually.
+			 */
+			subtable_bits = len - table_bits;
+			codespace_used = count;
+			while (codespace_used < (1U << subtable_bits)) {
+				subtable_bits++;
+				codespace_used = (codespace_used << 1) +
+					len_counts[table_bits + subtable_bits];
+			}
+			cur_table_end = subtable_start + (1U << subtable_bits);
+
+			/*
+			 * Create the entry that points from the main table to
+			 * the subtable.
+			 */
+			decode_table[subtable_prefix] =
+				((u32)subtable_start << 16) |
+				HUFFDEC_EXCEPTIONAL |
+				HUFFDEC_SUBTABLE_POINTER |
+				(subtable_bits << 8) | table_bits;
+		}
+
+		/*
+		 * Fill the subtable entries for the current codeword.  The
+		 * entries for one codeword are 'stride' apart, starting at the
+		 * index given by the codeword bits beyond the main-table prefix.
+		 */
+		entry = make_decode_table_entry(decode_results, *sorted_syms++,
+						len - table_bits);
+		i = subtable_start + (codeword >> table_bits);
+		stride = 1U << (len - table_bits);
+		do {
+			decode_table[i] = entry;
+			i += stride;
+		} while (i < cur_table_end);
+
+		/* Advance to the next codeword. */
+		if (codeword == (1U << len) - 1) /* last codeword (all 1's)? */
+			return true;
+		bit = 1U << bsr32(codeword ^ ((1U << len) - 1));
+		codeword &= bit - 1;
+		codeword |= bit;
+		count--;
+		while (count == 0)
+			count = len_counts[++len];
+	}
+}
+
+/*
+ * Build the decode table for the precode.
+ *
+ * Reads the codeword lengths from d->u.precode_lens and writes the table to
+ * d->u.l.precode_decode_table.  NULL is passed for table_bits_ret, so the
+ * table always uses exactly PRECODE_TABLEBITS bits.
+ *
+ * Returns %true on success, or %false if the lengths do not form a valid
+ * Huffman code (the result of build_decode_table()).
+ */
+static bool
+build_precode_decode_table(struct libdeflate_decompressor *d)
+{
+	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
+	STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128);
+
+	STATIC_ASSERT(ARRAY_LEN(precode_decode_results) ==
+		      DEFLATE_NUM_PRECODE_SYMS);
+
+	return build_decode_table(d->u.l.precode_decode_table,
+				  d->u.precode_lens,
+				  DEFLATE_NUM_PRECODE_SYMS,
+				  precode_decode_results,
+				  PRECODE_TABLEBITS,
+				  DEFLATE_MAX_PRE_CODEWORD_LEN,
+				  d->sorted_syms,
+				  NULL);
+}
+
+/*
+ * Build the decode table for the literal/length code.
+ *
+ * The codeword lengths are the first @num_litlen_syms entries of d->u.l.lens.
+ * The output table, d->u.litlen_decode_table, shares a union with that array;
+ * this relies on build_decode_table() consuming all of its @lens before it
+ * writes any table entry.  The table_bits value actually used (at most
+ * LITLEN_TABLEBITS) is stored in d->litlen_tablebits.
+ *
+ * @num_offset_syms is not used by this function.
+ *
+ * Returns %true on success, or %false if the lengths do not form a valid
+ * Huffman code (the result of build_decode_table()).
+ */
+static bool
+build_litlen_decode_table(struct libdeflate_decompressor *d,
+			  unsigned num_litlen_syms, unsigned num_offset_syms)
+{
+	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
+	STATIC_ASSERT(LITLEN_TABLEBITS == 11 && LITLEN_ENOUGH == 2342);
+
+	STATIC_ASSERT(ARRAY_LEN(litlen_decode_results) ==
+		      DEFLATE_NUM_LITLEN_SYMS);
+
+	return build_decode_table(d->u.litlen_decode_table,
+				  d->u.l.lens,
+				  num_litlen_syms,
+				  litlen_decode_results,
+				  LITLEN_TABLEBITS,
+				  DEFLATE_MAX_LITLEN_CODEWORD_LEN,
+				  d->sorted_syms,
+				  &d->litlen_tablebits);
+}
+
+/*
+ * Build the decode table for the offset code.
+ *
+ * The codeword lengths are the @num_offset_syms entries of d->u.l.lens that
+ * follow the @num_litlen_syms litlen lengths.  NULL is passed for
+ * table_bits_ret, so the main table always has OFFSET_TABLEBITS bits.
+ *
+ * Because d->u.l.lens shares storage with d->u.litlen_decode_table, this must
+ * be called before build_litlen_decode_table().
+ *
+ * Returns %true on success, or %false if the lengths do not form a valid
+ * Huffman code (the result of build_decode_table()).
+ */
+static bool
+build_offset_decode_table(struct libdeflate_decompressor *d,
+			  unsigned num_litlen_syms, unsigned num_offset_syms)
+{
+	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
+	STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402);
+
+	STATIC_ASSERT(ARRAY_LEN(offset_decode_results) ==
+		      DEFLATE_NUM_OFFSET_SYMS);
+
+	return build_decode_table(d->offset_decode_table,
+				  d->u.l.lens + num_litlen_syms,
+				  num_offset_syms,
+				  offset_decode_results,
+				  OFFSET_TABLEBITS,
+				  DEFLATE_MAX_OFFSET_CODEWORD_LEN,
+				  d->sorted_syms,
+				  NULL);
+}
+
+/*****************************************************************************
+ *				Main decompression routine
+ *****************************************************************************/
+
+typedef enum libdeflate_result (*decompress_func_t)
+	(struct libdeflate_decompressor * restrict d,
+	 const void * restrict in, size_t in_nbytes,
+	 void * restrict out, size_t out_nbytes_avail,
+	 size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret); /* same signature as the FUNCNAME() template function that follows */
+
+#define FUNCNAME deflate_decompress_default /* name given to the template function that follows */
+#undef ATTRIBUTES /* these three are cleared so the template's #ifndef fallbacks take effect */
+#undef EXTRACT_VARBITS
+#undef EXTRACT_VARBITS8
+/*
+ * decompress_template.h
+ *
+ * Copyright 2016 Eric Biggers
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This is the actual DEFLATE decompression routine, lifted out of + * deflate_decompress.c so that it can be compiled multiple times with different + * target instruction sets. + */ + +#ifndef ATTRIBUTES +# define ATTRIBUTES +#endif +#ifndef EXTRACT_VARBITS +# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count)) +#endif +#ifndef EXTRACT_VARBITS8 +# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count))) +#endif + +static enum libdeflate_result ATTRIBUTES MAYBE_UNUSED +FUNCNAME(struct libdeflate_decompressor * restrict d, + const void * restrict in, size_t in_nbytes, + void * restrict out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret) +{ + u8 *out_next = out; + u8 * const out_end = out_next + out_nbytes_avail; + u8 * const out_fastloop_end = + out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN); + + /* Input bitstream state; see deflate_decompress.c for documentation */ + const u8 *in_next = in; + const u8 * const in_end = in_next + in_nbytes; + const u8 * const in_fastloop_end = + in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ); + bitbuf_t bitbuf = 0; + bitbuf_t saved_bitbuf; + u32 bitsleft = 0; + size_t overread_count = 0; + + bool is_final_block; + unsigned block_type; + unsigned num_litlen_syms; + unsigned num_offset_syms; + bitbuf_t litlen_tablemask; + u32 entry; + +next_block: + /* Starting to read the next block */ + ; + + STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3)); + REFILL_BITS(); + + /* BFINAL: 1 bit */ + is_final_block = bitbuf & BITMASK(1); + + /* BTYPE: 2 bits */ + block_type = (bitbuf >> 1) & BITMASK(2); + + if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) { + + /* Dynamic Huffman block 
*/ + + /* The order in which precode lengths are stored */ + static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + }; + + unsigned num_explicit_precode_lens; + unsigned i; + + /* Read the codeword length counts. */ + + STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5)); + num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5)); + + STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5)); + num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5)); + + STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4)); + num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4)); + + d->static_codes_loaded = false; + + /* + * Read the precode codeword lengths. + * + * A 64-bit bitbuffer is just one bit too small to hold the + * maximum number of precode lens, so to minimize branches we + * merge one len with the previous fields. + */ + STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1); + if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) { + d->u.precode_lens[deflate_precode_lens_permutation[0]] = + (bitbuf >> 17) & BITMASK(3); + bitbuf >>= 20; + bitsleft -= 20; + REFILL_BITS(); + i = 1; + do { + d->u.precode_lens[deflate_precode_lens_permutation[i]] = + bitbuf & BITMASK(3); + bitbuf >>= 3; + bitsleft -= 3; + } while (++i < num_explicit_precode_lens); + } else { + bitbuf >>= 17; + bitsleft -= 17; + i = 0; + do { + if ((u8)bitsleft < 3) + REFILL_BITS(); + d->u.precode_lens[deflate_precode_lens_permutation[i]] = + bitbuf & BITMASK(3); + bitbuf >>= 3; + bitsleft -= 3; + } while (++i < num_explicit_precode_lens); + } + for (; i < DEFLATE_NUM_PRECODE_SYMS; i++) + d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0; + + /* Build the decode table for the precode. */ + SAFETY_CHECK(build_precode_decode_table(d)); + + /* Decode the litlen and offset codeword lengths. 
*/ + i = 0; + do { + unsigned presym; + u8 rep_val; + unsigned rep_count; + + if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7) + REFILL_BITS(); + + /* + * The code below assumes that the precode decode table + * doesn't have any subtables. + */ + STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN); + + /* Decode the next precode symbol. */ + entry = d->u.l.precode_decode_table[ + bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)]; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + presym = entry >> 16; + + if (presym < 16) { + /* Explicit codeword length */ + d->u.l.lens[i++] = presym; + continue; + } + + /* Run-length encoded codeword lengths */ + + /* + * Note: we don't need to immediately verify that the + * repeat count doesn't overflow the number of elements, + * since we've sized the lens array to have enough extra + * space to allow for the worst-case overrun (138 zeroes + * when only 1 length was remaining). + * + * In the case of the small repeat counts (presyms 16 + * and 17), it is fastest to always write the maximum + * number of entries. That gets rid of branches that + * would otherwise be required. + * + * It is not just because of the numerical order that + * our checks go in the order 'presym < 16', 'presym == + * 16', and 'presym == 17'. For typical data this is + * ordered from most frequent to least frequent case. + */ + STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1); + + if (presym == 16) { + /* Repeat the previous length 3 - 6 times. */ + SAFETY_CHECK(i != 0); + rep_val = d->u.l.lens[i - 1]; + STATIC_ASSERT(3 + BITMASK(2) == 6); + rep_count = 3 + (bitbuf & BITMASK(2)); + bitbuf >>= 2; + bitsleft -= 2; + d->u.l.lens[i + 0] = rep_val; + d->u.l.lens[i + 1] = rep_val; + d->u.l.lens[i + 2] = rep_val; + d->u.l.lens[i + 3] = rep_val; + d->u.l.lens[i + 4] = rep_val; + d->u.l.lens[i + 5] = rep_val; + i += rep_count; + } else if (presym == 17) { + /* Repeat zero 3 - 10 times. 
*/ + STATIC_ASSERT(3 + BITMASK(3) == 10); + rep_count = 3 + (bitbuf & BITMASK(3)); + bitbuf >>= 3; + bitsleft -= 3; + d->u.l.lens[i + 0] = 0; + d->u.l.lens[i + 1] = 0; + d->u.l.lens[i + 2] = 0; + d->u.l.lens[i + 3] = 0; + d->u.l.lens[i + 4] = 0; + d->u.l.lens[i + 5] = 0; + d->u.l.lens[i + 6] = 0; + d->u.l.lens[i + 7] = 0; + d->u.l.lens[i + 8] = 0; + d->u.l.lens[i + 9] = 0; + i += rep_count; + } else { + /* Repeat zero 11 - 138 times. */ + STATIC_ASSERT(11 + BITMASK(7) == 138); + rep_count = 11 + (bitbuf & BITMASK(7)); + bitbuf >>= 7; + bitsleft -= 7; + memset(&d->u.l.lens[i], 0, + rep_count * sizeof(d->u.l.lens[i])); + i += rep_count; + } + } while (i < num_litlen_syms + num_offset_syms); + + /* Unnecessary, but check this for consistency with zlib. */ + SAFETY_CHECK(i == num_litlen_syms + num_offset_syms); + + } else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) { + u16 len, nlen; + + /* + * Uncompressed block: copy 'len' bytes literally from the input + * buffer to the output buffer. + */ + + bitsleft -= 3; /* for BTYPE and BFINAL */ + + /* + * Align the bitstream to the next byte boundary. This means + * the next byte boundary as if we were reading a byte at a + * time. Therefore, we have to rewind 'in_next' by any bytes + * that have been refilled but not actually consumed yet (not + * counting overread bytes, which don't increment 'in_next'). 
+ */ + bitsleft = (u8)bitsleft; + SAFETY_CHECK(overread_count <= (bitsleft >> 3)); + in_next -= (bitsleft >> 3) - overread_count; + overread_count = 0; + bitbuf = 0; + bitsleft = 0; + + SAFETY_CHECK(in_end - in_next >= 4); + len = get_unaligned_le16(in_next); + nlen = get_unaligned_le16(in_next + 2); + in_next += 4; + + SAFETY_CHECK(len == (u16)~nlen); + if (unlikely(len > out_end - out_next)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + SAFETY_CHECK(len <= in_end - in_next); + + memcpy(out_next, in_next, len); + in_next += len; + out_next += len; + + goto block_done; + + } else { + unsigned i; + + SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN); + + /* + * Static Huffman block: build the decode tables for the static + * codes. Skip doing so if the tables are already set up from + * an earlier static block; this speeds up decompression of + * degenerate input of many empty or very short static blocks. + * + * Afterwards, the remainder is the same as decompressing a + * dynamic Huffman block. + */ + + bitbuf >>= 3; /* for BTYPE and BFINAL */ + bitsleft -= 3; + + if (d->static_codes_loaded) + goto have_decode_tables; + + d->static_codes_loaded = true; + + STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288); + STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32); + + for (i = 0; i < 144; i++) + d->u.l.lens[i] = 8; + for (; i < 256; i++) + d->u.l.lens[i] = 9; + for (; i < 280; i++) + d->u.l.lens[i] = 7; + for (; i < 288; i++) + d->u.l.lens[i] = 8; + + for (; i < 288 + 32; i++) + d->u.l.lens[i] = 5; + + num_litlen_syms = 288; + num_offset_syms = 32; + } + + /* Decompressing a Huffman block (either dynamic or static) */ + + SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms)); + SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms)); +have_decode_tables: + litlen_tablemask = BITMASK(d->litlen_tablebits); + + /* + * This is the "fastloop" for decoding literals and matches. 
It does + * bounds checks on in_next and out_next in the loop conditions so that + * additional bounds checks aren't needed inside the loop body. + * + * To reduce latency, the bitbuffer is refilled and the next litlen + * decode table entry is preloaded before each loop iteration. + */ + if (in_next >= in_fastloop_end || out_next >= out_fastloop_end) + goto generic_loop; + REFILL_BITS_IN_FASTLOOP(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + do { + u32 length, offset, lit; + const u8 *src; + u8 *dst; + + /* + * Consume the bits for the litlen decode table entry. Save the + * original bitbuf for later, in case the extra match length + * bits need to be extracted from it. + */ + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + + /* + * Begin by checking for a "fast" literal, i.e. a literal that + * doesn't need a subtable. + */ + if (entry & HUFFDEC_LITERAL) { + /* + * On 64-bit platforms, we decode up to 2 extra fast + * literals in addition to the primary item, as this + * increases performance and still leaves enough bits + * remaining for what follows. We could actually do 3, + * assuming LITLEN_TABLEBITS=11, but that actually + * decreases performance slightly (perhaps by messing + * with the branch prediction of the conditional refill + * that happens later while decoding the match offset). + * + * Note: the definitions of FASTLOOP_MAX_BYTES_WRITTEN + * and FASTLOOP_MAX_BYTES_READ need to be updated if the + * number of extra literals decoded here is changed. + */ + if (/* enough bits for 2 fast literals + length + offset preload? */ + CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS + + LENGTH_MAXBITS, + OFFSET_TABLEBITS) && + /* enough bits for 2 fast literals + slow literal + litlen preload? 
*/ + CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS + + DEFLATE_MAX_LITLEN_CODEWORD_LEN, + LITLEN_TABLEBITS)) { + /* 1st extra fast literal */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + *out_next++ = lit; + if (entry & HUFFDEC_LITERAL) { + /* 2nd extra fast literal */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + *out_next++ = lit; + if (entry & HUFFDEC_LITERAL) { + /* + * Another fast literal, but + * this one is in lieu of the + * primary item, so it doesn't + * count as one of the extras. + */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + } + } else { + /* + * Decode a literal. While doing so, preload + * the next litlen decode table entry and refill + * the bitbuffer. To reduce latency, we've + * arranged for there to be enough "preloadable" + * bits remaining to do the table preload + * independently of the refill. + */ + STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD( + LITLEN_TABLEBITS, LITLEN_TABLEBITS)); + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + } + + /* + * It's not a literal entry, so it can be a length entry, a + * subtable pointer entry, or an end-of-block entry. Detect the + * two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag. + */ + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Subtable pointer or end-of-block entry */ + + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + + /* + * A subtable is required. Load and consume the + * subtable entry. The subtable entry can be of any + * type: literal, length, or end-of-block. 
+ */ + entry = d->u.litlen_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + + /* + * 32-bit platforms that use the byte-at-a-time refill + * method have to do a refill here for there to always + * be enough bits to decode a literal that requires a + * subtable, then preload the next litlen decode table + * entry; or to decode a match length that requires a + * subtable, then preload the offset decode table entry. + */ + if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN, + LITLEN_TABLEBITS) || + !CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS, + OFFSET_TABLEBITS)) + REFILL_BITS_IN_FASTLOOP(); + if (entry & HUFFDEC_LITERAL) { + /* Decode a literal that required a subtable. */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + /* Else, it's a length that required a subtable. */ + } + + /* + * Decode the match length: the length base value associated + * with the litlen symbol (which we extract from the decode + * table entry), plus the extra length bits. We don't need to + * consume the extra length bits here, as they were included in + * the bits consumed by the entry earlier. We also don't need + * to check for too-long matches here, as this is inside the + * fastloop where it's already been verified that the output + * buffer has enough space remaining to copy a max-length match. + */ + length = entry >> 16; + length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + + /* + * Decode the match offset. There are enough "preloadable" bits + * remaining to preload the offset decode table entry, but a + * refill might be needed before consuming it. 
+ */ + STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS, + OFFSET_TABLEBITS)); + entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)]; + if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS, + LITLEN_TABLEBITS)) { + /* + * Decoding a match offset on a 64-bit platform. We may + * need to refill once, but then we can decode the whole + * offset and preload the next litlen table entry. + */ + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Offset codeword requires a subtable */ + if (unlikely((u8)bitsleft < OFFSET_MAXBITS + + LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + } else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS + + LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + } else { + /* Decoding a match offset on a 32-bit platform */ + REFILL_BITS_IN_FASTLOOP(); + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Offset codeword requires a subtable */ + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + REFILL_BITS_IN_FASTLOOP(); + /* No further refill needed before extra bits */ + STATIC_ASSERT(CAN_CONSUME( + OFFSET_MAXBITS - OFFSET_TABLEBITS)); + } else { + /* No refill needed before extra bits */ + STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS)); + } + } + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + offset = entry >> 16; + offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + + /* Validate the match offset; needed even in the fastloop. 
*/ + SAFETY_CHECK(offset <= out_next - (const u8 *)out); + src = out_next - offset; + dst = out_next; + out_next += length; + + /* + * Before starting to issue the instructions to copy the match, + * refill the bitbuffer and preload the litlen decode table + * entry for the next loop iteration. This can increase + * performance by allowing the latency of the match copy to + * overlap with these other operations. To further reduce + * latency, we've arranged for there to be enough bits remaining + * to do the table preload independently of the refill, except + * on 32-bit platforms using the byte-at-a-time refill method. + */ + if (!CAN_CONSUME_AND_THEN_PRELOAD( + MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS, + OFFSET_MAXFASTBITS), + LITLEN_TABLEBITS) && + unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + + /* + * Copy the match. On most CPUs the fastest method is a + * word-at-a-time copy, unconditionally copying about 5 words + * since this is enough for most matches without being too much. + * + * The normal word-at-a-time copy works for offset >= WORDBYTES, + * which is most cases. The case of offset == 1 is also common + * and is worth optimizing for, since it is just RLE encoding of + * the previous byte, which is the result of compressing long + * runs of the same byte. + * + * Writing past the match 'length' is allowed here, since it's + * been ensured there is enough output space left for a slight + * overrun. FASTLOOP_MAX_BYTES_WRITTEN needs to be updated if + * the maximum possible overrun here is changed. 
+ */ + if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) { + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + while (dst < out_next) { + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + } + } else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) { + machine_word_t v; + + /* + * This part tends to get auto-vectorized, so keep it + * copying a multiple of 16 bytes at a time. 
+ */ + v = (machine_word_t)0x0101010101010101 * src[0]; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + while (dst < out_next) { + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + } + } else if (UNALIGNED_ACCESS_IS_FAST) { + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + do { + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + } while (dst < out_next); + } else { + *dst++ = *src++; + *dst++ = *src++; + do { + *dst++ = *src++; + } while (dst < out_next); + } + } while (in_next < in_fastloop_end && out_next < out_fastloop_end); + + /* + * This is the generic loop for decoding literals and matches. This + * handles cases where in_next and out_next are close to the end of + * their respective buffers. Usually this loop isn't performance- + * critical, as most time is spent in the fastloop above instead. We + * therefore omit some optimizations here in favor of smaller code. 
+ */ +generic_loop: + for (;;) { + u32 length, offset; + const u8 *src; + u8 *dst; + + REFILL_BITS(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) { + entry = d->u.litlen_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + } + length = entry >> 16; + if (entry & HUFFDEC_LITERAL) { + if (unlikely(out_next == out_end)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + *out_next++ = length; + continue; + } + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + if (unlikely(length > out_end - out_next)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + + if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS)) + REFILL_BITS(); + entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)]; + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + if (!CAN_CONSUME(OFFSET_MAXBITS)) + REFILL_BITS(); + } + offset = entry >> 16; + offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8); + bitbuf >>= (u8)entry; + bitsleft -= entry; + + SAFETY_CHECK(offset <= out_next - (const u8 *)out); + src = out_next - offset; + dst = out_next; + out_next += length; + + STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3); + *dst++ = *src++; + *dst++ = *src++; + do { + *dst++ = *src++; + } while (dst < out_next); + } + +block_done: + /* Finished decoding a block */ + + if (!is_final_block) + goto next_block; + + /* That was the last block. */ + + bitsleft = (u8)bitsleft; + + /* + * If any of the implicit appended zero bytes were consumed (not just + * refilled) before hitting end of stream, then the data is bad. 
+ */ + SAFETY_CHECK(overread_count <= (bitsleft >> 3)); + + /* Optionally return the actual number of bytes consumed. */ + if (actual_in_nbytes_ret) { + /* Don't count bytes that were refilled but not consumed. */ + in_next -= (bitsleft >> 3) - overread_count; + + *actual_in_nbytes_ret = in_next - (u8 *)in; + } + + /* Optionally return the actual number of bytes written. */ + if (actual_out_nbytes_ret) { + *actual_out_nbytes_ret = out_next - (u8 *)out; + } else { + if (out_next != out_end) + return LIBDEFLATE_SHORT_OUTPUT; + } + return LIBDEFLATE_SUCCESS; +} + +#undef FUNCNAME +#undef ATTRIBUTES +#undef EXTRACT_VARBITS +#undef EXTRACT_VARBITS8 + + +/* Include architecture-specific implementation(s) if available. */ +#undef DEFAULT_IMPL +#undef arch_select_decompress_func +#if defined(ARCH_X86_32) || defined(ARCH_X86_64) +#ifndef LIB_X86_DECOMPRESS_IMPL_H +#define LIB_X86_DECOMPRESS_IMPL_H + +/* + * BMI2 optimized version + * + * FIXME: with MSVC, this isn't actually compiled with BMI2 code generation + * enabled yet. That would require that this be moved to its own .c file. + */ +#if HAVE_BMI2_INTRIN +# define deflate_decompress_bmi2 deflate_decompress_bmi2 +# define FUNCNAME deflate_decompress_bmi2 +# if !HAVE_BMI2_NATIVE +# define ATTRIBUTES _target_attribute("bmi2") +# endif + /* + * Even with __attribute__((target("bmi2"))), gcc doesn't reliably use the + * bzhi instruction for 'word & BITMASK(count)'. So use the bzhi intrinsic + * explicitly. EXTRACT_VARBITS() is equivalent to 'word & BITMASK(count)'; + * EXTRACT_VARBITS8() is equivalent to 'word & BITMASK((u8)count)'. + * Nevertheless, their implementation using the bzhi intrinsic is identical, + * as the bzhi instruction truncates the count to 8 bits implicitly. 
+ */ +# ifndef __clang__ +# include <immintrin.h> /* header name restored; the _bzhi_u32()/_bzhi_u64() intrinsics used below are declared here */ +# ifdef ARCH_X86_64 +# define EXTRACT_VARBITS(word, count) _bzhi_u64((word), (count)) +# define EXTRACT_VARBITS8(word, count) _bzhi_u64((word), (count)) +# else +# define EXTRACT_VARBITS(word, count) _bzhi_u32((word), (count)) +# define EXTRACT_VARBITS8(word, count) _bzhi_u32((word), (count)) +# endif +# endif +/* + * decompress_template.h + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This is the actual DEFLATE decompression routine, lifted out of + * deflate_decompress.c so that it can be compiled multiple times with different + * target instruction sets.
+ */ + +#ifndef ATTRIBUTES +# define ATTRIBUTES +#endif +#ifndef EXTRACT_VARBITS +# define EXTRACT_VARBITS(word, count) ((word) & BITMASK(count)) +#endif +#ifndef EXTRACT_VARBITS8 +# define EXTRACT_VARBITS8(word, count) ((word) & BITMASK((u8)(count))) +#endif +/* + * FUNCNAME: decode the DEFLATE blocks found in 'in' (in_nbytes bytes) into + * 'out' (room for out_nbytes_avail bytes), using the decode tables held in 'd'. + * + * Blocks are processed one at a time (next_block ... block_done) until a block + * with BFINAL set has been decoded. Huffman blocks are decoded by the fastloop + * while both buffers are far enough from their ends, and by the generic loop + * otherwise. + * + * If actual_in_nbytes_ret is non-NULL it receives the number of input bytes + * consumed; if actual_out_nbytes_ret is non-NULL it receives the number of + * bytes written. + * + * Returns LIBDEFLATE_SUCCESS on success, LIBDEFLATE_INSUFFICIENT_SPACE when a + * literal, match, or uncompressed block does not fit in 'out', and + * LIBDEFLATE_SHORT_OUTPUT when actual_out_nbytes_ret is NULL and the output + * did not fill 'out' exactly. A failed SAFETY_CHECK() also ends decoding; + * that macro is defined outside this template (presumably it returns a + * bad-data error -- confirm in deflate_decompress.c). + */ +static enum libdeflate_result ATTRIBUTES MAYBE_UNUSED +FUNCNAME(struct libdeflate_decompressor * restrict d, + const void * restrict in, size_t in_nbytes, + void * restrict out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret) +{ + u8 *out_next = out; + u8 * const out_end = out_next + out_nbytes_avail; + u8 * const out_fastloop_end = + out_end - MIN(out_nbytes_avail, FASTLOOP_MAX_BYTES_WRITTEN); + + /* Input bitstream state; see deflate_decompress.c for documentation */ + const u8 *in_next = in; + const u8 * const in_end = in_next + in_nbytes; + const u8 * const in_fastloop_end = + in_end - MIN(in_nbytes, FASTLOOP_MAX_BYTES_READ); + bitbuf_t bitbuf = 0; + bitbuf_t saved_bitbuf; + u32 bitsleft = 0; + size_t overread_count = 0; + + bool is_final_block; + unsigned block_type; + unsigned num_litlen_syms; + unsigned num_offset_syms; + bitbuf_t litlen_tablemask; + u32 entry; + +next_block: + /* Starting to read the next block */ + ; + + STATIC_ASSERT(CAN_CONSUME(1 + 2 + 5 + 5 + 4 + 3)); + REFILL_BITS(); + + /* BFINAL: 1 bit */ + is_final_block = bitbuf & BITMASK(1); + + /* BTYPE: 2 bits */ + block_type = (bitbuf >> 1) & BITMASK(2); + + if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) { + + /* Dynamic Huffman block */ + + /* The order in which precode lengths are stored */ + static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + }; + + unsigned num_explicit_precode_lens; + unsigned i; + + /* Read the codeword length counts.
*/ + + STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 257 + BITMASK(5)); + num_litlen_syms = 257 + ((bitbuf >> 3) & BITMASK(5)); + + STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 1 + BITMASK(5)); + num_offset_syms = 1 + ((bitbuf >> 8) & BITMASK(5)); + + STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == 4 + BITMASK(4)); + num_explicit_precode_lens = 4 + ((bitbuf >> 13) & BITMASK(4)); + + d->static_codes_loaded = false; + + /* + * Read the precode codeword lengths. + * + * A 64-bit bitbuffer is just one bit too small to hold the + * maximum number of precode lens, so to minimize branches we + * merge one len with the previous fields. + */ + STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1); + if (CAN_CONSUME(3 * (DEFLATE_NUM_PRECODE_SYMS - 1))) { + d->u.precode_lens[deflate_precode_lens_permutation[0]] = + (bitbuf >> 17) & BITMASK(3); + bitbuf >>= 20; + bitsleft -= 20; + REFILL_BITS(); + i = 1; + do { + d->u.precode_lens[deflate_precode_lens_permutation[i]] = + bitbuf & BITMASK(3); + bitbuf >>= 3; + bitsleft -= 3; + } while (++i < num_explicit_precode_lens); + } else { + bitbuf >>= 17; + bitsleft -= 17; + i = 0; + do { + if ((u8)bitsleft < 3) + REFILL_BITS(); + d->u.precode_lens[deflate_precode_lens_permutation[i]] = + bitbuf & BITMASK(3); + bitbuf >>= 3; + bitsleft -= 3; + } while (++i < num_explicit_precode_lens); + } + for (; i < DEFLATE_NUM_PRECODE_SYMS; i++) + d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0; + + /* Build the decode table for the precode. */ + SAFETY_CHECK(build_precode_decode_table(d)); + + /* Decode the litlen and offset codeword lengths. */ + i = 0; + do { + unsigned presym; + u8 rep_val; + unsigned rep_count; + + if ((u8)bitsleft < DEFLATE_MAX_PRE_CODEWORD_LEN + 7) + REFILL_BITS(); + + /* + * The code below assumes that the precode decode table + * doesn't have any subtables. + */ + STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN); + + /* Decode the next precode symbol.
*/ + entry = d->u.l.precode_decode_table[ + bitbuf & BITMASK(DEFLATE_MAX_PRE_CODEWORD_LEN)]; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + presym = entry >> 16; + + if (presym < 16) { + /* Explicit codeword length */ + d->u.l.lens[i++] = presym; + continue; + } + + /* Run-length encoded codeword lengths */ + + /* + * Note: we don't need to immediately verify that the + * repeat count doesn't overflow the number of elements, + * since we've sized the lens array to have enough extra + * space to allow for the worst-case overrun (138 zeroes + * when only 1 length was remaining). + * + * In the case of the small repeat counts (presyms 16 + * and 17), it is fastest to always write the maximum + * number of entries. That gets rid of branches that + * would otherwise be required. + * + * It is not just because of the numerical order that + * our checks go in the order 'presym < 16', 'presym == + * 16', and 'presym == 17'. For typical data this is + * ordered from most frequent to least frequent case. + */ + STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1); + + if (presym == 16) { + /* Repeat the previous length 3 - 6 times. */ + SAFETY_CHECK(i != 0); + rep_val = d->u.l.lens[i - 1]; + STATIC_ASSERT(3 + BITMASK(2) == 6); + rep_count = 3 + (bitbuf & BITMASK(2)); + bitbuf >>= 2; + bitsleft -= 2; + d->u.l.lens[i + 0] = rep_val; + d->u.l.lens[i + 1] = rep_val; + d->u.l.lens[i + 2] = rep_val; + d->u.l.lens[i + 3] = rep_val; + d->u.l.lens[i + 4] = rep_val; + d->u.l.lens[i + 5] = rep_val; + i += rep_count; + } else if (presym == 17) { + /* Repeat zero 3 - 10 times.
*/ + STATIC_ASSERT(3 + BITMASK(3) == 10); + rep_count = 3 + (bitbuf & BITMASK(3)); + bitbuf >>= 3; + bitsleft -= 3; + d->u.l.lens[i + 0] = 0; + d->u.l.lens[i + 1] = 0; + d->u.l.lens[i + 2] = 0; + d->u.l.lens[i + 3] = 0; + d->u.l.lens[i + 4] = 0; + d->u.l.lens[i + 5] = 0; + d->u.l.lens[i + 6] = 0; + d->u.l.lens[i + 7] = 0; + d->u.l.lens[i + 8] = 0; + d->u.l.lens[i + 9] = 0; + i += rep_count; + } else { + /* Repeat zero 11 - 138 times. */ + STATIC_ASSERT(11 + BITMASK(7) == 138); + rep_count = 11 + (bitbuf & BITMASK(7)); + bitbuf >>= 7; + bitsleft -= 7; + memset(&d->u.l.lens[i], 0, + rep_count * sizeof(d->u.l.lens[i])); + i += rep_count; + } + } while (i < num_litlen_syms + num_offset_syms); + + /* Unnecessary, but check this for consistency with zlib. */ + SAFETY_CHECK(i == num_litlen_syms + num_offset_syms); + + } else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) { + u16 len, nlen; + + /* + * Uncompressed block: copy 'len' bytes literally from the input + * buffer to the output buffer. + */ + + bitsleft -= 3; /* for BTYPE and BFINAL */ + + /* + * Align the bitstream to the next byte boundary. This means + * the next byte boundary as if we were reading a byte at a + * time. Therefore, we have to rewind 'in_next' by any bytes + * that have been refilled but not actually consumed yet (not + * counting overread bytes, which don't increment 'in_next').
+ */ + bitsleft = (u8)bitsleft; + SAFETY_CHECK(overread_count <= (bitsleft >> 3)); + in_next -= (bitsleft >> 3) - overread_count; + overread_count = 0; + bitbuf = 0; + bitsleft = 0; + + SAFETY_CHECK(in_end - in_next >= 4); + len = get_unaligned_le16(in_next); + nlen = get_unaligned_le16(in_next + 2); + in_next += 4; + + SAFETY_CHECK(len == (u16)~nlen); + if (unlikely(len > out_end - out_next)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + SAFETY_CHECK(len <= in_end - in_next); + + memcpy(out_next, in_next, len); + in_next += len; + out_next += len; + + goto block_done; + + } else { + unsigned i; + + SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN); + + /* + * Static Huffman block: build the decode tables for the static + * codes. Skip doing so if the tables are already set up from + * an earlier static block; this speeds up decompression of + * degenerate input of many empty or very short static blocks. + * + * Afterwards, the remainder is the same as decompressing a + * dynamic Huffman block. + */ + + bitbuf >>= 3; /* for BTYPE and BFINAL */ + bitsleft -= 3; + + if (d->static_codes_loaded) + goto have_decode_tables; + + d->static_codes_loaded = true; + + STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288); + STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32); + + for (i = 0; i < 144; i++) + d->u.l.lens[i] = 8; + for (; i < 256; i++) + d->u.l.lens[i] = 9; + for (; i < 280; i++) + d->u.l.lens[i] = 7; + for (; i < 288; i++) + d->u.l.lens[i] = 8; + + for (; i < 288 + 32; i++) + d->u.l.lens[i] = 5; + + num_litlen_syms = 288; + num_offset_syms = 32; + } + + /* Decompressing a Huffman block (either dynamic or static) */ + + SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms)); + SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms)); +have_decode_tables: + litlen_tablemask = BITMASK(d->litlen_tablebits); /* mask applied to bitbuf to index the main litlen decode table */ + + /* + * This is the "fastloop" for decoding literals and matches.
It does + * bounds checks on in_next and out_next in the loop conditions so that + * additional bounds checks aren't needed inside the loop body. + * + * To reduce latency, the bitbuffer is refilled and the next litlen + * decode table entry is preloaded before each loop iteration. + */ + if (in_next >= in_fastloop_end || out_next >= out_fastloop_end) + goto generic_loop; + REFILL_BITS_IN_FASTLOOP(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + do { + u32 length, offset, lit; + const u8 *src; + u8 *dst; + + /* + * Consume the bits for the litlen decode table entry. Save the + * original bitbuf for later, in case the extra match length + * bits need to be extracted from it. + */ + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + + /* + * Begin by checking for a "fast" literal, i.e. a literal that + * doesn't need a subtable. + */ + if (entry & HUFFDEC_LITERAL) { + /* + * On 64-bit platforms, we decode up to 2 extra fast + * literals in addition to the primary item, as this + * increases performance and still leaves enough bits + * remaining for what follows. We could actually do 3, + * assuming LITLEN_TABLEBITS=11, but that actually + * decreases performance slightly (perhaps by messing + * with the branch prediction of the conditional refill + * that happens later while decoding the match offset). + * + * Note: the definitions of FASTLOOP_MAX_BYTES_WRITTEN + * and FASTLOOP_MAX_BYTES_READ need to be updated if the + * number of extra literals decoded here is changed. + */ + if (/* enough bits for 2 fast literals + length + offset preload? */ + CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS + + LENGTH_MAXBITS, + OFFSET_TABLEBITS) && + /* enough bits for 2 fast literals + slow literal + litlen preload?
*/ + CAN_CONSUME_AND_THEN_PRELOAD(2 * LITLEN_TABLEBITS + + DEFLATE_MAX_LITLEN_CODEWORD_LEN, + LITLEN_TABLEBITS)) { + /* 1st extra fast literal */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + *out_next++ = lit; + if (entry & HUFFDEC_LITERAL) { + /* 2nd extra fast literal */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + *out_next++ = lit; + if (entry & HUFFDEC_LITERAL) { + /* + * Another fast literal, but + * this one is in lieu of the + * primary item, so it doesn't + * count as one of the extras. + */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + } + } else { + /* + * Decode a literal. While doing so, preload + * the next litlen decode table entry and refill + * the bitbuffer. To reduce latency, we've + * arranged for there to be enough "preloadable" + * bits remaining to do the table preload + * independently of the refill. + */ + STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD( + LITLEN_TABLEBITS, LITLEN_TABLEBITS)); + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + } + + /* + * It's not a literal entry, so it can be a length entry, a + * subtable pointer entry, or an end-of-block entry. Detect the + * two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag. + */ + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Subtable pointer or end-of-block entry */ + + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + + /* + * A subtable is required. Load and consume the + * subtable entry. The subtable entry can be of any + * type: literal, length, or end-of-block.
+ */ + entry = d->u.litlen_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + + /* + * 32-bit platforms that use the byte-at-a-time refill + * method have to do a refill here for there to always + * be enough bits to decode a literal that requires a + * subtable, then preload the next litlen decode table + * entry; or to decode a match length that requires a + * subtable, then preload the offset decode table entry. + */ + if (!CAN_CONSUME_AND_THEN_PRELOAD(DEFLATE_MAX_LITLEN_CODEWORD_LEN, + LITLEN_TABLEBITS) || + !CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXBITS, + OFFSET_TABLEBITS)) + REFILL_BITS_IN_FASTLOOP(); + if (entry & HUFFDEC_LITERAL) { + /* Decode a literal that required a subtable. */ + lit = entry >> 16; + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + *out_next++ = lit; + continue; + } + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + /* Else, it's a length that required a subtable. */ + } + + /* + * Decode the match length: the length base value associated + * with the litlen symbol (which we extract from the decode + * table entry), plus the extra length bits. We don't need to + * consume the extra length bits here, as they were included in + * the bits consumed by the entry earlier. We also don't need + * to check for too-long matches here, as this is inside the + * fastloop where it's already been verified that the output + * buffer has enough space remaining to copy a max-length match. + */ + length = entry >> 16; + length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + + /* + * Decode the match offset. There are enough "preloadable" bits + * remaining to preload the offset decode table entry, but a + * refill might be needed before consuming it.
+ */ + STATIC_ASSERT(CAN_CONSUME_AND_THEN_PRELOAD(LENGTH_MAXFASTBITS, + OFFSET_TABLEBITS)); + entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)]; + if (CAN_CONSUME_AND_THEN_PRELOAD(OFFSET_MAXBITS, + LITLEN_TABLEBITS)) { + /* + * Decoding a match offset on a 64-bit platform. We may + * need to refill once, but then we can decode the whole + * offset and preload the next litlen table entry. + */ + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Offset codeword requires a subtable */ + if (unlikely((u8)bitsleft < OFFSET_MAXBITS + + LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + } else if (unlikely((u8)bitsleft < OFFSET_MAXFASTBITS + + LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + } else { + /* Decoding a match offset on a 32-bit platform */ + REFILL_BITS_IN_FASTLOOP(); + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + /* Offset codeword requires a subtable */ + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + REFILL_BITS_IN_FASTLOOP(); + /* No further refill needed before extra bits */ + STATIC_ASSERT(CAN_CONSUME( + OFFSET_MAXBITS - OFFSET_TABLEBITS)); + } else { + /* No refill needed before extra bits */ + STATIC_ASSERT(CAN_CONSUME(OFFSET_MAXFASTBITS)); + } + } + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; /* optimization: subtract full entry */ + offset = entry >> 16; + offset += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + + /* Validate the match offset; needed even in the fastloop.
*/ + SAFETY_CHECK(offset <= out_next - (const u8 *)out); + src = out_next - offset; + dst = out_next; + out_next += length; + + /* + * Before starting to issue the instructions to copy the match, + * refill the bitbuffer and preload the litlen decode table + * entry for the next loop iteration. This can increase + * performance by allowing the latency of the match copy to + * overlap with these other operations. To further reduce + * latency, we've arranged for there to be enough bits remaining + * to do the table preload independently of the refill, except + * on 32-bit platforms using the byte-at-a-time refill method. + */ + if (!CAN_CONSUME_AND_THEN_PRELOAD( + MAX(OFFSET_MAXBITS - OFFSET_TABLEBITS, + OFFSET_MAXFASTBITS), + LITLEN_TABLEBITS) && + unlikely((u8)bitsleft < LITLEN_TABLEBITS - PRELOAD_SLACK)) + REFILL_BITS_IN_FASTLOOP(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + REFILL_BITS_IN_FASTLOOP(); + + /* + * Copy the match. On most CPUs the fastest method is a + * word-at-a-time copy, unconditionally copying about 5 words + * since this is enough for most matches without being too much. + * + * The normal word-at-a-time copy works for offset >= WORDBYTES, + * which is most cases. The case of offset == 1 is also common + * and is worth optimizing for, since it is just RLE encoding of + * the previous byte, which is the result of compressing long + * runs of the same byte. + * + * Writing past the match 'length' is allowed here, since it's + * been ensured there is enough output space left for a slight + * overrun. FASTLOOP_MAX_BYTES_WRITTEN needs to be updated if + * the maximum possible overrun here is changed.
+ */ + if (UNALIGNED_ACCESS_IS_FAST && offset >= WORDBYTES) { + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + while (dst < out_next) { + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + store_word_unaligned(load_word_unaligned(src), dst); + src += WORDBYTES; + dst += WORDBYTES; + } + } else if (UNALIGNED_ACCESS_IS_FAST && offset == 1) { + machine_word_t v; + + /* + * This part tends to get auto-vectorized, so keep it + * copying a multiple of 16 bytes at a time.
+ */ + v = (machine_word_t)0x0101010101010101 * src[0]; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + while (dst < out_next) { + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + store_word_unaligned(v, dst); + dst += WORDBYTES; + } + } else if (UNALIGNED_ACCESS_IS_FAST) { + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + do { + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + store_word_unaligned(load_word_unaligned(src), dst); + src += offset; + dst += offset; + } while (dst < out_next); + } else { + *dst++ = *src++; + *dst++ = *src++; + do { + *dst++ = *src++; + } while (dst < out_next); + } + } while (in_next < in_fastloop_end && out_next < out_fastloop_end); + + /* + * This is the generic loop for decoding literals and matches. This + * handles cases where in_next and out_next are close to the end of + * their respective buffers. Usually this loop isn't performance- + * critical, as most time is spent in the fastloop above instead. We + * therefore omit some optimizations here in favor of smaller code.
+ */ +generic_loop: + for (;;) { + u32 length, offset; + const u8 *src; + u8 *dst; + + REFILL_BITS(); + entry = d->u.litlen_decode_table[bitbuf & litlen_tablemask]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + if (unlikely(entry & HUFFDEC_SUBTABLE_POINTER)) { + entry = d->u.litlen_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + saved_bitbuf = bitbuf; + bitbuf >>= (u8)entry; + bitsleft -= entry; + } + length = entry >> 16; + if (entry & HUFFDEC_LITERAL) { + if (unlikely(out_next == out_end)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + *out_next++ = length; /* for a literal entry, entry >> 16 is the literal byte itself */ + continue; + } + if (unlikely(entry & HUFFDEC_END_OF_BLOCK)) + goto block_done; + length += EXTRACT_VARBITS8(saved_bitbuf, entry) >> (u8)(entry >> 8); + if (unlikely(length > out_end - out_next)) + return LIBDEFLATE_INSUFFICIENT_SPACE; + + if (!CAN_CONSUME(LENGTH_MAXBITS + OFFSET_MAXBITS)) + REFILL_BITS(); + entry = d->offset_decode_table[bitbuf & BITMASK(OFFSET_TABLEBITS)]; + if (unlikely(entry & HUFFDEC_EXCEPTIONAL)) { + bitbuf >>= OFFSET_TABLEBITS; + bitsleft -= OFFSET_TABLEBITS; + entry = d->offset_decode_table[(entry >> 16) + + EXTRACT_VARBITS(bitbuf, (entry >> 8) & 0x3F)]; + if (!CAN_CONSUME(OFFSET_MAXBITS)) + REFILL_BITS(); + } + offset = entry >> 16; + offset += EXTRACT_VARBITS8(bitbuf, entry) >> (u8)(entry >> 8); + bitbuf >>= (u8)entry; + bitsleft -= entry; + + SAFETY_CHECK(offset <= out_next - (const u8 *)out); + src = out_next - offset; + dst = out_next; + out_next += length; + + STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3); + *dst++ = *src++; + *dst++ = *src++; + do { + *dst++ = *src++; + } while (dst < out_next); + } + +block_done: + /* Finished decoding a block */ + + if (!is_final_block) + goto next_block; + + /* That was the last block. */ + + bitsleft = (u8)bitsleft; /* keep only the low byte; 'bitsleft -= entry' subtracts whole table entries, so presumably only that byte is a valid bit count */ + + /* + * If any of the implicit appended zero bytes were consumed (not just + * refilled) before hitting end of stream, then the data is bad.
+ */ + SAFETY_CHECK(overread_count <= (bitsleft >> 3)); + + /* Optionally return the actual number of bytes consumed. */ + if (actual_in_nbytes_ret) { + /* Don't count bytes that were refilled but not consumed. */ + in_next -= (bitsleft >> 3) - overread_count; + + *actual_in_nbytes_ret = in_next - (u8 *)in; + } + + /* Optionally return the actual number of bytes written. */ + if (actual_out_nbytes_ret) { + *actual_out_nbytes_ret = out_next - (u8 *)out; + } else { + if (out_next != out_end) + return LIBDEFLATE_SHORT_OUTPUT; + } + return LIBDEFLATE_SUCCESS; +} + +#undef FUNCNAME +#undef ATTRIBUTES +#undef EXTRACT_VARBITS +#undef EXTRACT_VARBITS8 + +#endif /* HAVE_BMI2_INTRIN */ + +#if defined(deflate_decompress_bmi2) && HAVE_BMI2_NATIVE +#define DEFAULT_IMPL deflate_decompress_bmi2 +#else /* + * Runtime selection: returns deflate_decompress_bmi2 when that function was + * compiled in and HAVE_BMI2(get_x86_cpu_features()) is true; otherwise returns + * NULL, in which case dispatch_decomp() substitutes DEFAULT_IMPL. + */ +static inline decompress_func_t +arch_select_decompress_func(void) +{ +#ifdef deflate_decompress_bmi2 + if (HAVE_BMI2(get_x86_cpu_features())) + return deflate_decompress_bmi2; +#endif + return NULL; +} +#define arch_select_decompress_func arch_select_decompress_func +#endif + +#endif /* LIB_X86_DECOMPRESS_IMPL_H */ + +#endif + +#ifndef DEFAULT_IMPL +# define DEFAULT_IMPL deflate_decompress_default +#endif + +#ifdef arch_select_decompress_func +static enum libdeflate_result +dispatch_decomp(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret); + +static volatile decompress_func_t decompress_impl = dispatch_decomp; /* starts as the dispatcher; dispatch_decomp() overwrites it with the selected implementation */ + +/* Choose the best implementation at runtime.
*/ +static enum libdeflate_result +dispatch_decomp(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret) +{ + decompress_func_t f = arch_select_decompress_func(); + + if (f == NULL) + f = DEFAULT_IMPL; + + decompress_impl = f; + return f(d, in, in_nbytes, out, out_nbytes_avail, + actual_in_nbytes_ret, actual_out_nbytes_ret); +} +#else +/* The best implementation is statically known, so call it directly. */ +# define decompress_impl DEFAULT_IMPL +#endif + +/* + * This is the main DEFLATE decompression routine. See libdeflate.h for the + * documentation. + * + * Note that the real code is in decompress_template.h. The part here just + * handles calling the appropriate implementation depending on the CPU features + * at runtime. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret) +{ + return decompress_impl(d, in, in_nbytes, out, out_nbytes_avail, + actual_in_nbytes_ret, actual_out_nbytes_ret); +} + +LIBDEFLATEAPI enum libdeflate_result +libdeflate_deflate_decompress(struct libdeflate_decompressor *d, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret) +{ + return libdeflate_deflate_decompress_ex(d, in, in_nbytes, + out, out_nbytes_avail, + NULL, actual_out_nbytes_ret); +} + +LIBDEFLATEAPI struct libdeflate_decompressor * +libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options) +{ + struct libdeflate_decompressor *d; + + /* + * Note: if more fields are added to libdeflate_options, this code will + * need to be updated to support both the old and new structs. + */ + if (options->sizeof_options != sizeof(*options)) + return NULL; + + d = (options->malloc_func ? 
options->malloc_func : + libdeflate_default_malloc_func)(sizeof(*d)); + if (d == NULL) + return NULL; + /* + * Note that only certain parts of the decompressor actually must be + * initialized here: + * + * - 'static_codes_loaded' must be initialized to false. + * + * - The first half of the main portion of each decode table must be + * initialized to any value, to avoid reading from uninitialized + * memory during table expansion in build_decode_table(). (Although, + * this is really just to avoid warnings with dynamic tools like + * valgrind, since build_decode_table() is guaranteed to initialize + * all entries eventually anyway.) + * + * - 'free_func' must be set. + * + * But for simplicity, we currently just zero the whole decompressor. + */ + memset(d, 0, sizeof(*d)); + d->free_func = options->free_func ? + options->free_func : libdeflate_default_free_func; + return d; +} + +LIBDEFLATEAPI struct libdeflate_decompressor * +libdeflate_alloc_decompressor(void) +{ + static const struct libdeflate_options defaults = { + .sizeof_options = sizeof(defaults), + }; + return libdeflate_alloc_decompressor_ex(&defaults); +} + +LIBDEFLATEAPI void +libdeflate_free_decompressor(struct libdeflate_decompressor *d) +{ + if (d) + d->free_func(d); +} + + +/* + * utils.c - utility functions for libdeflate + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifdef FREESTANDING +# define malloc NULL +# define free NULL +#else +# include +#endif + +malloc_func_t libdeflate_default_malloc_func = malloc; +free_func_t libdeflate_default_free_func = free; + +void * +libdeflate_aligned_malloc(malloc_func_t malloc_func, + size_t alignment, size_t size) +{ + void *ptr = (*malloc_func)(sizeof(void *) + alignment - 1 + size); + + if (ptr) { + void *orig_ptr = ptr; + + ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment); + ((void **)ptr)[-1] = orig_ptr; + } + return ptr; +} + +void +libdeflate_aligned_free(free_func_t free_func, void *ptr) +{ + (*free_func)(((void **)ptr)[-1]); +} + +LIBDEFLATEAPI void +libdeflate_set_memory_allocator(malloc_func_t malloc_func, + free_func_t free_func) +{ + libdeflate_default_malloc_func = malloc_func; + libdeflate_default_free_func = free_func; +} + +/* + * Implementations of libc functions for freestanding library builds. + * Normal library builds don't use these. Not optimized yet; usually the + * compiler expands these functions and doesn't actually call them anyway. 
+ */ +#ifdef FREESTANDING +#undef memset +void * __attribute__((weak)) +memset(void *s, int c, size_t n) +{ + u8 *p = s; + size_t i; + + for (i = 0; i < n; i++) + p[i] = c; + return s; +} + +#undef memcpy +void * __attribute__((weak)) +memcpy(void *dest, const void *src, size_t n) +{ + u8 *d = dest; + const u8 *s = src; + size_t i; + + for (i = 0; i < n; i++) + d[i] = s[i]; + return dest; +} + +#undef memmove +void * __attribute__((weak)) +memmove(void *dest, const void *src, size_t n) +{ + u8 *d = dest; + const u8 *s = src; + size_t i; + + if (d <= s) + return memcpy(d, s, n); + + for (i = n; i > 0; i--) + d[i - 1] = s[i - 1]; + return dest; +} + +#undef memcmp +int __attribute__((weak)) +memcmp(const void *s1, const void *s2, size_t n) +{ + const u8 *p1 = s1; + const u8 *p2 = s2; + size_t i; + + for (i = 0; i < n; i++) { + if (p1[i] != p2[i]) + return (int)p1[i] - (int)p2[i]; + } + return 0; +} +#endif /* FREESTANDING */ + +#ifdef LIBDEFLATE_ENABLE_ASSERTIONS +#include +#include +void +libdeflate_assertion_failed(const char *expr, const char *file, int line) +{ + fprintf(stderr, "Assertion failed: %s at %s:%d\n", expr, file, line); + abort(); +} +#endif /* LIBDEFLATE_ENABLE_ASSERTIONS */ + +/* + * x86/cpu_features.c - feature detection for x86 CPUs + * + * Copyright 2016 Eric Biggers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#if HAVE_DYNAMIC_X86_CPU_FEATURES + +/* + * With old GCC versions we have to manually save and restore the x86_32 PIC + * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602 + */ +#if defined(ARCH_X86_32) && defined(__PIC__) +# define EBX_CONSTRAINT "=&r" +#else +# define EBX_CONSTRAINT "=b" +#endif + +/* Execute the CPUID instruction. */ +static inline void +cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d) +{ +#ifdef _MSC_VER + int result[4]; + + __cpuidex(result, leaf, subleaf); + *a = result[0]; + *b = result[1]; + *c = result[2]; + *d = result[3]; +#else + __asm__ volatile(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n" + "cpuid \n" + ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n" + : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d) + : "a" (leaf), "c" (subleaf)); +#endif +} + +/* Read an extended control register. */ +static inline u64 +read_xcr(u32 index) +{ +#ifdef _MSC_VER + return _xgetbv(index); +#else + u32 d, a; + + /* + * Execute the "xgetbv" instruction. Old versions of binutils do not + * recognize this instruction, so list the raw bytes instead. + * + * This must be 'volatile' to prevent this code from being moved out + * from under the check for OSXSAVE. 
+ */ + __asm__ volatile(".byte 0x0f, 0x01, 0xd0" : + "=d" (d), "=a" (a) : "c" (index)); + + return ((u64)d << 32) | a; +#endif +} + +static const struct cpu_feature x86_cpu_feature_table[] = { + {X86_CPU_FEATURE_SSE2, "sse2"}, + {X86_CPU_FEATURE_PCLMUL, "pclmul"}, + {X86_CPU_FEATURE_AVX, "avx"}, + {X86_CPU_FEATURE_AVX2, "avx2"}, + {X86_CPU_FEATURE_BMI2, "bmi2"}, +}; + +volatile u32 libdeflate_x86_cpu_features = 0; + +/* Initialize libdeflate_x86_cpu_features. */ +void libdeflate_init_x86_cpu_features(void) +{ + u32 max_leaf, a, b, c, d; + u64 xcr0 = 0; + u32 features = 0; + + /* EAX=0: Highest Function Parameter and Manufacturer ID */ + cpuid(0, 0, &max_leaf, &b, &c, &d); + if (max_leaf < 1) + goto out; + + /* EAX=1: Processor Info and Feature Bits */ + cpuid(1, 0, &a, &b, &c, &d); + if (d & (1 << 26)) + features |= X86_CPU_FEATURE_SSE2; + if (c & (1 << 1)) + features |= X86_CPU_FEATURE_PCLMUL; + if (c & (1 << 27)) + xcr0 = read_xcr(0); + if ((c & (1 << 28)) && ((xcr0 & 0x6) == 0x6)) + features |= X86_CPU_FEATURE_AVX; + + if (max_leaf < 7) + goto out; + + /* EAX=7, ECX=0: Extended Features */ + cpuid(7, 0, &a, &b, &c, &d); + if ((b & (1 << 5)) && ((xcr0 & 0x6) == 0x6)) + features |= X86_CPU_FEATURE_AVX2; + if (b & (1 << 8)) + features |= X86_CPU_FEATURE_BMI2; + +out: + disable_cpu_features_for_testing(&features, x86_cpu_feature_table, + ARRAY_LEN(x86_cpu_feature_table)); + + libdeflate_x86_cpu_features = features | X86_CPU_FEATURES_KNOWN; +} + +#endif /* HAVE_DYNAMIC_X86_CPU_FEATURES */ diff --git a/Source/ThirdParty/OpenFBX/libdeflate.h b/Source/ThirdParty/OpenFBX/libdeflate.h new file mode 100644 index 000000000..382d895de --- /dev/null +++ b/Source/ThirdParty/OpenFBX/libdeflate.h @@ -0,0 +1,411 @@ +/* + * libdeflate.h - public header for libdeflate + */ + +#ifndef LIBDEFLATE_H +#define LIBDEFLATE_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define LIBDEFLATE_VERSION_MAJOR 1 +#define LIBDEFLATE_VERSION_MINOR 18 +#define 
LIBDEFLATE_VERSION_STRING "1.18" + +/* + * Users of libdeflate.dll on Windows can define LIBDEFLATE_DLL to cause + * __declspec(dllimport) to be used. This should be done when it's easy to do. + * Otherwise it's fine to skip it, since it is a very minor performance + * optimization that is irrelevant for most use cases of libdeflate. + */ +#ifndef LIBDEFLATEAPI +# if defined(LIBDEFLATE_DLL) && (defined(_WIN32) || defined(__CYGWIN__)) +# define LIBDEFLATEAPI __declspec(dllimport) +# else +# define LIBDEFLATEAPI +# endif +#endif + +/* ========================================================================== */ +/* Compression */ +/* ========================================================================== */ + +struct libdeflate_compressor; +struct libdeflate_options; + +/* + * libdeflate_alloc_compressor() allocates a new compressor that supports + * DEFLATE, zlib, and gzip compression. 'compression_level' is the compression + * level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 = + * medium/default, 9 = slow, 12 = slowest). Level 0 is also supported and means + * "no compression", specifically "create a valid stream, but only emit + * uncompressed blocks" (this will expand the data slightly). + * + * The return value is a pointer to the new compressor, or NULL if out of memory + * or if the compression level is invalid (i.e. outside the range [0, 12]). + * + * Note: for compression, the sliding window size is defined at compilation time + * to 32768, the largest size permissible in the DEFLATE format. It cannot be + * changed at runtime. + * + * A single compressor is not safe to use by multiple threads concurrently. + * However, different threads may use different compressors concurrently. + */ +LIBDEFLATEAPI struct libdeflate_compressor * +libdeflate_alloc_compressor(int compression_level); + +/* + * Like libdeflate_alloc_compressor(), but adds the 'options' argument. 
+ */ +LIBDEFLATEAPI struct libdeflate_compressor * +libdeflate_alloc_compressor_ex(int compression_level, + const struct libdeflate_options *options); + +/* + * libdeflate_deflate_compress() performs raw DEFLATE compression on a buffer of + * data. It attempts to compress 'in_nbytes' bytes of data located at 'in' and + * write the result to 'out', which has space for 'out_nbytes_avail' bytes. The + * return value is the compressed size in bytes, or 0 if the data could not be + * compressed to 'out_nbytes_avail' bytes or fewer (but see note below). + * + * If compression is successful, then the output data is guaranteed to be a + * valid DEFLATE stream that decompresses to the input data. No other + * guarantees are made about the output data. Notably, different versions of + * libdeflate can produce different compressed data for the same uncompressed + * data, even at the same compression level. Do ***NOT*** do things like + * writing tests that compare compressed data to a golden output, as this can + * break when libdeflate is updated. (This property isn't specific to + * libdeflate; the same is true for zlib and other compression libraries too.) + */ +LIBDEFLATEAPI size_t +libdeflate_deflate_compress(struct libdeflate_compressor *compressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail); + +/* + * libdeflate_deflate_compress_bound() returns a worst-case upper bound on the + * number of bytes of compressed data that may be produced by compressing any + * buffer of length less than or equal to 'in_nbytes' using + * libdeflate_deflate_compress() with the specified compressor. This bound will + * necessarily be a number greater than or equal to 'in_nbytes'. It may be an + * overestimate of the true upper bound. The return value is guaranteed to be + * the same for all invocations with the same compressor and same 'in_nbytes'. + * + * As a special case, 'compressor' may be NULL. 
This causes the bound to be + * taken across *any* libdeflate_compressor that could ever be allocated with + * this build of the library, with any options. + * + * Note that this function is not necessary in many applications. With + * block-based compression, it is usually preferable to separately store the + * uncompressed size of each block and to store any blocks that did not compress + * to less than their original size uncompressed. In that scenario, there is no + * need to know the worst-case compressed size, since the maximum number of + * bytes of compressed data that may be used would always be one less than the + * input length. You can just pass a buffer of that size to + * libdeflate_deflate_compress() and store the data uncompressed if + * libdeflate_deflate_compress() returns 0, indicating that the compressed data + * did not fit into the provided output buffer. + */ +LIBDEFLATEAPI size_t +libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor, + size_t in_nbytes); + +/* + * Like libdeflate_deflate_compress(), but uses the zlib wrapper format instead + * of raw DEFLATE. + */ +LIBDEFLATEAPI size_t +libdeflate_zlib_compress(struct libdeflate_compressor *compressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail); + +/* + * Like libdeflate_deflate_compress_bound(), but assumes the data will be + * compressed with libdeflate_zlib_compress() rather than with + * libdeflate_deflate_compress(). + */ +LIBDEFLATEAPI size_t +libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor, + size_t in_nbytes); + +/* + * Like libdeflate_deflate_compress(), but uses the gzip wrapper format instead + * of raw DEFLATE. 
+ */ +LIBDEFLATEAPI size_t +libdeflate_gzip_compress(struct libdeflate_compressor *compressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail); + +/* + * Like libdeflate_deflate_compress_bound(), but assumes the data will be + * compressed with libdeflate_gzip_compress() rather than with + * libdeflate_deflate_compress(). + */ +LIBDEFLATEAPI size_t +libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor, + size_t in_nbytes); + +/* + * libdeflate_free_compressor() frees a compressor that was allocated with + * libdeflate_alloc_compressor(). If a NULL pointer is passed in, no action is + * taken. + */ +LIBDEFLATEAPI void +libdeflate_free_compressor(struct libdeflate_compressor *compressor); + +/* ========================================================================== */ +/* Decompression */ +/* ========================================================================== */ + +struct libdeflate_decompressor; +struct libdeflate_options; + +/* + * libdeflate_alloc_decompressor() allocates a new decompressor that can be used + * for DEFLATE, zlib, and gzip decompression. The return value is a pointer to + * the new decompressor, or NULL if out of memory. + * + * This function takes no parameters, and the returned decompressor is valid for + * decompressing data that was compressed at any compression level and with any + * sliding window size. + * + * A single decompressor is not safe to use by multiple threads concurrently. + * However, different threads may use different decompressors concurrently. + */ +LIBDEFLATEAPI struct libdeflate_decompressor * +libdeflate_alloc_decompressor(void); + +/* + * Like libdeflate_alloc_decompressor(), but adds the 'options' argument. + */ +LIBDEFLATEAPI struct libdeflate_decompressor * +libdeflate_alloc_decompressor_ex(const struct libdeflate_options *options); + +/* + * Result of a call to libdeflate_deflate_decompress(), + * libdeflate_zlib_decompress(), or libdeflate_gzip_decompress(). 
+ */ +enum libdeflate_result { + /* Decompression was successful. */ + LIBDEFLATE_SUCCESS = 0, + + /* Decompression failed because the compressed data was invalid, + * corrupt, or otherwise unsupported. */ + LIBDEFLATE_BAD_DATA = 1, + + /* A NULL 'actual_out_nbytes_ret' was provided, but the data would have + * decompressed to fewer than 'out_nbytes_avail' bytes. */ + LIBDEFLATE_SHORT_OUTPUT = 2, + + /* The data would have decompressed to more than 'out_nbytes_avail' + * bytes. */ + LIBDEFLATE_INSUFFICIENT_SPACE = 3, +}; + +/* + * libdeflate_deflate_decompress() decompresses a DEFLATE stream from the buffer + * 'in' with compressed size up to 'in_nbytes' bytes. The uncompressed data is + * written to 'out', a buffer with size 'out_nbytes_avail' bytes. If + * decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned. Otherwise, + * a nonzero result code such as LIBDEFLATE_BAD_DATA is returned, and the + * contents of the output buffer are undefined. + * + * Decompression stops at the end of the DEFLATE stream (as indicated by the + * BFINAL flag), even if it is actually shorter than 'in_nbytes' bytes. + * + * libdeflate_deflate_decompress() can be used in cases where the actual + * uncompressed size is known (recommended) or unknown (not recommended): + * + * - If the actual uncompressed size is known, then pass the actual + * uncompressed size as 'out_nbytes_avail' and pass NULL for + * 'actual_out_nbytes_ret'. This makes libdeflate_deflate_decompress() fail + * with LIBDEFLATE_SHORT_OUTPUT if the data decompressed to fewer than the + * specified number of bytes. + * + * - If the actual uncompressed size is unknown, then provide a non-NULL + * 'actual_out_nbytes_ret' and provide a buffer with some size + * 'out_nbytes_avail' that you think is large enough to hold all the + * uncompressed data. 
In this case, if the data decompresses to less than + * or equal to 'out_nbytes_avail' bytes, then + * libdeflate_deflate_decompress() will write the actual uncompressed size + * to *actual_out_nbytes_ret and return 0 (LIBDEFLATE_SUCCESS). Otherwise, + * it will return LIBDEFLATE_INSUFFICIENT_SPACE if the provided buffer was + * not large enough but no other problems were encountered, or another + * nonzero result code if decompression failed for another reason. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_deflate_decompress(), but adds the 'actual_in_nbytes_ret' + * argument. If decompression succeeds and 'actual_in_nbytes_ret' is not NULL, + * then the actual compressed size of the DEFLATE stream (aligned to the next + * byte boundary) is written to *actual_in_nbytes_ret. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_deflate_decompress(), but assumes the zlib wrapper format + * instead of raw DEFLATE. + * + * Decompression will stop at the end of the zlib stream, even if it is shorter + * than 'in_nbytes'. If you need to know exactly where the zlib stream ended, + * use libdeflate_zlib_decompress_ex(). + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_zlib_decompress(), but adds the 'actual_in_nbytes_ret' + * argument. 
If 'actual_in_nbytes_ret' is not NULL and the decompression + * succeeds (indicating that the first zlib-compressed stream in the input + * buffer was decompressed), then the actual number of input bytes consumed is + * written to *actual_in_nbytes_ret. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_zlib_decompress_ex(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_deflate_decompress(), but assumes the gzip wrapper format + * instead of raw DEFLATE. + * + * If multiple gzip-compressed members are concatenated, then only the first + * will be decompressed. Use libdeflate_gzip_decompress_ex() if you need + * multi-member support. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_out_nbytes_ret); + +/* + * Like libdeflate_gzip_decompress(), but adds the 'actual_in_nbytes_ret' + * argument. If 'actual_in_nbytes_ret' is not NULL and the decompression + * succeeds (indicating that the first gzip-compressed member in the input + * buffer was decompressed), then the actual number of input bytes consumed is + * written to *actual_in_nbytes_ret. + */ +LIBDEFLATEAPI enum libdeflate_result +libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor, + const void *in, size_t in_nbytes, + void *out, size_t out_nbytes_avail, + size_t *actual_in_nbytes_ret, + size_t *actual_out_nbytes_ret); + +/* + * libdeflate_free_decompressor() frees a decompressor that was allocated with + * libdeflate_alloc_decompressor(). If a NULL pointer is passed in, no action + * is taken. 
+ */ +LIBDEFLATEAPI void +libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor); + +/* ========================================================================== */ +/* Checksums */ +/* ========================================================================== */ + +/* + * libdeflate_adler32() updates a running Adler-32 checksum with 'len' bytes of + * data and returns the updated checksum. When starting a new checksum, the + * required initial value for 'adler' is 1. This value is also returned when + * 'buffer' is specified as NULL. + */ +LIBDEFLATEAPI uint32_t +libdeflate_adler32(uint32_t adler, const void *buffer, size_t len); + + +/* + * libdeflate_crc32() updates a running CRC-32 checksum with 'len' bytes of data + * and returns the updated checksum. When starting a new checksum, the required + * initial value for 'crc' is 0. This value is also returned when 'buffer' is + * specified as NULL. + */ +LIBDEFLATEAPI uint32_t +libdeflate_crc32(uint32_t crc, const void *buffer, size_t len); + +/* ========================================================================== */ +/* Custom memory allocator */ +/* ========================================================================== */ + +/* + * Install a custom memory allocator which libdeflate will use for all memory + * allocations by default. 'malloc_func' is a function that must behave like + * malloc(), and 'free_func' is a function that must behave like free(). + * + * The per-(de)compressor custom memory allocator that can be specified in + * 'struct libdeflate_options' takes priority over this. + * + * This doesn't affect the free() function that will be used to free + * (de)compressors that were already in existence when this is called. + */ +LIBDEFLATEAPI void +libdeflate_set_memory_allocator(void *(*malloc_func)(size_t), + void (*free_func)(void *)); + +/* + * Advanced options. 
This is the options structure that + * libdeflate_alloc_compressor_ex() and libdeflate_alloc_decompressor_ex() + * require. Most users won't need this and should just use the non-"_ex" + * functions instead. If you do need this, it should be initialized like this: + * + * struct libdeflate_options options; + * + * memset(&options, 0, sizeof(options)); + * options.sizeof_options = sizeof(options); + * // Then set the fields that you need to override the defaults for. + */ +struct libdeflate_options { + + /* + * This field must be set to the struct size. This field exists for + * extensibility, so that fields can be appended to this struct in + * future versions of libdeflate while still supporting old binaries. + */ + size_t sizeof_options; + + /* + * An optional custom memory allocator to use for this (de)compressor. + * 'malloc_func' must be a function that behaves like malloc(), and + * 'free_func' must be a function that behaves like free(). + * + * This is useful in cases where a process might have multiple users of + * libdeflate who want to use different memory allocators. For example, + * a library might want to use libdeflate with a custom memory allocator + * without interfering with user code that might use libdeflate too. + * + * This takes priority over the "global" memory allocator (which by + * default is malloc() and free(), but can be changed by + * libdeflate_set_memory_allocator()). Moreover, libdeflate will never + * call the "global" memory allocator if a per-(de)compressor custom + * allocator is always given. 
+ */ + void *(*malloc_func)(size_t); + void (*free_func)(void *); +}; + +#ifdef __cplusplus +} +#endif + +#endif /* LIBDEFLATE_H */ diff --git a/Source/ThirdParty/OpenFBX/ofbx.cpp b/Source/ThirdParty/OpenFBX/ofbx.cpp index e60211e3d..9f75a2e6b 100644 --- a/Source/ThirdParty/OpenFBX/ofbx.cpp +++ b/Source/ThirdParty/OpenFBX/ofbx.cpp @@ -1,5 +1,5 @@ #include "ofbx.h" -#include "miniz.h" +#include "libdeflate.h" #include #include #include @@ -8,11 +8,33 @@ #include #include #include +#include +#include +#include +#if __cplusplus >= 202002L +#include // for std::bit_cast (C++20 and later) +#endif +#include namespace ofbx { +static int decodeIndex(int idx) +{ + return (idx < 0) ? (-idx - 1) : idx; +} + +static int codeIndex(int idx, bool last) +{ + return last ? (-idx - 1) : idx; +} + +template +static T& emplace_back(std::vector& vec) { + vec.emplace_back(); + return vec.back(); +} struct Allocator { struct Page { @@ -25,11 +47,12 @@ struct Allocator { Page* first = nullptr; ~Allocator() { - while (first) { - Page* page = first; - first = first->header.next; - delete page; - } + Page* p = first; + while (p) { + Page* n = p->header.next; + delete p; + p = n; + } } template T* allocate(Args&&... 
args) @@ -42,7 +65,7 @@ struct Allocator { if (p->header.offset % alignof(T) != 0) { p->header.offset += alignof(T) - p->header.offset % alignof(T); } - + if (p->header.offset + sizeof(T) > sizeof(p->data)) { p = new Page; p->header.next = first; @@ -52,37 +75,35 @@ struct Allocator { p->header.offset += sizeof(T); return res; } - - // store temporary data, can be reused - std::vector tmp; - std::vector int_tmp; - std::vector vec3_tmp; - std::vector double_tmp; - std::vector vec3_tmp2; -}; - - -struct Temporaries { - std::vector f; - std::vector i; - std::vector v2; - std::vector v3; - std::vector v4; }; struct Video { + IElementProperty* base64_property = nullptr; DataView filename; DataView content; DataView media; + bool is_base_64; }; struct Error { Error() {} - Error(const char* msg) { s_message = msg; } + Error(const char* msg) + { + s_message = msg; + } + + // Format a message with printf-style arguments. + template + Error(const char* fmt, Args... args) + { + char buf[1024]; + std::snprintf(buf, sizeof(buf), fmt, args...); + s_message = buf; + } static const char* s_message; }; @@ -151,7 +172,7 @@ struct Cursor }; -static void setTranslation(const Vec3& t, Matrix* mtx) +static void setTranslation(const DVec3& t, DMatrix* mtx) { mtx->m[12] = t.x; mtx->m[13] = t.y; @@ -159,15 +180,15 @@ static void setTranslation(const Vec3& t, Matrix* mtx) } -static Vec3 operator-(const Vec3& v) +static DVec3 operator-(const DVec3& v) { return {-v.x, -v.y, -v.z}; } -static Matrix operator*(const Matrix& lhs, const Matrix& rhs) +static DMatrix operator*(const DMatrix& lhs, const DMatrix& rhs) { - Matrix res; + DMatrix res; for (int j = 0; j < 4; ++j) { for (int i = 0; i < 4; ++i) @@ -184,15 +205,15 @@ static Matrix operator*(const Matrix& lhs, const Matrix& rhs) } -static Matrix makeIdentity() +static DMatrix makeIdentity() { return {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; } -static Matrix rotationX(double angle) +static DMatrix rotationX(double angle) { - Matrix m = 
makeIdentity(); + DMatrix m = makeIdentity(); double c = cos(angle); double s = sin(angle); @@ -204,9 +225,9 @@ static Matrix rotationX(double angle) } -static Matrix rotationY(double angle) +static DMatrix rotationY(double angle) { - Matrix m = makeIdentity(); + DMatrix m = makeIdentity(); double c = cos(angle); double s = sin(angle); @@ -218,9 +239,9 @@ static Matrix rotationY(double angle) } -static Matrix rotationZ(double angle) +static DMatrix rotationZ(double angle) { - Matrix m = makeIdentity(); + DMatrix m = makeIdentity(); double c = cos(angle); double s = sin(angle); @@ -232,12 +253,12 @@ static Matrix rotationZ(double angle) } -static Matrix getRotationMatrix(const Vec3& euler, RotationOrder order) +static DMatrix getRotationMatrix(const DVec3& euler, RotationOrder order) { const double TO_RAD = 3.1415926535897932384626433832795028 / 180.0; - Matrix rx = rotationX(euler.x * TO_RAD); - Matrix ry = rotationY(euler.y * TO_RAD); - Matrix rz = rotationZ(euler.z * TO_RAD); + DMatrix rx = rotationX(euler.x * TO_RAD); + DMatrix ry = rotationY(euler.y * TO_RAD); + DMatrix rz = rotationZ(euler.z * TO_RAD); switch (order) { default: @@ -264,13 +285,18 @@ i64 secondsToFbxTime(double value) } -static Vec3 operator*(const Vec3& v, float f) +static DVec3 operator*(const DVec3& v, float f) { return {v.x * f, v.y * f, v.z * f}; } -static Vec3 operator+(const Vec3& a, const Vec3& b) +static DVec3 operator+(const DVec3& a, const DVec3& b) +{ + return {a.x + b.x, a.y + b.y, a.z + b.z}; +} + +static FVec3 operator+(const FVec3& a, const FVec3& b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; } @@ -300,7 +326,9 @@ u64 DataView::toU64() const if (is_binary) { assert(end - begin == sizeof(u64)); - return *(u64*)begin; + u64 result; + memcpy(&result, begin, sizeof(u64)); + return result; } static_assert(sizeof(unsigned long long) >= sizeof(u64), "can't use strtoull"); return strtoull((const char*)begin, nullptr, 10); @@ -312,7 +340,9 @@ i64 DataView::toI64() const if (is_binary) { 
assert(end - begin == sizeof(i64)); - return *(i64*)begin; + i64 result; + memcpy(&result, begin, sizeof(i64)); + return result; } static_assert(sizeof(long long) >= sizeof(i64), "can't use atoll"); return atoll((const char*)begin); @@ -324,7 +354,9 @@ int DataView::toInt() const if (is_binary) { assert(end - begin == sizeof(int)); - return *(int*)begin; + int result; + memcpy(&result, begin, sizeof(int)); + return result; } return atoi((const char*)begin); } @@ -335,18 +367,27 @@ u32 DataView::toU32() const if (is_binary) { assert(end - begin == sizeof(u32)); - return *(u32*)begin; + u32 result; + memcpy(&result, begin, sizeof(u32)); + return result; } return (u32)atoll((const char*)begin); } +bool DataView::toBool() const +{ + return toInt() != 0; +} + double DataView::toDouble() const { if (is_binary) { assert(end - begin == sizeof(double)); - return *(double*)begin; + double result; + memcpy(&result, begin, sizeof(double)); + return result; } return atof((const char*)begin); } @@ -357,7 +398,9 @@ float DataView::toFloat() const if (is_binary) { assert(end - begin == sizeof(float)); - return *(float*)begin; + float result; + memcpy(&result, begin, sizeof(float)); + return result; } return (float)atof((const char*)begin); } @@ -374,15 +417,33 @@ bool DataView::operator==(const char* rhs) const ++c; ++c2; } - return c2 == (const char*)end && *c == '\0'; + return *c2 == '\0' || c2 == (const char*)end && *c == '\0'; } struct Property; -template static bool parseArrayRaw(const Property& property, T* out, int max_size); -template static bool parseBinaryArray(const Property& property, std::vector* out); +struct Element; + +template static bool parseMemory(const Property& property, T* out, int max_size_bytes); +template static bool parseVecData(Property& property, std::vector* out_vec); +template static bool parseVertexData(const Element& element, const char* name, const char* index_name, T& out, std::vector& jobs); static bool parseDouble(Property& property, double* 
out); +struct ParseDataJob { + using F = bool (*)(Property*, void*); + Property* property = nullptr; + void* data = nullptr; + bool error = false; + F f; +}; + +template [[nodiscard]] bool pushJob(std::vector& jobs, Property& prop, std::vector& data) { + ParseDataJob& job = emplace_back(jobs); + job.property = ∝ + job.data = (void*)&data; + job.f = [](Property* prop, void* data){ return parseVecData(*prop, (std::vector*)data); }; + return true; +} struct Property : IElementProperty { @@ -394,20 +455,22 @@ struct Property : IElementProperty assert(type == ARRAY_DOUBLE || type == ARRAY_INT || type == ARRAY_FLOAT || type == ARRAY_LONG); if (value.is_binary) { - return int(*(u32*)value.begin); + int i; + memcpy(&i, value.begin, sizeof(i)); + return i; } return count; } - bool getValues(double* values, int max_size) const override { return parseArrayRaw(*this, values, max_size); } + bool getValues(double* values, int max_size) const override { return parseMemory(*this, values, max_size); } - bool getValues(float* values, int max_size) const override { return parseArrayRaw(*this, values, max_size); } + bool getValues(float* values, int max_size) const override { return parseMemory(*this, values, max_size); } - bool getValues(u64* values, int max_size) const override { return parseArrayRaw(*this, values, max_size); } + bool getValues(u64* values, int max_size) const override { return parseMemory(*this, values, max_size); } - bool getValues(i64* values, int max_size) const override { return parseArrayRaw(*this, values, max_size); } + bool getValues(i64* values, int max_size) const override { return parseMemory(*this, values, max_size); } - bool getValues(int* values, int max_size) const override { return parseArrayRaw(*this, values, max_size); } + bool getValues(int* values, int max_size) const override { return parseMemory(*this, values, max_size); } int count = 0; u8 type = INTEGER; @@ -415,7 +478,6 @@ struct Property : IElementProperty Property* next = nullptr; }; - 
struct Element : IElement { IElement* getFirstChild() const override { return child; } @@ -452,10 +514,15 @@ static const Element* findChild(const Element& element, const char* id) } -static IElement* resolveProperty(const Object& obj, const char* name) +static IElement* resolveProperty(const Object& obj, const char* name, bool* is_p60) { + *is_p60 = false; const Element* props = findChild((const Element&)obj.element, "Properties70"); - if (!props) return nullptr; + if (!props) { + props = findChild((const Element&)obj.element, "Properties60"); + *is_p60 = true; + if (!props) return nullptr; + } Element* prop = props->child; while (prop) @@ -472,59 +539,47 @@ static IElement* resolveProperty(const Object& obj, const char* name) static int resolveEnumProperty(const Object& object, const char* name, int default_value) { - Element* element = (Element*)resolveProperty(object, name); + bool is_p60; + Element* element = (Element*)resolveProperty(object, name, &is_p60); if (!element) return default_value; - Property* x = (Property*)element->getProperty(4); + Property* x = (Property*)element->getProperty(is_p60 ? 3 : 4); if (!x) return default_value; return x->value.toInt(); } -static Vec3 resolveVec3Property(const Object& object, const char* name, const Vec3& default_value) +static DVec3 resolveVec3Property(const Object& object, const char* name, const DVec3& default_value) { - Element* element = (Element*)resolveProperty(object, name); + bool is_p60; + Element* element = (Element*)resolveProperty(object, name, &is_p60); if (!element) return default_value; - Property* x = (Property*)element->getProperty(4); + Property* x = (Property*)element->getProperty(is_p60 ? 
3 : 4); if (!x || !x->next || !x->next->next) return default_value; return {x->value.toDouble(), x->next->value.toDouble(), x->next->next->value.toDouble()}; } - -Object::Object(const Scene& _scene, const IElement& _element) - : scene(_scene) - , element(_element) - , is_node(false) - , node_attribute(nullptr) +static bool isString(const Property* prop) { - auto& e = (Element&)_element; - if (e.first_property && e.first_property->next) - { - e.first_property->next->value.toString(name); - } - else - { - name[0] = '\0'; - } + if (!prop) return false; + return prop->getType() == Property::STRING; } +static bool isLong(const Property* prop) +{ + if (!prop) return false; + return prop->getType() == Property::LONG; +} + static bool decompress(const u8* in, size_t in_size, u8* out, size_t out_size) { - mz_stream stream = {}; - mz_inflateInit(&stream); - - stream.avail_in = (int)in_size; - stream.next_in = in; - stream.avail_out = (int)out_size; - stream.next_out = out; - - int status = mz_inflate(&stream, Z_SYNC_FLUSH); - - if (status != Z_STREAM_END) return false; - - return mz_inflateEnd(&stream) == Z_OK; + auto dec = libdeflate_alloc_decompressor(); + size_t dummy; + bool res = libdeflate_deflate_decompress(dec, in + 2, in_size - 2, out, out_size, &dummy) == LIBDEFLATE_SUCCESS; + libdeflate_free_decompressor(dec); + return res; } @@ -568,6 +623,20 @@ static OptionalError readLongString(Cursor* cursor) return value; } +// Cheat sheet: // +/* +'S': Long string +'Y': 16-bit signed integer +'C': 8-bit signed integer +'I': 32-bit signed integer +'F': Single precision floating-point number +'D': Double precision floating-point number +'L': 64-bit signed integer +'R': Binary data +'b', 'f', 'd', 'l', 'c' and 'i': Arrays of binary data + +Src: https://code.blender.org/2013/08/fbx-binary-file-format-specification/ +*/ static OptionalError readProperty(Cursor* cursor, Allocator& allocator) { @@ -603,6 +672,7 @@ static OptionalError readProperty(Cursor* cursor, Allocator& 
allocato break; } case 'b': + case 'c': case 'f': case 'd': case 'l': @@ -616,14 +686,18 @@ static OptionalError readProperty(Cursor* cursor, Allocator& allocato cursor->current += comp_len.getValue(); break; } - default: return Error("Unknown property type"); + default: + { + char str[32]; + snprintf(str, sizeof(str), "Unknown property type: %c", prop->type); + return Error(str); + } } prop->value.end = cursor->current; return prop; } - -static OptionalError readElementOffset(Cursor* cursor, u16 version) +static OptionalError readElementOffset(Cursor* cursor, u32 version) { if (version >= 7500) { @@ -685,7 +759,7 @@ static OptionalError readElement(Cursor* cursor, u32 version, Allocato } *link = child.getValue(); - if (child.getValue() == 0) break; + if (child.getValue() == 0) break; link = &(*link)->sibling; } @@ -701,13 +775,13 @@ static OptionalError readElement(Cursor* cursor, u32 version, Allocato static bool isEndLine(const Cursor& cursor) { - return *cursor.current == '\n'; + return *cursor.current == '\n' || *cursor.current == '\r' && cursor.current + 1 < cursor.end && *(cursor.current + 1) != '\n'; } static void skipInsignificantWhitespaces(Cursor* cursor) { - while (cursor->current < cursor->end && isspace(*cursor->current) && *cursor->current != '\n') + while (cursor->current < cursor->end && isspace(*cursor->current) && !isEndLine(*cursor)) { ++cursor->current; } @@ -824,6 +898,14 @@ static OptionalError readTextProperty(Cursor* cursor, Allocator& allo return prop; } + if (*cursor->current == ',') { + // https://github.com/nem0/OpenFBX/issues/85 + prop->type = IElementProperty::NONE; + prop->value.begin = cursor->current; + prop->value.end = cursor->current; + return prop; + } + if (*cursor->current == '*') { prop->type = 'l'; @@ -845,7 +927,7 @@ static OptionalError readTextProperty(Cursor* cursor, Allocator& allo if (is_any) ++prop->count; is_any = false; } - else if (!isspace(*cursor->current) && *cursor->current != '\n') + else if 
(!isspace(*cursor->current) && !isEndLine(*cursor)) is_any = true; if (*cursor->current == '.') prop->type = 'd'; ++cursor->current; @@ -857,7 +939,7 @@ static OptionalError readTextProperty(Cursor* cursor, Allocator& allo } assert(false); - return Error("TODO"); + return Error("Unknown error"); } @@ -875,7 +957,7 @@ static OptionalError readTextElement(Cursor* cursor, Allocator& alloca element->id = id; Property** prop_link = &element->first_property; - while (cursor->current < cursor->end && *cursor->current != '\n' && *cursor->current != '{') + while (cursor->current < cursor->end && !isEndLine(*cursor) && *cursor->current != '{') { OptionalError prop = readTextProperty(cursor, allocator); if (prop.isError()) @@ -954,15 +1036,23 @@ static OptionalError tokenizeText(const u8* data, size_t size, Allocat } -static OptionalError tokenize(const u8* data, size_t size, u32& version, Allocator& allocator) -{ +static OptionalError tokenize(const u8* data, size_t size, u32& version, Allocator& allocator) { + if (size < sizeof(Header)) return Error("Invalid header"); + Cursor cursor; cursor.begin = data; cursor.current = data; cursor.end = data + size; - const Header* header = (const Header*)cursor.current; - cursor.current += sizeof(*header); +#if __cplusplus >= 202002L + const Header* header = std::bit_cast(cursor.current); +#else + Header header_temp; + memcpy(&header_temp, cursor.current, sizeof(Header)); + const Header* header = &header_temp; +#endif + + cursor.current += sizeof(Header); version = header->version; Element* root = allocator.allocate(); @@ -976,16 +1066,17 @@ static OptionalError tokenize(const u8* data, size_t size, u32& versio for (;;) { OptionalError child = readElement(&cursor, header->version, allocator); - if (child.isError()) { + if (child.isError()) + { return Error(); } + *element = child.getValue(); if (!*element) return root; element = &(*element)->sibling; } } - static void parseTemplates(const Element& root) { const Element* defs = 
findChild(root, "Definitions"); @@ -1019,12 +1110,204 @@ static void parseTemplates(const Element& root) struct Scene; +enum class VertexDataMapping { + BY_POLYGON_VERTEX, + BY_POLYGON, + BY_VERTEX +}; + +struct Vec2AttributesImpl { + std::vector values; + std::vector indices; + VertexDataMapping mapping; + operator Vec2Attributes() const { + return { values.data(), indices.data(), int(indices.empty() ? values.size() : indices.size()) }; + } +}; + +struct Vec3AttributesImpl { + std::vector values; + std::vector indices; + VertexDataMapping mapping; + operator Vec3Attributes() const { + return { values.data(), indices.data(), int(indices.empty() ? values.size() : indices.size()), int(values.size()) }; + } +}; + +struct Vec4AttributesImpl { + std::vector values; + std::vector indices; + VertexDataMapping mapping; + operator Vec4Attributes() const { + return { values.data(), indices.data(), int(indices.empty() ? values.size() : indices.size()) }; + } +}; + +struct GeometryPartitionImpl { + std::vector polygons; + int max_polygon_triangles = 0; + int triangles_count = 0; +}; + +struct GeometryDataImpl : GeometryData { + Vec3AttributesImpl positions; + Vec3AttributesImpl normals; + Vec3AttributesImpl tangents; + Vec4AttributesImpl colors; + Vec2AttributesImpl uvs[Geometry::s_uvs_max]; + std::vector partitions; + + std::vector materials; + + template + T patchAttributes(const S& attr) const { + T res = attr; + if (!attr.values.empty() && attr.mapping == VertexDataMapping::BY_VERTEX && attr.indices.empty()) { + res.indices = positions.indices.data(); + res.count = int(positions.indices.size()); + } + return res; + } + + Vec3Attributes getPositions() const override { return positions; } + Vec3Attributes getNormals() const override { return patchAttributes(normals); } + Vec2Attributes getUVs(int index) const override { return patchAttributes(uvs[index]); } + Vec4Attributes getColors() const override { return patchAttributes(colors); } + Vec3Attributes getTangents() const 
override { return patchAttributes(tangents); } + int getPartitionCount() const override { return (int)partitions.size(); } + + GeometryPartition getPartition(int index) const override { + if (index >= partitions.size()) return {nullptr, 0, 0, 0}; + return { + partitions[index].polygons.data(), + int(partitions[index].polygons.size()), + partitions[index].max_polygon_triangles, + partitions[index].triangles_count + }; + } + + template + bool postprocess(T& attr) { + if (attr.values.empty()) return true; + if (attr.mapping == VertexDataMapping::BY_VERTEX && !attr.indices.empty()) { + if (positions.indices.empty()) return false; // not supported + + std::vector remapped; + attr.mapping = VertexDataMapping::BY_POLYGON_VERTEX; + remapped.resize(positions.indices.size()); + for (int i = 0; i < remapped.size(); ++i) { + remapped[i] = attr.indices[decodeIndex(positions.indices[i])]; + } + attr.indices = remapped; + } + else if (attr.mapping == VertexDataMapping::BY_POLYGON) { + if (!attr.indices.empty()) return false; // not supported + if (partitions.size() != 1) return false; // not supported + if (partitions[0].polygons.size() != attr.values.size()) return false; // invalid + + std::vector remapped; + attr.mapping = VertexDataMapping::BY_POLYGON_VERTEX; + remapped.resize(positions.indices.size()); + + for (int i = 0, c = (int)partitions[0].polygons.size(); i < c; ++i) { + GeometryPartition::Polygon& polygon = partitions[0].polygons[i]; + for (int j = polygon.from_vertex; j < polygon.from_vertex + polygon.vertex_count; ++j) { + remapped[j] = i; + } + } + attr.indices = remapped; + } + return true; + } + + bool postprocess() { + if (materials.empty()) { + GeometryPartitionImpl& partition = emplace_back(partitions); + int polygon_count = 0; + for (int i : positions.indices) { + if (i < 0) ++polygon_count; + } + partition.polygons.reserve(polygon_count); + int polygon_start = 0; + int max_polygon_triangles = 0; + int total_triangles = 0; + int* indices = 
positions.indices.data(); + for (int i = 0, c = (int)positions.indices.size(); i < c; ++i) { + if (indices[i] < 0) { + int vertex_count = i - polygon_start + 1; + if (vertex_count > 2) { + partition.polygons.push_back({polygon_start, vertex_count}); + indices[i] = -indices[i] - 1; + int triangles = vertex_count - 2; + total_triangles += triangles; + if (triangles > max_polygon_triangles) max_polygon_triangles = triangles; + } + polygon_start = i + 1; + } + } + partition.max_polygon_triangles = max_polygon_triangles; + partition.triangles_count = total_triangles; + } + else { + int max_partition = 0; + for (int m : materials) { + if (m > max_partition) max_partition = m; + } + partitions.resize(max_partition + 1); + + u32 polygon_idx = 0; + int* indices = positions.indices.data(); + int num_polygon_vertices = 0; + int polygon_start = 0; + for (int i = 0, c = (int)positions.indices.size(); i < c; ++i) { + ++num_polygon_vertices; + if (indices[i] < 0) { + u32 material_index = materials[polygon_idx]; + GeometryPartitionImpl& partition = partitions[material_index]; + partition.polygons.push_back({polygon_start, num_polygon_vertices}); + + int triangles = num_polygon_vertices - 2; + partition.triangles_count += triangles; + if (triangles > partition.max_polygon_triangles) partition.max_polygon_triangles = triangles; + + indices[i] = -indices[i] - 1; + + polygon_start = i + 1; + ++polygon_idx; + num_polygon_vertices = 0; + } + } + } + + postprocess(normals); + postprocess(tangents); + for (Vec2AttributesImpl& uv : uvs) postprocess(uv); + postprocess(colors); + + return true; + } +}; + Mesh::Mesh(const Scene& _scene, const IElement& _element) : Object(_scene, _element) { } +struct GeometryImpl : Geometry, GeometryDataImpl { + const Skin* skin = nullptr; + const BlendShape* blendShape = nullptr; + + GeometryImpl(const Scene& _scene, const IElement& _element) + : Geometry(_scene, _element) + { + } + + Type getType() const override { return Type::GEOMETRY; } + const 
GeometryData& getGeometryData() const override { return *this; } + const Skin* getSkin() const override { return skin; } + const BlendShape* getBlendShape() const override { return blendShape; } +}; struct MeshImpl : Mesh { @@ -1035,35 +1318,41 @@ struct MeshImpl : Mesh } - Matrix getGeometricMatrix() const override + DMatrix getGeometricMatrix() const override { - Vec3 translation = resolveVec3Property(*this, "GeometricTranslation", {0, 0, 0}); - Vec3 rotation = resolveVec3Property(*this, "GeometricRotation", {0, 0, 0}); - Vec3 scale = resolveVec3Property(*this, "GeometricScaling", {1, 1, 1}); + DVec3 translation = resolveVec3Property(*this, "GeometricTranslation", {0, 0, 0}); + DVec3 rotation = resolveVec3Property(*this, "GeometricRotation", {0, 0, 0}); + DVec3 scale = resolveVec3Property(*this, "GeometricScaling", {1, 1, 1}); - Matrix scale_mtx = makeIdentity(); + DMatrix scale_mtx = makeIdentity(); scale_mtx.m[0] = (float)scale.x; scale_mtx.m[5] = (float)scale.y; scale_mtx.m[10] = (float)scale.z; - Matrix mtx = getRotationMatrix(rotation, RotationOrder::EULER_XYZ); + DMatrix mtx = getRotationMatrix(rotation, RotationOrder::EULER_XYZ); setTranslation(translation, &mtx); return scale_mtx * mtx; } - Type getType() const override { return Type::MESH; } - const Pose* getPose() const override { return pose; } const Geometry* getGeometry() const override { return geometry; } const Material* getMaterial(int index) const override { return materials[index]; } int getMaterialCount() const override { return (int)materials.size(); } + const GeometryData& getGeometryData() const override { return geometry ? static_cast(*geometry) : geometry_data; } + const Skin* getSkin() const override { return geometry ? geometry->getSkin() : skin; } + const BlendShape* getBlendShape() const override { return geometry ? 
geometry->getBlendShape() : blendShape; } const Pose* pose = nullptr; - const Geometry* geometry = nullptr; + const GeometryImpl* geometry = nullptr; std::vector materials; + const Skin* skin = nullptr; + const BlendShape* blendShape = nullptr; + + // old formats do not use Geometry nodes but embed vertex data directly in Mesh + GeometryDataImpl geometry_data; }; @@ -1086,34 +1375,34 @@ struct MaterialImpl : Material const Texture* getTexture(Texture::TextureType type) const override { return textures[type]; } Color getDiffuseColor() const override { return diffuse_color; } Color getSpecularColor() const override { return specular_color; } - Color getReflectionColor() const override { return reflection_color; }; - Color getAmbientColor() const override { return ambient_color; }; - Color getEmissiveColor() const override { return emissive_color; }; - - double getDiffuseFactor() const override { return diffuse_factor; }; - double getSpecularFactor() const override { return specular_factor; }; - double getReflectionFactor() const override { return reflection_factor; }; - double getShininess() const override { return shininess; }; - double getShininessExponent() const override { return shininess_exponent; }; - double getAmbientFactor() const override { return ambient_factor; }; - double getBumpFactor() const override { return bump_factor; }; - double getEmissiveFactor() const override { return emissive_factor; }; + Color getReflectionColor() const override { return reflection_color; }; + Color getAmbientColor() const override { return ambient_color; }; + Color getEmissiveColor() const override { return emissive_color; }; + + double getDiffuseFactor() const override { return diffuse_factor; }; + double getSpecularFactor() const override { return specular_factor; }; + double getReflectionFactor() const override { return reflection_factor; }; + double getShininess() const override { return shininess; }; + double getShininessExponent() const override { return 
shininess_exponent; }; + double getAmbientFactor() const override { return ambient_factor; }; + double getBumpFactor() const override { return bump_factor; }; + double getEmissiveFactor() const override { return emissive_factor; }; const Texture* textures[Texture::TextureType::COUNT]; Color diffuse_color; Color specular_color; - Color reflection_color; - Color ambient_color; - Color emissive_color; + Color reflection_color; + Color ambient_color; + Color emissive_color; - double diffuse_factor; - double specular_factor; - double reflection_factor; - double shininess; - double shininess_exponent; - double ambient_factor; - double bump_factor; - double emissive_factor; + double diffuse_factor; + double specular_factor; + double reflection_factor; + double shininess; + double shininess_exponent; + double ambient_factor; + double bump_factor; + double emissive_factor; }; @@ -1165,81 +1454,29 @@ Geometry::Geometry(const Scene& _scene, const IElement& _element) } -struct GeometryImpl : Geometry -{ - enum VertexDataMapping - { - BY_POLYGON_VERTEX, - BY_POLYGON, - BY_VERTEX - }; - - struct NewVertex - { - ~NewVertex() { delete next; } - - int index = -1; - NewVertex* next = nullptr; - }; - - std::vector vertices; - std::vector normals; - std::vector uvs[s_uvs_max]; - std::vector colors; - std::vector tangents; - std::vector materials; - - const Skin* skin = nullptr; - const BlendShape* blendShape = nullptr; - - std::vector indices; - std::vector to_new_vertices; - - GeometryImpl(const Scene& _scene, const IElement& _element) - : Geometry(_scene, _element) - { - } - - - Type getType() const override { return Type::GEOMETRY; } - int getVertexCount() const override { return (int)vertices.size(); } - const int* getFaceIndices() const override { return indices.empty() ? 
nullptr : &indices[0]; } - int getIndexCount() const override { return (int)indices.size(); } - const Vec3* getVertices() const override { return &vertices[0]; } - const Vec3* getNormals() const override { return normals.empty() ? nullptr : &normals[0]; } - const Vec2* getUVs(int index = 0) const override { return index < 0 || index >= s_uvs_max || uvs[index].empty() ? nullptr : &uvs[index][0]; } - const Vec4* getColors() const override { return colors.empty() ? nullptr : &colors[0]; } - const Vec3* getTangents() const override { return tangents.empty() ? nullptr : &tangents[0]; } - const Skin* getSkin() const override { return skin; } - const BlendShape* getBlendShape() const override { return blendShape; } - const int* getMaterials() const override { return materials.empty() ? nullptr : &materials[0]; } -}; - - Shape::Shape(const Scene& _scene, const IElement& _element) : Object(_scene, _element) { } -struct ShapeImpl : Shape -{ +struct ShapeImpl : Shape { std::vector vertices; std::vector normals; + std::vector indices; ShapeImpl(const Scene& _scene, const IElement& _element) : Shape(_scene, _element) - { - } - - - bool postprocess(GeometryImpl* geom, Allocator& allocator); + {} + bool postprocess(GeometryImpl& geom, Allocator& allocator); Type getType() const override { return Type::SHAPE; } int getVertexCount() const override { return (int)vertices.size(); } + int getIndexCount() const override { return (int)indices.size(); } const Vec3* getVertices() const override { return &vertices[0]; } const Vec3* getNormals() const override { return normals.empty() ? nullptr : &normals[0]; } + const int* getIndices() const override { return indices.empty() ? 
nullptr : &indices[0]; } }; @@ -1260,50 +1497,30 @@ struct ClusterImpl : Cluster int getIndicesCount() const override { return (int)indices.size(); } const double* getWeights() const override { return &weights[0]; } int getWeightsCount() const override { return (int)weights.size(); } - Matrix getTransformMatrix() const override { return transform_matrix; } - Matrix getTransformLinkMatrix() const override { return transform_link_matrix; } + DMatrix getTransformMatrix() const override { return transform_matrix; } + DMatrix getTransformLinkMatrix() const override { return transform_link_matrix; } Object* getLink() const override { return link; } - - bool postprocess(Allocator& allocator) - { + bool postprocess() { assert(skin); - GeometryImpl* geom = (GeometryImpl*)skin->resolveObjectLinkReverse(Object::Type::GEOMETRY); - if (!geom) return false; + GeometryDataImpl* geom = static_cast(static_cast(skin->resolveObjectLinkReverse(Object::Type::GEOMETRY))); + if (!geom) { + MeshImpl* mesh = (MeshImpl*)skin->resolveObjectLinkReverse(Object::Type::MESH); + if(!mesh) return false; + geom = &mesh->geometry_data; + } - allocator.int_tmp.clear(); // old indices const Element* indexes = findChild((const Element&)element, "Indexes"); if (indexes && indexes->first_property) { - if (!parseBinaryArray(*indexes->first_property, &allocator.int_tmp)) return false; + if (!parseVecData(*indexes->first_property, &indices)) return false; } - allocator.double_tmp.clear(); // old weights const Element* weights_el = findChild((const Element&)element, "Weights"); if (weights_el && weights_el->first_property) { - if (!parseBinaryArray(*weights_el->first_property, &allocator.double_tmp)) return false; - } - - if (allocator.int_tmp.size() != allocator.double_tmp.size()) return false; - - indices.reserve(allocator.int_tmp.size()); - weights.reserve(allocator.int_tmp.size()); - int* ir = allocator.int_tmp.empty() ? nullptr : &allocator.int_tmp[0]; - double* wr = allocator.double_tmp.empty() ? 
nullptr : &allocator.double_tmp[0]; - for (int i = 0, c = (int)allocator.int_tmp.size(); i < c; ++i) - { - int old_idx = ir[i]; - double w = wr[i]; - GeometryImpl::NewVertex* n = &geom->to_new_vertices[old_idx]; - if (n->index == -1) continue; // skip vertices which aren't indexed. - while (n) - { - indices.push_back(n->index); - weights.push_back(w); - n = n->next; - } + if (!parseVecData(*weights_el->first_property, &weights)) return false; } return true; @@ -1314,8 +1531,8 @@ struct ClusterImpl : Cluster Skin* skin = nullptr; std::vector indices; std::vector weights; - Matrix transform_matrix; - Matrix transform_link_matrix; + DMatrix transform_matrix; + DMatrix transform_link_matrix; Type getType() const override { return Type::CLUSTER; } }; @@ -1420,8 +1637,7 @@ struct BlendShapeChannelImpl : BlendShapeChannel Type getType() const override { return Type::BLEND_SHAPE_CHANNEL; } - bool postprocess(Allocator& allocator) - { + bool postprocess(Allocator& allocator) { assert(blendShape); GeometryImpl* geom = (GeometryImpl*)blendShape->resolveObjectLinkReverse(Object::Type::GEOMETRY); @@ -1436,13 +1652,13 @@ struct BlendShapeChannelImpl : BlendShapeChannel const Element* full_weights_el = findChild((const Element&)element, "FullWeights"); if (full_weights_el && full_weights_el->first_property) { - if (!parseBinaryArray(*full_weights_el->first_property, &fullWeights)) return false; + if (!parseVecData(*full_weights_el->first_property, &fullWeights)) return false; } - for (int i = 0; i < shapes.size(); i++) + for (int i = 0; i < (int)shapes.size(); i++) { auto shape = (ShapeImpl*)shapes[i]; - if (!shape->postprocess(geom, allocator)) return false; + if (!shape->postprocess(*geom, allocator)) return false; } return true; @@ -1494,20 +1710,16 @@ struct PoseImpl : Pose { PoseImpl(const Scene& _scene, const IElement& _element) : Pose(_scene, _element) - { - } + {} - bool postprocess(Scene* scene); - - - Matrix getMatrix() const override { return matrix; } + bool 
postprocess(Scene& scene); + DMatrix getMatrix() const override { return matrix; } const Object* getNode() const override { return node; } - Type getType() const override { return Type::POSE; } - Matrix matrix; + DMatrix matrix; Object* node = nullptr; - DataView node_id; + u64 node_id; }; @@ -1528,6 +1740,163 @@ struct TextureImpl : Texture Type getType() const override { return Type::TEXTURE; } }; +struct LightImpl : Light +{ + LightImpl(const Scene& _scene, const IElement& _element) + : Light(_scene, _element) + { + } + + Type getType() const override { return Type::LIGHT; } + LightType getLightType() const override { return lightType; } + + bool doesCastLight() const override { return castLight; } + + bool doesDrawVolumetricLight() const override + { + // Return the draw volumetric light property based on the stored data (WIP) + return false; + } + + bool doesDrawGroundProjection() const override + { + // Return the draw ground projection property based on the stored data (WIP) + return false; + } + + bool doesDrawFrontFacingVolumetricLight() const override + { + // Return the draw front-facing volumetric light property based on the stored data (WIP) + return false; + } + + Color getColor() const override { return color; } + double getIntensity() const override { return intensity; } + double getInnerAngle() const override { return innerAngle; } + double getOuterAngle() const override { return outerAngle; } + + double getFog() const override { return fog; } + + DecayType getDecayType() const override { return decayType; } + double getDecayStart() const override { return decayStart; } + + // Near attenuation + bool doesEnableNearAttenuation() const override { return enableNearAttenuation; } + double getNearAttenuationStart() const override { return nearAttenuationStart; } + double getNearAttenuationEnd() const override { return nearAttenuationEnd; } + + // Far attenuation + bool doesEnableFarAttenuation() const override { return enableFarAttenuation; } + double 
getFarAttenuationStart() const override { return farAttenuationStart; } + double getFarAttenuationEnd() const override { return farAttenuationEnd; } + + // Shadows + const Texture* getShadowTexture() const override { return shadowTexture; } + bool doesCastShadows() const override { return castShadows; } + Color getShadowColor() const override { return shadowColor; } + + // Member variables to store light properties + //------------------------------------------------------------------------- + LightType lightType = LightType::POINT; + bool castLight = true; + Color color = {1, 1, 1}; // Light color (RGB values) + double intensity = 100.0; + + double innerAngle = 0.0; + double outerAngle = 45.0; + + double fog = 50; + + DecayType decayType = DecayType::QUADRATIC; + double decayStart = 1.0; + + bool enableNearAttenuation = false; + double nearAttenuationStart = 0.0; + double nearAttenuationEnd = 0.0; + + bool enableFarAttenuation = false; + double farAttenuationStart = 0.0; + double farAttenuationEnd = 0.0; + + const Texture* shadowTexture = nullptr; + bool castShadows = true; + Color shadowColor = {0, 0, 0}; +}; + +static float OFBX_PI = 3.14159265358979323846f; +struct CameraImpl : public Camera +{ + CameraImpl(const Scene& _scene, const IElement& _element) + : Camera(_scene, _element) + { + } + + ProjectionType projectionType = ProjectionType::PERSPECTIVE; + ApertureMode apertureMode = ApertureMode::HORIZONTAL; // Used to determine the FOV + + double filmHeight = 36.0; + double filmWidth = 24.0; + + double aspectHeight = 1.0; + double aspectWidth = 1.0; + + double nearPlane = 0.1; + double farPlane = 1000.0; + bool autoComputeClipPanes = true; + + GateFit gateFit = GateFit::HORIZONTAL; + double filmAspectRatio = 1.0; + double focalLength = 50.0; + double focusDistance = 50.0; + + DVec3 backgroundColor = {0, 0, 0}; + DVec3 interestPosition = {0, 0, 0}; + + double fieldOfView = 60.0; + + Type getType() const override { return Type::CAMERA; } + ProjectionType 
getProjectionType() const override { return projectionType; } + ApertureMode getApertureMode() const override { return apertureMode; } + + double getFilmHeight() const override { return filmHeight; } + double getFilmWidth() const override { return filmWidth; } + + double getAspectHeight() const override { return aspectHeight; } + double getAspectWidth() const override { return aspectWidth; } + + double getNearPlane() const override { return nearPlane; } + double getFarPlane() const override { return farPlane; } + bool doesAutoComputeClipPanes() const override { return autoComputeClipPanes; } + + GateFit getGateFit() const override { return gateFit; } + double getFilmAspectRatio() const override { return filmAspectRatio; } + double getFocalLength() const override { return focalLength; } + double getFocusDistance() const override { return focusDistance; } + + DVec3 getBackgroundColor() const override { return backgroundColor; } + DVec3 getInterestPosition() const override { return interestPosition; } + + void CalculateFOV() + { + switch (apertureMode) + { + case Camera::ApertureMode::HORIZONTAL: + fieldOfView = 2.0 * atan(filmWidth / (2.0 * focalLength)) * 180.0 / OFBX_PI; + return; + case Camera::ApertureMode::VERTICAL: + fieldOfView = 2.0 * atan(filmHeight / (2.0 * focalLength)) * 180.0 / OFBX_PI; + return; + case Camera::ApertureMode::HORIZANDVERT: + fieldOfView = 2.0 * atan(sqrt(filmWidth * filmWidth + filmHeight * filmHeight) / (2.0 * focalLength)) * 180.0 / OFBX_PI; + return; + case Camera::ApertureMode::FOCALLENGTH: + fieldOfView = 2.0 * atan(filmHeight / (2.0 * focalLength)) * 180.0 / OFBX_PI; // Same as vertical ¯\_(ツ)_/¯ + return; + default: + fieldOfView = 60.0; + } + } +}; struct Root : Object { @@ -1548,13 +1917,16 @@ struct Scene : IScene enum Type { OBJECT_OBJECT, - OBJECT_PROPERTY + OBJECT_PROPERTY, + PROPERTY_OBJECT, + PROPERTY_PROPERTY, }; Type type = OBJECT_OBJECT; - u64 from = 0; - u64 to = 0; - DataView property; + u64 from_object = 0; + u64 
to_object = 0; + DataView from_property; + DataView to_property; }; struct ObjectPair @@ -1565,9 +1937,9 @@ struct Scene : IScene int getAnimationStackCount() const override { return (int)m_animation_stacks.size(); } + int getGeometryCount() const override { return (int)m_geometries.size(); } int getMeshCount() const override { return (int)m_meshes.size(); } float getSceneFrameRate() const override { return m_scene_frame_rate; } - const GlobalInfo* getGlobalInfo() const override { return &m_info; } const GlobalSettings* getGlobalSettings() const override { return &m_settings; } const Object* const* getAllObjects() const override { return m_all_objects.empty() ? nullptr : &m_all_objects[0]; } @@ -1583,6 +1955,14 @@ struct Scene : IScene return m_videos[index].content; } + bool isEmbeddedBase64(int index) const override { + return m_videos[index].is_base_64; + } + + const IElementProperty* getEmbeddedBase64Data(int index) const override { + return m_videos[index].base64_property; + } + DataView getEmbeddedFilename(int index) const override { return m_videos[index].filename; } @@ -1603,6 +1983,14 @@ struct Scene : IScene } + const Geometry* getGeometry(int index) const override + { + assert(index >= 0); + assert(index < m_geometries.size()); + return m_geometries[index]; + } + + const TakeInfo* getTakeInfo(const char* name) const override { for (const TakeInfo& info : m_take_infos) @@ -1612,6 +2000,30 @@ struct Scene : IScene return nullptr; } + const Camera* getCamera(int index) const override + { + assert(index >= 0); + assert(index < m_cameras.size()); + return m_cameras[index]; + } + + int getCameraCount() const override + { + return (int)m_cameras.size(); + } + + const Light* getLight(int index) const override + { + assert(index >= 0); + assert(index < m_lights.size()); + return m_lights[index]; + } + + int getLightCount() const override + { + return (int)m_lights.size(); + } + const IElement* getRootElement() const override { return m_root_element; } const 
Object* getRoot() const override { return m_root; } @@ -1620,29 +2032,54 @@ struct Scene : IScene void destroy() override { delete this; } - ~Scene() override - { - for(auto ptr : m_all_objects) - ptr->~Object(); + ~Scene() override { + for(Object* ptr : m_all_objects) { + ptr->~Object(); + } } + bool finalize(); Element* m_root_element = nullptr; Root* m_root = nullptr; float m_scene_frame_rate = -1; - GlobalInfo m_info; GlobalSettings m_settings; + + std::unordered_map m_fake_ids; std::unordered_map m_object_map; std::vector m_all_objects; std::vector m_meshes; + std::vector m_geometries; std::vector m_animation_stacks; + std::vector m_cameras; + std::vector m_lights; std::vector m_connections; std::vector m_data; std::vector m_take_infos; std::vector /// The zero-based index at which item should be inserted. - /// The item to insert. + /// The item to be inserted by copying. void Insert(int32 index, const T& item) { ASSERT(index >= 0 && index <= _count); @@ -592,6 +592,23 @@ public: data[index] = item; } + /// + /// Insert the given item at specified index with keeping items order. + /// + /// The zero-based index at which item should be inserted. + /// The item to inserted by moving. + void Insert(int32 index, T&& item) + { + ASSERT(index >= 0 && index <= _count); + EnsureCapacity(_count + 1); + T* data = _allocation.Get(); + Memory::ConstructItems(data + _count, 1); + for (int32 i = _count - 1; i >= index; i--) + data[i + 1] = MoveTemp(data[i]); + _count++; + data[index] = MoveTemp(item); + } + /// /// Insert the given item at specified index with keeping items order. /// @@ -772,9 +789,9 @@ public: /// /// Performs pop from stack operation (stack grows at the end of the collection). /// - T Pop() + FORCE_INLINE T Pop() { - T item(Last()); + T item = MoveTemp(Last()); RemoveLast(); return item; } @@ -807,6 +824,15 @@ public: Add(item); } + /// + /// Performs enqueue to queue operation (queue head is in the beginning of queue). + /// + /// The item to append. 
+ void Enqueue(T&& item) + { + Add(MoveTemp(item)); + } + /// /// Performs dequeue from queue operation (queue head is in the beginning of queue). /// @@ -814,7 +840,7 @@ public: T Dequeue() { ASSERT(HasItems()); - T item(First()); + T item = MoveTemp(_allocation.Get()[0]); RemoveAtKeepOrder(0); return item; } From 5f4aee71b8c1de3a99e92b0ce8017fd9f07dcfb5 Mon Sep 17 00:00:00 2001 From: Mateusz Karbowiak <69864511+mtszkarbowiak@users.noreply.github.com> Date: Sat, 22 Jun 2024 15:14:02 +0200 Subject: [PATCH 21/53] Actor naming without string copy fix --- Source/Engine/Level/Actor.cpp | 9 +++++++++ Source/Engine/Level/Actor.h | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index a30a929de..1cdfccd3b 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -543,6 +543,15 @@ void Actor::SetLayerRecursive(int32 layerIndex) OnLayerChanged(); } +void Actor::SetName(String&& value) +{ + if (_name == value) + return; + _name = MoveTemp(value); + if (GetScene()) + Level::callActorEvent(Level::ActorEventType::OnActorNameChanged, this, nullptr); +} + void Actor::SetName(const StringView& value) { if (_name == value) diff --git a/Source/Engine/Level/Actor.h b/Source/Engine/Level/Actor.h index dc77cdbee..fdf587b07 100644 --- a/Source/Engine/Level/Actor.h +++ b/Source/Engine/Level/Actor.h @@ -181,6 +181,11 @@ public: return _name; } + /// + /// Sets the actor name without copying the string. + /// + API_FUNCTION() void SetName(String&& value); + /// /// Sets the actor name. 
/// From cb3e8e4112c4bbf9ba461d1fa386c6b699fba331 Mon Sep 17 00:00:00 2001 From: Mateusz Karbowiak <69864511+mtszkarbowiak@users.noreply.github.com> Date: Sat, 22 Jun 2024 16:41:29 +0200 Subject: [PATCH 22/53] Ambiguous name fix --- Source/Engine/Level/Actor.cpp | 2 +- Source/Engine/Level/Actor.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 1cdfccd3b..08d2832d1 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -543,7 +543,7 @@ void Actor::SetLayerRecursive(int32 layerIndex) OnLayerChanged(); } -void Actor::SetName(String&& value) +void Actor::SetNameNoCopy(String&& value) { if (_name == value) return; diff --git a/Source/Engine/Level/Actor.h b/Source/Engine/Level/Actor.h index fdf587b07..787cc6835 100644 --- a/Source/Engine/Level/Actor.h +++ b/Source/Engine/Level/Actor.h @@ -184,7 +184,7 @@ public: /// /// Sets the actor name without copying the string. /// - API_FUNCTION() void SetName(String&& value); + void SetNameNoCopy(String&& value); /// /// Sets the actor name. 
From 751c1f20dccaf609c8dfac43541b797edff37928 Mon Sep 17 00:00:00 2001 From: Olly Rybak Date: Wed, 3 Jul 2024 18:35:32 +1000 Subject: [PATCH 23/53] Returns lightmap UV correctly with no errors --- Source/Editor/Surface/Archetypes/Textures.cs | 13 +++++++++++++ .../MaterialGenerator.Textures.cpp | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/Source/Editor/Surface/Archetypes/Textures.cs b/Source/Editor/Surface/Archetypes/Textures.cs index 00c0a806d..53b2ccf04 100644 --- a/Source/Editor/Surface/Archetypes/Textures.cs +++ b/Source/Editor/Surface/Archetypes/Textures.cs @@ -434,6 +434,19 @@ namespace FlaxEditor.Surface.Archetypes NodeElementArchetype.Factory.ComboBox(50, Surface.Constants.LayoutOffsetY * 4, 100, 0, typeof(CommonSamplerType)) } }, + new NodeArchetype + { + TypeID = 18, + Title = "Lightmap UV", + AlternativeTitles = new string[] { "Lightmap TexCoord" }, + Description = "Lightmap UVs", + Flags = NodeFlags.MaterialGraph, + Size = new Float2(110, 30), + Elements = new [] + { + NodeElementArchetype.Factory.Output(0, "UVs", typeof(Float2), 0) + } + } }; } } diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp index 9b0a334e6..8083718dd 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Textures.cpp @@ -733,6 +733,25 @@ void MaterialGenerator::ProcessGroupTextures(Box* box, Node* node, Value& value) _writer.Write(*triplanarTexture); value = result; } + // Get Lightmap UV + case 18: + { + auto output = writeLocal(Value::InitForZero(ValueType::Float2), node); + auto lightmapUV = String::Format(TEXT( + "{{\n" + "#if USE_LIGHTMAP\n" + "\t {0} = input.LightmapUV;\n" + "#else\n" + "\t {0} = float2(0,0);\n" + "#endif\n" + "}}\n" + ), output.Value); + + _writer.Write(*lightmapUV); + + value = output; + break; + } default: break; } From 
9b11461eaf7da1eef1415efa0366d3348ee338df Mon Sep 17 00:00:00 2001 From: Chandler Cox Date: Fri, 19 Jul 2024 06:48:35 -0500 Subject: [PATCH 24/53] Add type display to empty object ref in editor --- Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs b/Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs index 97b016fba..530e8c2b1 100644 --- a/Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs +++ b/Source/Editor/CustomEditors/Editors/FlaxObjectRefEditor.cs @@ -208,7 +208,7 @@ namespace FlaxEditor.CustomEditors.Editors else { // Draw info - Render2D.DrawText(style.FontMedium, "-", nameRect, isEnabled ? Color.OrangeRed : Color.DarkOrange, TextAlignment.Near, TextAlignment.Center); + Render2D.DrawText(style.FontMedium, Type != null ? $"None ({Utilities.Utils.GetPropertyNameUI(Type.ToString())})" : "-", nameRect, isEnabled ? Color.OrangeRed : Color.DarkOrange, TextAlignment.Near, TextAlignment.Center); } // Draw picker button From 880473819381a81bfbd0a9b9cd6ba8dd8f6e2f0a Mon Sep 17 00:00:00 2001 From: Chandler Cox Date: Fri, 19 Jul 2024 07:42:57 -0500 Subject: [PATCH 25/53] Allow for Initializing children injected via scene loading. --- Source/Engine/Level/Level.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 95f3d7f0f..218d0677d 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -930,6 +930,9 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou // Fire event CallSceneEvent(SceneEventType::OnSceneLoading, scene, sceneId); + // Get any injected children of the scene. 
+ Array injectedSceneChildren = scene->Children; + // Loaded scene objects list CollectionPoolCache::ScopeCache sceneObjects = ActorsCache::SceneObjectsListCache.Get(); const int32 dataCount = (int32)data.Size(); @@ -1031,6 +1034,18 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou // /\ all above this has to be done on multiple threads at once // \/ all below this has to be done on an any thread + // Add injected children of scene (via OnSceneLoading) into sceneObjects to be initialized + for (auto child : injectedSceneChildren) + { + sceneObjects->Add(child); + if (!child->IsRegistered()) + { + child->RegisterObject(); + } + LOG(Warning, "{}", child->GetName()); + } + sceneObjects->Resize(dataCount + injectedSceneChildren.Count()); + // Synchronize prefab instances (prefab may have objects removed or reordered so deserialized instances need to synchronize with it) // TODO: resave and force sync scenes during game cooking so this step could be skipped in game SceneObjectsFactory::SynchronizePrefabInstances(context, prefabSyncData); @@ -1047,7 +1062,7 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou PROFILE_CPU_NAMED("Initialize"); SceneObject** objects = sceneObjects->Get(); - for (int32 i = 0; i < dataCount; i++) + for (int32 i = 0; i < dataCount + injectedSceneChildren.Count(); i++) { SceneObject* obj = objects[i]; if (obj) From d717430256a52de9b997b10bcf75334c46821608 Mon Sep 17 00:00:00 2001 From: Chandler Cox Date: Fri, 19 Jul 2024 07:48:01 -0500 Subject: [PATCH 26/53] Clean up code --- Source/Engine/Level/Level.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 218d0677d..63a79eb3a 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -1042,7 +1042,6 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou { child->RegisterObject(); } - LOG(Warning, "{}", 
child->GetName()); } sceneObjects->Resize(dataCount + injectedSceneChildren.Count()); From 8a297a6fd4d6978531d9162df6bdb078f12df5ba Mon Sep 17 00:00:00 2001 From: Chandler Cox Date: Fri, 19 Jul 2024 07:59:57 -0500 Subject: [PATCH 27/53] Clean up code more. --- Source/Engine/Level/Level.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index 63a79eb3a..b1ac5f890 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -1043,7 +1043,6 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou child->RegisterObject(); } } - sceneObjects->Resize(dataCount + injectedSceneChildren.Count()); // Synchronize prefab instances (prefab may have objects removed or reordered so deserialized instances need to synchronize with it) // TODO: resave and force sync scenes during game cooking so this step could be skipped in game @@ -1061,7 +1060,7 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou PROFILE_CPU_NAMED("Initialize"); SceneObject** objects = sceneObjects->Get(); - for (int32 i = 0; i < dataCount + injectedSceneChildren.Count(); i++) + for (int32 i = 0; i < sceneObjects->Count(); i++) { SceneObject* obj = objects[i]; if (obj) From 305f725394d1fe9407b65b99eaa468f12e9284d9 Mon Sep 17 00:00:00 2001 From: Chandler Cox Date: Fri, 19 Jul 2024 08:19:34 -0500 Subject: [PATCH 28/53] Ensure to add all scene objects of injected actors. 
--- Source/Engine/Level/Level.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Source/Engine/Level/Level.cpp b/Source/Engine/Level/Level.cpp index b1ac5f890..f24a853e3 100644 --- a/Source/Engine/Level/Level.cpp +++ b/Source/Engine/Level/Level.cpp @@ -1037,10 +1037,14 @@ bool Level::loadScene(rapidjson_flax::Value& data, int32 engineBuild, Scene** ou // Add injected children of scene (via OnSceneLoading) into sceneObjects to be initialized for (auto child : injectedSceneChildren) { - sceneObjects->Add(child); - if (!child->IsRegistered()) + Array injectedSceneObjects; + injectedSceneObjects.Add(child); + SceneQuery::GetAllSceneObjects(child, injectedSceneObjects); + for (auto o : injectedSceneObjects) { - child->RegisterObject(); + if (!o->IsRegistered()) + o->RegisterObject(); + sceneObjects->Add(o); } } From 1142fa68eafaa792343dd8c92c59857decf5881d Mon Sep 17 00:00:00 2001 From: Chandler Cox Date: Fri, 19 Jul 2024 20:49:58 -0500 Subject: [PATCH 29/53] Small table improvements. --- Source/Editor/GUI/ColumnDefinition.cs | 13 +++++++++- Source/Editor/GUI/Table.cs | 35 ++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/Source/Editor/GUI/ColumnDefinition.cs b/Source/Editor/GUI/ColumnDefinition.cs index 241e6bec3..5c462c903 100644 --- a/Source/Editor/GUI/ColumnDefinition.cs +++ b/Source/Editor/GUI/ColumnDefinition.cs @@ -1,6 +1,7 @@ // Copyright (c) 2012-2024 Wojciech Figat. All rights reserved. using FlaxEngine; +using FlaxEngine.GUI; namespace FlaxEditor.GUI { @@ -43,10 +44,20 @@ namespace FlaxEditor.GUI public Color TitleColor = Color.White; /// - /// The column title background background. + /// The column title background color. /// public Color TitleBackgroundColor = Color.Brown; + /// + /// The column title horizontal text alignment + /// + public TextAlignment TitleAlignment = TextAlignment.Near; + + /// + /// The column title margin. 
+ /// + public Margin TitleMargin = new Margin(4, 4, 0, 0); + /// /// The minimum size (in pixels) of the column. /// diff --git a/Source/Editor/GUI/Table.cs b/Source/Editor/GUI/Table.cs index 88c1d9e50..eb7aa3767 100644 --- a/Source/Editor/GUI/Table.cs +++ b/Source/Editor/GUI/Table.cs @@ -130,12 +130,14 @@ namespace FlaxEditor.GUI var style = Style.Current; var font = column.TitleFont ?? style.FontMedium; - Render2D.DrawText(font, column.Title, rect, column.TitleColor, TextAlignment.Center, TextAlignment.Center); + var textRect = rect; + column.TitleMargin.ShrinkRectangle(ref textRect); + Render2D.DrawText(font, column.Title, textRect, column.TitleColor, column.TitleAlignment, TextAlignment.Center); if (columnIndex < _columns.Length - 1) { - var splitRect = new Rectangle(rect.Right - 1, 2, 2, rect.Height - 4); - Render2D.FillRectangle(splitRect, _movingSplit == columnIndex || splitRect.Contains(_mousePos) ? style.BorderNormal : column.TitleBackgroundColor * 0.9f); + var splitRect = new Rectangle(rect.Right - 2, 2, 4, rect.Height - 4); + Render2D.FillRectangle(splitRect, _movingSplit == columnIndex || splitRect.Contains(_mousePos) ? 
style.BorderNormal : style.Background * 0.9f); } } @@ -151,7 +153,7 @@ namespace FlaxEditor.GUI { rect.Width = GetColumnWidth(i); - var splitRect = new Rectangle(rect.Right - 1, 2, 2, rect.Height - 4); + var splitRect = new Rectangle(rect.Right - 2, 2, 4, rect.Height - 4); if (splitRect.Contains(location)) { // Start moving splitter @@ -193,6 +195,31 @@ namespace FlaxEditor.GUI PerformLayout(); } + else + { + if (_columns != null && _splits != null) + { + Rectangle rect = new Rectangle(0, 0, 0, _headerHeight); + for (int i = 0; i < _columns.Length - 1; i++) + { + rect.Width = GetColumnWidth(i); + + var splitRect = new Rectangle(rect.Right - 2, 2, 4, rect.Height - 4); + if (splitRect.Contains(location)) + { + // Start moving splitter + Cursor = CursorType.SizeWE; + break; + } + else + { + Cursor = CursorType.Default; + } + + rect.X += rect.Width; + } + } + } base.OnMouseMove(location); } From 55d55212c3e45de038cfe56749e0c258ca6bf881 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 20 Jul 2024 15:33:27 +0300 Subject: [PATCH 30/53] Add assertions for checking against un-normalized direction vectors --- Source/Engine/Core/Math/Quaternion.cpp | 1 + Source/Engine/Core/Math/Ray.h | 1 + Source/Engine/Debug/DebugDraw.cpp | 8 ++++-- Source/Engine/Physics/Colliders/Collider.cpp | 2 ++ Source/Engine/Physics/Physics.cpp | 30 ++++++++++++++++++++ Source/Engine/Physics/Physics.h | 2 +- Source/Engine/Terrain/TerrainPatch.cpp | 4 +++ 7 files changed, 44 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Core/Math/Quaternion.cpp b/Source/Engine/Core/Math/Quaternion.cpp index 5927e0665..5eae439dd 100644 --- a/Source/Engine/Core/Math/Quaternion.cpp +++ b/Source/Engine/Core/Math/Quaternion.cpp @@ -289,6 +289,7 @@ void Quaternion::Billboard(const Float3& objectPosition, const Float3& cameraPos Quaternion Quaternion::FromDirection(const Float3& direction) { + ASSERT(direction.IsNormalized()); Quaternion orientation; if (Float3::Dot(direction, Float3::Up) >= 0.999f) { diff 
--git a/Source/Engine/Core/Math/Ray.h b/Source/Engine/Core/Math/Ray.h index 59d080f56..2f8b4b907 100644 --- a/Source/Engine/Core/Math/Ray.h +++ b/Source/Engine/Core/Math/Ray.h @@ -46,6 +46,7 @@ public: : Position(position) , Direction(direction) { + ASSERT(Direction.IsNormalized()); } public: diff --git a/Source/Engine/Debug/DebugDraw.cpp b/Source/Engine/Debug/DebugDraw.cpp index 942069130..1bb3a47f8 100644 --- a/Source/Engine/Debug/DebugDraw.cpp +++ b/Source/Engine/Debug/DebugDraw.cpp @@ -946,7 +946,8 @@ void DebugDraw::DrawActors(Actor** selectedActors, int32 selectedActorsCount, bo void DebugDraw::DrawAxisFromDirection(const Vector3& origin, const Vector3& direction, float size, float duration, bool depthTest) { - const auto rot = Quaternion::FromDirection(direction.GetNormalized()); + ASSERT(direction.IsNormalized()); + const auto rot = Quaternion::FromDirection(direction); const Vector3 up = (rot * Vector3::Up); const Vector3 forward = (rot * Vector3::Forward); const Vector3 right = (rot * Vector3::Right); @@ -971,16 +972,17 @@ void DebugDraw::DrawRay(const Vector3& origin, const Vector3& direction, const C void DebugDraw::DrawRay(const Vector3& origin, const Vector3& direction, const Color& color, float length, float duration, bool depthTest) { + ASSERT(direction.IsNormalized()); if (isnan(length) || isinf(length)) return; - DrawLine(origin, origin + (direction.GetNormalized() * length), color, duration, depthTest); + DrawLine(origin, origin + (direction * length), color, duration, depthTest); } void DebugDraw::DrawRay(const Ray& ray, const Color& color, float length, float duration, bool depthTest) { if (isnan(length) || isinf(length)) return; - DrawLine(ray.Position, ray.Position + (ray.Direction.GetNormalized() * length), color, duration, depthTest); + DrawLine(ray.Position, ray.Position + (ray.Direction * length), color, duration, depthTest); } void DebugDraw::DrawLine(const Vector3& start, const Vector3& end, const Color& color, float duration, bool 
depthTest) diff --git a/Source/Engine/Physics/Colliders/Collider.cpp b/Source/Engine/Physics/Colliders/Collider.cpp index 793368abe..7e0a149aa 100644 --- a/Source/Engine/Physics/Colliders/Collider.cpp +++ b/Source/Engine/Physics/Colliders/Collider.cpp @@ -71,6 +71,7 @@ void Collider::SetContactOffset(float value) bool Collider::RayCast(const Vector3& origin, const Vector3& direction, float& resultHitDistance, float maxDistance) const { + ASSERT(direction.IsNormalized()); resultHitDistance = MAX_float; if (_shape == nullptr) return false; @@ -79,6 +80,7 @@ bool Collider::RayCast(const Vector3& origin, const Vector3& direction, float& r bool Collider::RayCast(const Vector3& origin, const Vector3& direction, RayCastHit& hitInfo, float maxDistance) const { + ASSERT(direction.IsNormalized()); if (_shape == nullptr) return false; return PhysicsBackend::RayCastShape(_shape, _transform.Translation, _transform.Orientation, origin, direction, hitInfo, maxDistance); diff --git a/Source/Engine/Physics/Physics.cpp b/Source/Engine/Physics/Physics.cpp index a1a5e8db2..85298bcff 100644 --- a/Source/Engine/Physics/Physics.cpp +++ b/Source/Engine/Physics/Physics.cpp @@ -235,76 +235,91 @@ bool Physics::LineCastAll(const Vector3& start, const Vector3& end, ArrayRayCast(origin, direction, maxDistance, layerMask, hitTriggers); } bool Physics::RayCast(const Vector3& origin, const Vector3& direction, RayCastHit& hitInfo, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->RayCast(origin, direction, hitInfo, maxDistance, layerMask, hitTriggers); } bool Physics::RayCastAll(const Vector3& origin, const Vector3& direction, Array& results, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->RayCastAll(origin, direction, results, maxDistance, layerMask, hitTriggers); } bool Physics::BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& 
direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->BoxCast(center, halfExtents, direction, rotation, maxDistance, layerMask, hitTriggers); } bool Physics::BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->BoxCast(center, halfExtents, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers); } bool Physics::BoxCastAll(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->BoxCastAll(center, halfExtents, direction, results, rotation, maxDistance, layerMask, hitTriggers); } bool Physics::SphereCast(const Vector3& center, const float radius, const Vector3& direction, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->SphereCast(center, radius, direction, maxDistance, layerMask, hitTriggers); } bool Physics::SphereCast(const Vector3& center, const float radius, const Vector3& direction, RayCastHit& hitInfo, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->SphereCast(center, radius, direction, hitInfo, maxDistance, layerMask, hitTriggers); } bool Physics::SphereCastAll(const Vector3& center, const float radius, const Vector3& direction, Array& results, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->SphereCastAll(center, radius, direction, results, maxDistance, layerMask, hitTriggers); } bool Physics::CapsuleCast(const Vector3& center, const float radius, const 
float height, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->CapsuleCast(center, radius, height, direction, rotation, maxDistance, layerMask, hitTriggers); } bool Physics::CapsuleCast(const Vector3& center, const float radius, const float height, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->CapsuleCast(center, radius, height, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers); } bool Physics::CapsuleCastAll(const Vector3& center, const float radius, const float height, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->CapsuleCastAll(center, radius, height, direction, results, rotation, maxDistance, layerMask, hitTriggers); } bool Physics::ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->ConvexCast(center, convexMesh, scale, direction, rotation, maxDistance, layerMask, hitTriggers); } bool Physics::ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->ConvexCast(center, convexMesh, scale, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers); } bool Physics::ConvexCastAll(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, Array& results, 
const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return DefaultScene->ConvexCastAll(center, convexMesh, scale, direction, results, rotation, maxDistance, layerMask, hitTriggers); } @@ -505,76 +520,91 @@ bool PhysicsScene::LineCastAll(const Vector3& start, const Vector3& end, Array& results, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::RayCastAll(_scene, origin, direction, results, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::BoxCast(_scene, center, halfExtents, direction, rotation, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::BoxCast(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::BoxCast(_scene, center, halfExtents, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::BoxCastAll(const Vector3& center, const Vector3& halfExtents, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::BoxCastAll(_scene, center, halfExtents, direction, results, rotation, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::SphereCast(const Vector3& center, const float radius, const Vector3& direction, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::SphereCast(_scene, center, radius, direction, maxDistance, layerMask, 
hitTriggers); } bool PhysicsScene::SphereCast(const Vector3& center, const float radius, const Vector3& direction, RayCastHit& hitInfo, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::SphereCast(_scene, center, radius, direction, hitInfo, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::SphereCastAll(const Vector3& center, const float radius, const Vector3& direction, Array& results, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::SphereCastAll(_scene, center, radius, direction, results, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::CapsuleCast(const Vector3& center, const float radius, const float height, const Vector3& direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::CapsuleCast(_scene, center, radius, height, direction, rotation, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::CapsuleCast(const Vector3& center, const float radius, const float height, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::CapsuleCast(_scene, center, radius, height, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::CapsuleCastAll(const Vector3& center, const float radius, const float height, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::CapsuleCastAll(_scene, center, radius, height, direction, results, rotation, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& 
direction, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::ConvexCast(_scene, center, convexMesh, scale, direction, rotation, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::ConvexCast(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, RayCastHit& hitInfo, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::ConvexCast(_scene, center, convexMesh, scale, direction, hitInfo, rotation, maxDistance, layerMask, hitTriggers); } bool PhysicsScene::ConvexCastAll(const Vector3& center, const CollisionData* convexMesh, const Vector3& scale, const Vector3& direction, Array& results, const Quaternion& rotation, const float maxDistance, uint32 layerMask, bool hitTriggers) { + ASSERT(direction.IsNormalized()); return PhysicsBackend::ConvexCastAll(_scene, center, convexMesh, scale, direction, results, rotation, maxDistance, layerMask, hitTriggers); } diff --git a/Source/Engine/Physics/Physics.h b/Source/Engine/Physics/Physics.h index 977db34c2..efd11b184 100644 --- a/Source/Engine/Physics/Physics.h +++ b/Source/Engine/Physics/Physics.h @@ -120,7 +120,7 @@ public: /// Performs a line between two points in the scene, returns all hitpoints infos. /// /// The origin of the ray. - /// The normalized direction of the ray. + /// The end position of the line. /// The result hits. Valid only when method returns true. /// The layer mask used to filter the results. /// If set to true triggers will be hit, otherwise will skip them. 
diff --git a/Source/Engine/Terrain/TerrainPatch.cpp b/Source/Engine/Terrain/TerrainPatch.cpp index 4272cabe8..b7947288b 100644 --- a/Source/Engine/Terrain/TerrainPatch.cpp +++ b/Source/Engine/Terrain/TerrainPatch.cpp @@ -1963,6 +1963,7 @@ bool TerrainPatch::UpdateCollision() bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, float& resultHitDistance, float maxDistance) const { + ASSERT(direction.IsNormalized()); if (_physicsShape == nullptr) return false; Vector3 shapePos; @@ -1973,6 +1974,7 @@ bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, floa bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, float& resultHitDistance, Vector3& resultHitNormal, float maxDistance) const { + ASSERT(direction.IsNormalized()); if (_physicsShape == nullptr) return false; Vector3 shapePos; @@ -1990,6 +1992,7 @@ bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, floa bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, float& resultHitDistance, TerrainChunk*& resultChunk, float maxDistance) const { + ASSERT(direction.IsNormalized()); if (_physicsShape == nullptr) return false; Vector3 shapePos; @@ -2027,6 +2030,7 @@ bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, floa bool TerrainPatch::RayCast(const Vector3& origin, const Vector3& direction, RayCastHit& hitInfo, float maxDistance) const { + ASSERT(direction.IsNormalized()); if (_physicsShape == nullptr) return false; Vector3 shapePos; From 24f7ec4a542f3491d1eea42e2178eaf405c642a2 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sun, 14 Jul 2024 00:17:57 +0300 Subject: [PATCH 31/53] Fix statically linked binary modules not getting compiled --- Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs 
b/Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs index ee61da013..e23cad4d4 100644 --- a/Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs +++ b/Source/Tools/Flax.Build/Build/NativeCpp/Builder.NativeCpp.cs @@ -500,7 +500,7 @@ namespace Flax.Build } // Compile C++ file with binary module (only for first module in the binary module to prevent multiple implementations) - if (buildData.BinaryModules.FirstOrDefault(x => x.Key == module.BinaryModuleName)?.First() == module) + if (buildData.BinaryModules.FirstOrDefault(x => x.Key == module.BinaryModuleName)?.First(x => !(x is DepsModule || x is HeaderOnlyModule)) == module) { var project = GetModuleProject(module, buildData); var binaryModuleSourcePath = Path.Combine(project.ProjectFolderPath, "Source", module.BinaryModuleName + ".Gen.cpp"); From 8b22ffe0070d5c246c222d2f4a26a3d52763aaa5 Mon Sep 17 00:00:00 2001 From: Ari Vuollet Date: Sat, 20 Jul 2024 20:45:46 +0300 Subject: [PATCH 32/53] Fix warnings about upcoming C# reserved keywords --- .../Content/Import/TextureImportEntry.cs | 6 +-- Source/Editor/Scripting/ScriptType.cs | 6 +-- Source/Engine/Core/Math/FloatR10G10B10A2.cs | 48 +++++++++---------- Source/Engine/Core/Math/FloatR11G11B10.cs | 26 +++++----- Source/Engine/Core/Math/Half.cs | 24 ++++------ 5 files changed, 49 insertions(+), 61 deletions(-) diff --git a/Source/Editor/Content/Import/TextureImportEntry.cs b/Source/Editor/Content/Import/TextureImportEntry.cs index 9842485c1..e7a007fb2 100644 --- a/Source/Editor/Content/Import/TextureImportEntry.cs +++ b/Source/Editor/Content/Import/TextureImportEntry.cs @@ -58,10 +58,10 @@ namespace FlaxEngine.Tools FieldInfo[] fields = typeof(CustomMaxSizes).GetFields(); for (int i = 0; i < fields.Length; i++) { - var field = fields[i]; - if (field.Name.Equals("value__")) + var @field = fields[i]; + if (@field.Name.Equals("value__")) continue; - if (value == (int)field.GetRawConstantValue()) + if (value == (int)@field.GetRawConstantValue()) return 
(CustomMaxSizes)value; } return CustomMaxSizes._8192; diff --git a/Source/Editor/Scripting/ScriptType.cs b/Source/Editor/Scripting/ScriptType.cs index 848fda30e..6d9db8ad6 100644 --- a/Source/Editor/Scripting/ScriptType.cs +++ b/Source/Editor/Scripting/ScriptType.cs @@ -51,12 +51,12 @@ namespace FlaxEditor.Scripting int standardToken = _managed?.MetadataToken ?? _custom?.MetadataToken ?? 0; if (_managed is PropertyInfo && _managed.DeclaringType != null) { - var field = _managed.DeclaringType.GetField(string.Format("<{0}>k__BackingField", Name), BindingFlags.Instance | BindingFlags.NonPublic); - if (field == null || field.MetadataToken == 0) + var backingField = _managed.DeclaringType.GetField(string.Format("<{0}>k__BackingField", Name), BindingFlags.Instance | BindingFlags.NonPublic); + if (backingField == null || backingField.MetadataToken == 0) { return standardToken; } - return field.MetadataToken; + return backingField.MetadataToken; } return standardToken; } diff --git a/Source/Engine/Core/Math/FloatR10G10B10A2.cs b/Source/Engine/Core/Math/FloatR10G10B10A2.cs index ab89b3962..8cd482127 100644 --- a/Source/Engine/Core/Math/FloatR10G10B10A2.cs +++ b/Source/Engine/Core/Math/FloatR10G10B10A2.cs @@ -12,7 +12,7 @@ namespace FlaxEngine [StructLayout(LayoutKind.Sequential, Pack = 4)] public struct FloatR10G10B10A2 { - private uint value; + private uint rawValue; /// /// Initializes a new instance of the structure. @@ -23,7 +23,7 @@ namespace FlaxEngine /// The floating point value that should be stored in A component (2 bit format). public FloatR10G10B10A2(float x, float y, float z, float w) { - value = Pack(x, y, z, w); + rawValue = Pack(x, y, z, w); } /// @@ -33,7 +33,7 @@ namespace FlaxEngine /// The floating point value that should be stored in alpha component (2 bit format). 
public FloatR10G10B10A2(Float3 value, float w = 0) { - this.value = Pack(value.X, value.Y, value.Z, w); + rawValue = Pack(value.X, value.Y, value.Z, w); } /// @@ -42,37 +42,33 @@ namespace FlaxEngine /// The floating point value that should be stored in 10 bit format. public FloatR10G10B10A2(Float4 value) { - this.value = Pack(value.X, value.Y, value.Z, value.W); + rawValue = Pack(value.X, value.Y, value.Z, value.W); } /// /// Gets or sets the raw 32 bit value used to back this vector. /// - public uint RawValue - { - get => value; - set => this.value = value; - } + public uint RawValue => rawValue; /// /// Gets the R component. /// - public float R => (value & 0x3FF) / 1023.0f; + public float R => (rawValue & 0x3FF) / 1023.0f; /// /// Gets the G component. /// - public float G => ((value >> 10) & 0x3FF) / 1023.0f; + public float G => ((rawValue >> 10) & 0x3FF) / 1023.0f; /// /// Gets the B component. /// - public float B => ((value >> 20) & 0x3FF) / 1023.0f; + public float B => ((rawValue >> 20) & 0x3FF) / 1023.0f; /// /// Gets the A component. /// - public float A => (value >> 30) / 3.0f; + public float A => (rawValue >> 30) / 3.0f; /// /// Performs an explicit conversion from to . @@ -102,7 +98,7 @@ namespace FlaxEngine /// true if has the same value as ; otherwise, false. public static bool operator ==(FloatR10G10B10A2 left, FloatR10G10B10A2 right) { - return left.value == right.value; + return left.rawValue == right.rawValue; } /// @@ -113,7 +109,7 @@ namespace FlaxEngine /// true if has a different value than ; otherwise, false. public static bool operator !=(FloatR10G10B10A2 left, FloatR10G10B10A2 right) { - return left.value != right.value; + return left.rawValue != right.rawValue; } /// @@ -131,7 +127,7 @@ namespace FlaxEngine /// A 32-bit signed integer hash code. 
public override int GetHashCode() { - return value.GetHashCode(); + return rawValue.GetHashCode(); } /// @@ -142,7 +138,7 @@ namespace FlaxEngine /// true if is the same instance as or if both are null references or if value1.Equals(value2) returns true; otherwise, false. public static bool Equals(ref FloatR10G10B10A2 value1, ref FloatR10G10B10A2 value2) { - return value1.value == value2.value; + return value1.rawValue == value2.rawValue; } /// @@ -152,7 +148,7 @@ namespace FlaxEngine /// true if the current instance is equal to the specified object; false otherwise. public bool Equals(FloatR10G10B10A2 other) { - return other.value == value; + return other.rawValue == rawValue; } /// @@ -162,7 +158,7 @@ namespace FlaxEngine /// true if the current instance is equal to the specified object; false otherwise. public override bool Equals(object obj) { - return obj is FloatR10G10B10A2 other && value == other.value; + return obj is FloatR10G10B10A2 other && rawValue == other.rawValue; } private static uint Pack(float x, float y, float z, float w) @@ -191,11 +187,11 @@ namespace FlaxEngine { Float3 vectorOut; - uint tmp = value & 0x3FF; + uint tmp = rawValue & 0x3FF; vectorOut.X = tmp / 1023.0f; - tmp = (value >> 10) & 0x3FF; + tmp = (rawValue >> 10) & 0x3FF; vectorOut.Y = tmp / 1023.0f; - tmp = (value >> 20) & 0x3FF; + tmp = (rawValue >> 20) & 0x3FF; vectorOut.Z = tmp / 1023.0f; return vectorOut; @@ -209,13 +205,13 @@ namespace FlaxEngine { Float4 vectorOut; - uint tmp = value & 0x3FF; + uint tmp = rawValue & 0x3FF; vectorOut.X = tmp / 1023.0f; - tmp = (value >> 10) & 0x3FF; + tmp = (rawValue >> 10) & 0x3FF; vectorOut.Y = tmp / 1023.0f; - tmp = (value >> 20) & 0x3FF; + tmp = (rawValue >> 20) & 0x3FF; vectorOut.Z = tmp / 1023.0f; - vectorOut.W = (value >> 30) / 3.0f; + vectorOut.W = (rawValue >> 30) / 3.0f; return vectorOut; } diff --git a/Source/Engine/Core/Math/FloatR11G11B10.cs b/Source/Engine/Core/Math/FloatR11G11B10.cs index ccaf31024..524f16096 100644 --- 
a/Source/Engine/Core/Math/FloatR11G11B10.cs +++ b/Source/Engine/Core/Math/FloatR11G11B10.cs @@ -21,7 +21,7 @@ namespace FlaxEngine { // Reference: [https://github.com/Microsoft/DirectXMath/blob/master/Inc/DirectXPackedVector.h] - private uint value; + private uint rawValue; /// /// Initializes a new instance of the structure. @@ -31,7 +31,7 @@ namespace FlaxEngine /// The floating point value that should be stored in B component (10 bits format). public FloatR11G11B10(float x, float y, float z) { - value = Pack(x, y, z); + rawValue = Pack(x, y, z); } /// @@ -40,17 +40,13 @@ namespace FlaxEngine /// The floating point value that should be stored in compressed format. public FloatR11G11B10(Float3 value) { - this.value = Pack(value.X, value.Y, value.Z); + rawValue = Pack(value.X, value.Y, value.Z); } /// /// Gets or sets the raw 32 bit value used to back this vector. /// - public uint RawValue - { - get => value; - set => this.value = value; - } + public uint RawValue => rawValue; /// /// Performs an explicit conversion from to . @@ -80,7 +76,7 @@ namespace FlaxEngine /// true if has the same value as ; otherwise, false. public static bool operator ==(FloatR11G11B10 left, FloatR11G11B10 right) { - return left.value == right.value; + return left.rawValue == right.rawValue; } /// @@ -91,7 +87,7 @@ namespace FlaxEngine /// true if has a different value than ; otherwise, false. public static bool operator !=(FloatR11G11B10 left, FloatR11G11B10 right) { - return left.value != right.value; + return left.rawValue != right.rawValue; } /// @@ -109,7 +105,7 @@ namespace FlaxEngine /// A 32-bit signed integer hash code. public override int GetHashCode() { - return value.GetHashCode(); + return rawValue.GetHashCode(); } /// @@ -120,7 +116,7 @@ namespace FlaxEngine /// true if is the same instance as or if both are null references or if value1.Equals(value2) returns true; otherwise, false. 
public static bool Equals(ref FloatR11G11B10 value1, ref FloatR11G11B10 value2) { - return value1.value == value2.value; + return value1.rawValue == value2.rawValue; } /// @@ -130,7 +126,7 @@ namespace FlaxEngine /// true if the current instance is equal to the specified object; false otherwise. public bool Equals(FloatR11G11B10 other) { - return other.value == value; + return other.rawValue == rawValue; } /// @@ -140,7 +136,7 @@ namespace FlaxEngine /// true if the current instance is equal to the specified object; false otherwise. public override bool Equals(object obj) { - return obj is FloatR11G11B10 other && value == other.value; + return obj is FloatR11G11B10 other && rawValue == other.rawValue; } private static unsafe uint Pack(float x, float y, float z) @@ -288,7 +284,7 @@ namespace FlaxEngine { int zeroExponent = -112; - Packed packed = new Packed(value); + Packed packed = new Packed(rawValue); uint* result = stackalloc uint[4]; uint exponent; diff --git a/Source/Engine/Core/Math/Half.cs b/Source/Engine/Core/Math/Half.cs index da10d76e9..5177e7365 100644 --- a/Source/Engine/Core/Math/Half.cs +++ b/Source/Engine/Core/Math/Half.cs @@ -38,7 +38,7 @@ namespace FlaxEngine [StructLayout(LayoutKind.Sequential, Pack = 2)] public struct Half { - private ushort value; + private ushort rawValue; /// /// Number of decimal digits of precision. @@ -111,17 +111,13 @@ namespace FlaxEngine /// The floating point value that should be stored in 16 bit format. public Half(float value) { - this.value = HalfUtils.Pack(value); + rawValue = HalfUtils.Pack(value); } /// /// Gets or sets the raw 16 bit value used to back this half-float. /// - public ushort RawValue - { - get => value; - set => this.value = value; - } + public ushort RawValue => rawValue; /// /// Converts an array of half precision values into full precision values. @@ -166,7 +162,7 @@ namespace FlaxEngine /// The converted value. 
public static implicit operator float(Half value) { - return HalfUtils.Unpack(value.value); + return HalfUtils.Unpack(value.rawValue); } /// @@ -177,7 +173,7 @@ namespace FlaxEngine /// true if has the same value as ; otherwise, false. public static bool operator ==(Half left, Half right) { - return left.value == right.value; + return left.rawValue == right.rawValue; } /// @@ -188,7 +184,7 @@ namespace FlaxEngine /// true if has a different value than ; otherwise, false. public static bool operator !=(Half left, Half right) { - return left.value != right.value; + return left.rawValue != right.rawValue; } /// @@ -207,7 +203,7 @@ namespace FlaxEngine /// A 32-bit signed integer hash code. public override int GetHashCode() { - ushort num = value; + ushort num = rawValue; return (((num * 3) / 2) ^ num); } @@ -219,7 +215,7 @@ namespace FlaxEngine /// true if is the same instance as or if both are null references or if value1.Equals(value2) returns true; otherwise, false. public static bool Equals(ref Half value1, ref Half value2) { - return value1.value == value2.value; + return value1.rawValue == value2.rawValue; } /// @@ -229,7 +225,7 @@ namespace FlaxEngine /// true if the current instance is equal to the specified object; false otherwise. public bool Equals(Half other) { - return other.value == value; + return other.rawValue == rawValue; } /// @@ -248,7 +244,7 @@ namespace FlaxEngine return false; } Half half = (Half)obj; - return half.value == value; + return half.rawValue == rawValue; } static Half() From a580d6785f26c6ed9e22e73c102dfa4739220124 Mon Sep 17 00:00:00 2001 From: Roman Zhuravlev Date: Sun, 21 Jul 2024 22:25:25 +0200 Subject: [PATCH 33/53] Ignore Translate/Rotate/Scale/TransformSpace hotkeys while holding RMB. Allows to use industry standard WER hotkeys for transformations and not change modes while navigating through level. 
--- Source/Editor/Viewport/EditorGizmoViewport.cs | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/Source/Editor/Viewport/EditorGizmoViewport.cs b/Source/Editor/Viewport/EditorGizmoViewport.cs index f0681e48d..0a0591015 100644 --- a/Source/Editor/Viewport/EditorGizmoViewport.cs +++ b/Source/Editor/Viewport/EditorGizmoViewport.cs @@ -360,11 +360,36 @@ namespace FlaxEditor.Viewport }; // Setup input actions - viewport.InputActions.Add(options => options.TranslateMode, () => transformGizmo.ActiveMode = TransformGizmoBase.Mode.Translate); - viewport.InputActions.Add(options => options.RotateMode, () => transformGizmo.ActiveMode = TransformGizmoBase.Mode.Rotate); - viewport.InputActions.Add(options => options.ScaleMode, () => transformGizmo.ActiveMode = TransformGizmoBase.Mode.Scale); + viewport.InputActions.Add(options => options.TranslateMode, () => + { + viewport.GetInput(out var input); + if (input.IsMouseRightDown) + return; + + transformGizmo.ActiveMode = TransformGizmoBase.Mode.Translate; + }); + viewport.InputActions.Add(options => options.RotateMode, () => + { + viewport.GetInput(out var input); + if (input.IsMouseRightDown) + return; + + transformGizmo.ActiveMode = TransformGizmoBase.Mode.Rotate; + }); + viewport.InputActions.Add(options => options.ScaleMode, () => + { + viewport.GetInput(out var input); + if (input.IsMouseRightDown) + return; + + transformGizmo.ActiveMode = TransformGizmoBase.Mode.Scale; + }); viewport.InputActions.Add(options => options.ToggleTransformSpace, () => { + viewport.GetInput(out var input); + if (input.IsMouseRightDown) + return; + transformGizmo.ToggleTransformSpace(); if (useProjectCache) editor.ProjectCache.SetCustomData("TransformSpaceState", transformGizmo.ActiveTransformSpace.ToString()); From 82096392616b6a16a4a56e949782aca428850d4c Mon Sep 17 00:00:00 2001 From: Chandler Cox Date: Tue, 23 Jul 2024 22:08:01 -0500 Subject: [PATCH 34/53] Add being able to add a thumbnail to a 
`SpawnableJsonProxy` --- Source/Editor/Content/Proxy/JsonAssetProxy.cs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Source/Editor/Content/Proxy/JsonAssetProxy.cs b/Source/Editor/Content/Proxy/JsonAssetProxy.cs index 12a54a030..e9caa96a4 100644 --- a/Source/Editor/Content/Proxy/JsonAssetProxy.cs +++ b/Source/Editor/Content/Proxy/JsonAssetProxy.cs @@ -166,6 +166,18 @@ namespace FlaxEditor.Content /// public override string Name { get; } = Utilities.Utils.GetPropertyNameUI(typeof(T).Name); + private SpriteHandle _thumbnail; + + public SpawnableJsonAssetProxy() + { + _thumbnail = SpriteHandle.Invalid; + } + + public SpawnableJsonAssetProxy(SpriteHandle thumbnail) + { + _thumbnail = thumbnail; + } + /// public override bool CanCreate(ContentFolder targetLocation) { @@ -177,6 +189,12 @@ namespace FlaxEditor.Content { Editor.SaveJsonAsset(outputPath, new T()); } + + /// + public override AssetItem ConstructItem(string path, string typeName, ref Guid id) + { + return _thumbnail.IsValid ? 
new JsonAssetItem(path, id, typeName, _thumbnail) : base.ConstructItem(path, typeName, ref id); + } /// public override string TypeName { get; } = typeof(T).FullName; From 013dab4ba704565acfea4368285bbd6b71b84325 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 10:52:54 +0200 Subject: [PATCH 35/53] Fix codestyle and apply pr feedback #2730 --- Source/Engine/Level/Actor.cpp | 2 +- Source/Engine/Level/Actor.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 08d2832d1..1cdfccd3b 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -543,7 +543,7 @@ void Actor::SetLayerRecursive(int32 layerIndex) OnLayerChanged(); } -void Actor::SetNameNoCopy(String&& value) +void Actor::SetName(String&& value) { if (_name == value) return; diff --git a/Source/Engine/Level/Actor.h b/Source/Engine/Level/Actor.h index 787cc6835..2a2fb1a4c 100644 --- a/Source/Engine/Level/Actor.h +++ b/Source/Engine/Level/Actor.h @@ -182,15 +182,16 @@ public: } /// - /// Sets the actor name without copying the string. + /// Sets the actor name. /// - void SetNameNoCopy(String&& value); + /// The value to set. + API_PROPERTY() void SetName(String&& value); /// /// Sets the actor name. /// /// The value to set. 
- API_PROPERTY() void SetName(const StringView& value); + void SetName(const StringView& value); public: /// From 1891b9e3676a115122e464d5e3debe1be666a4ad Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 13:51:32 +0200 Subject: [PATCH 36/53] Refactor PhysX vehicles and cloth simulation code to be more readable #2796 --- Source/Engine/Physics/Actors/Cloth.cpp | 5 + Source/Engine/Physics/Actors/Cloth.h | 6 +- Source/Engine/Physics/Actors/WheeledVehicle.h | 1 + .../Physics/PhysX/PhysicsBackendPhysX.cpp | 999 +++++++++--------- 4 files changed, 518 insertions(+), 493 deletions(-) diff --git a/Source/Engine/Physics/Actors/Cloth.cpp b/Source/Engine/Physics/Actors/Cloth.cpp index 0b5e75a7e..2781cb161 100644 --- a/Source/Engine/Physics/Actors/Cloth.cpp +++ b/Source/Engine/Physics/Actors/Cloth.cpp @@ -27,6 +27,11 @@ Cloth::Cloth(const SpawnParams& params) _drawCategory = SceneRendering::SceneDrawAsync; } +void* Cloth::GetPhysicsCloth() const +{ + return _cloth; +} + ModelInstanceActor::MeshReference Cloth::GetMesh() const { auto value = _mesh; diff --git a/Source/Engine/Physics/Actors/Cloth.h b/Source/Engine/Physics/Actors/Cloth.h index 92c1dea92..5bdcd6e5a 100644 --- a/Source/Engine/Physics/Actors/Cloth.h +++ b/Source/Engine/Physics/Actors/Cloth.h @@ -13,7 +13,6 @@ /// API_CLASS(Attributes="ActorContextMenu(\"New/Physics/Cloth\"), ActorToolbox(\"Physics\")") class FLAXENGINE_API Cloth : public Actor { - friend class PhysicsBackend; DECLARE_SCENE_OBJECT(Cloth); /// @@ -231,6 +230,11 @@ private: Array _paint; public: + /// + /// Gets the native physics backend object. + /// + void* GetPhysicsCloth() const; + /// /// Gets the mesh to use for the cloth simulation (single mesh from specific LOD). Always from the parent static or animated model actor. 
/// diff --git a/Source/Engine/Physics/Actors/WheeledVehicle.h b/Source/Engine/Physics/Actors/WheeledVehicle.h index 29e3dfbe6..16e226546 100644 --- a/Source/Engine/Physics/Actors/WheeledVehicle.h +++ b/Source/Engine/Physics/Actors/WheeledVehicle.h @@ -13,6 +13,7 @@ API_CLASS(Attributes="ActorContextMenu(\"New/Physics/Wheeled Vehicle\"), ActorToolbox(\"Physics\")") class FLAXENGINE_API WheeledVehicle : public RigidBody { friend class PhysicsBackend; + friend struct ScenePhysX; DECLARE_SCENE_OBJECT(WheeledVehicle); /// diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index 3439fa27e..93ee09295 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -102,9 +102,13 @@ struct ScenePhysX Array ClothsList; #endif +#if WITH_VEHICLE + void UpdateVehicles(float dt); +#endif #if WITH_CLOTH void PreSimulateCloth(int32 i); void SimulateCloth(int32 i); + void UpdateCloths(float dt); #endif }; @@ -725,6 +729,451 @@ void InitVehicleSDK() } } +void ScenePhysX::UpdateVehicles(float dt) +{ + if (WheelVehicles.IsEmpty()) + return; + PROFILE_CPU_NAMED("Physics.Vehicles"); + + // Update vehicles steering + WheelVehiclesCache.Clear(); + WheelVehiclesCache.EnsureCapacity(WheelVehicles.Count()); + int32 wheelsCount = 0; + for (auto wheelVehicle : WheelVehicles) + { + if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation()) + continue; + auto drive = (PxVehicleWheels*)wheelVehicle->_vehicle; + ASSERT(drive); + WheelVehiclesCache.Add(drive); + wheelsCount += drive->mWheelsSimData.getNbWheels(); + + const float deadZone = 0.1f; + bool isTank = wheelVehicle->_driveType == WheeledVehicle::DriveTypes::Tank; + float throttle = wheelVehicle->_throttle; + float steering = wheelVehicle->_steering; + float brake = wheelVehicle->_brake; + float leftThrottle = wheelVehicle->_tankLeftThrottle; + float rightThrottle = 
wheelVehicle->_tankRightThrottle; + float leftBrake = Math::Max(wheelVehicle->_tankLeftBrake, wheelVehicle->_handBrake); + float rightBrake = Math::Max(wheelVehicle->_tankRightBrake, wheelVehicle->_handBrake); + WheeledVehicle::DriveModes vehicleDriveMode = wheelVehicle->_driveControl.DriveMode; + + if (isTank) + { + // Converting default vehicle controls to tank controls. + if (throttle != 0 || steering != 0) + { + leftThrottle = Math::Clamp(throttle + steering, -1.0f, 1.0f); + rightThrottle = Math::Clamp(throttle - steering, -1.0f, 1.0f); + } + } + + // Converting special tank drive mode to standard tank mode when is turning. + if (isTank && vehicleDriveMode == WheeledVehicle::DriveModes::Standard) + { + // Special inputs when turning vehicle -1 1 to left or 1 -1 to turn right + // to: + // Standard inputs when turning vehicle 0 1 to left or 1 0 to turn right + + if (leftThrottle < -deadZone && rightThrottle > deadZone) + { + leftThrottle = 0; + leftBrake = 1; + } + else if (leftThrottle > deadZone && rightThrottle < -deadZone) + { + rightThrottle = 0; + rightBrake = 1; + } + } + + if (wheelVehicle->UseReverseAsBrake) + { + const float invalidDirectionThreshold = 80.0f; + const float breakThreshold = 8.0f; + const float forwardSpeed = wheelVehicle->GetForwardSpeed(); + int currentGear = wheelVehicle->GetCurrentGear(); + // Tank tracks direction: 1 forward -1 backward 0 neutral + bool toForward = false; + toForward |= throttle > deadZone; + toForward |= (leftThrottle > deadZone) && (rightThrottle > deadZone); // 1 1 + + bool toBackward = false; + toBackward |= throttle < -deadZone; + toBackward |= (leftThrottle < -deadZone) && (rightThrottle < -deadZone); // -1 -1 + toBackward |= (leftThrottle < -deadZone) && (rightThrottle < deadZone); // -1 0 + toBackward |= (leftThrottle < deadZone) && (rightThrottle < -deadZone); // 0 -1 + + bool isTankTurning = false; + + if (isTank) + { + isTankTurning |= leftThrottle > deadZone && rightThrottle < -deadZone; // 1 -1 + 
isTankTurning |= leftThrottle < -deadZone && rightThrottle > deadZone; // -1 1 + isTankTurning |= leftThrottle < deadZone && rightThrottle > deadZone; // 0 1 + isTankTurning |= leftThrottle > deadZone && rightThrottle < deadZone; // 1 0 + isTankTurning |= leftThrottle < -deadZone && rightThrottle < deadZone; // -1 0 + isTankTurning |= leftThrottle < deadZone && rightThrottle < -deadZone; // 0 -1 + + if (toForward || toBackward) + { + isTankTurning = false; + } + } + + // Automatic gear change when changing driving direction + if (Math::Abs(forwardSpeed) < invalidDirectionThreshold) + { + int targetGear = wheelVehicle->GetTargetGear(); + if (toBackward && currentGear > 0 && targetGear >= 0) + { + currentGear = -1; + } + else if (!toBackward && currentGear <= 0 && targetGear <= 0) + { + currentGear = 1; + } + else if (isTankTurning && currentGear <= 0) + { + currentGear = 1; + } + + if (wheelVehicle->GetCurrentGear() != currentGear) + { + wheelVehicle->SetCurrentGear(currentGear); + } + } + + // Automatic break when changing driving direction + if (toForward) + { + if (forwardSpeed < -invalidDirectionThreshold) + { + brake = 1.0f; + leftBrake = 1.0f; + rightBrake = 1.0f; + } + } + else if (toBackward) + { + if (forwardSpeed > invalidDirectionThreshold) + { + brake = 1.0f; + leftBrake = 1.0f; + rightBrake = 1.0f; + } + } + else + { + if (forwardSpeed < breakThreshold && forwardSpeed > -breakThreshold && !isTankTurning) // not accelerating, very slow speed -> stop + { + brake = 1.0f; + leftBrake = 1.0f; + rightBrake = 1.0f; + } + } + + // Block throttle if user is changing driving direction + if ((toForward && currentGear < 0) || (toBackward && currentGear > 0)) + { + throttle = 0.0f; + leftThrottle = 0; + rightThrottle = 0; + } + + throttle = Math::Abs(throttle); + + if (isTank) + { + // invert acceleration when moving to backward because tank inputs can be < 0 + if (currentGear < 0) + { + float lt = -leftThrottle; + float rt = -rightThrottle; + float lb = leftBrake; 
+ float rb = rightBrake; + leftThrottle = rt; + rightThrottle = lt; + leftBrake = rb; + rightBrake = lb; + } + } + } + else + { + throttle = Math::Max(throttle, 0.0f); + } + + // Force brake the another side track to turn faster + if (Math::Abs(leftThrottle) > deadZone && Math::Abs(rightThrottle) < deadZone) + { + rightBrake = 1.0f; + } + if (Math::Abs(rightThrottle) > deadZone && Math::Abs(leftThrottle) < deadZone) + { + leftBrake = 1.0f; + } + + // Smooth input controls + // @formatter:off + PxVehiclePadSmoothingData padSmoothing = + { + { + wheelVehicle->_driveControl.RiseRateAcceleration, // rise rate eANALOG_INPUT_ACCEL + wheelVehicle->_driveControl.RiseRateBrake, // rise rate eANALOG_INPUT_BRAKE + wheelVehicle->_driveControl.RiseRateHandBrake, // rise rate eANALOG_INPUT_HANDBRAKE + wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_LEFT + wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_RIGHT + }, + { + wheelVehicle->_driveControl.FallRateAcceleration, // fall rate eANALOG_INPUT_ACCEL + wheelVehicle->_driveControl.FallRateBrake, // fall rate eANALOG_INPUT_BRAKE + wheelVehicle->_driveControl.FallRateHandBrake, // fall rate eANALOG_INPUT_HANDBRAKE + wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_LEFT + wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_RIGHT + } + }; + PxVehicleKeySmoothingData keySmoothing = + { + { + wheelVehicle->_driveControl.RiseRateAcceleration, // rise rate eANALOG_INPUT_ACCEL + wheelVehicle->_driveControl.RiseRateBrake, // rise rate eANALOG_INPUT_BRAKE + wheelVehicle->_driveControl.RiseRateHandBrake, // rise rate eANALOG_INPUT_HANDBRAKE + wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_LEFT + wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_RIGHT + }, + { + wheelVehicle->_driveControl.FallRateAcceleration, // fall rate eANALOG_INPUT_ACCEL + 
wheelVehicle->_driveControl.FallRateBrake, // fall rate eANALOG_INPUT_BRAKE + wheelVehicle->_driveControl.FallRateHandBrake, // fall rate eANALOG_INPUT_HANDBRAKE + wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_LEFT + wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_RIGHT + } + }; + // @formatter:on + + // Reduce steer by speed to make vehicle easier to maneuver + constexpr int steerVsSpeedN = 8; + PxF32 steerVsForwardSpeedData[steerVsSpeedN]; + const int lastSteerVsSpeedIndex = wheelVehicle->_driveControl.SteerVsSpeed.Count() - 1; + int steerVsSpeedIndex = 0; + + // Steer vs speed data structure example: + // array: + // speed, steer + // 1000, 1.0, + // 2000, 0.7, + // 5000, 0.5, + // .. + + // fill the steerVsForwardSpeedData with the speed and steer + for (int32 i = 0; i < 8; i += 2) + { + steerVsForwardSpeedData[i] = wheelVehicle->_driveControl.SteerVsSpeed[steerVsSpeedIndex].Speed; + steerVsForwardSpeedData[i + 1] = wheelVehicle->_driveControl.SteerVsSpeed[steerVsSpeedIndex].Steer; + steerVsSpeedIndex = Math::Min(steerVsSpeedIndex + 1, lastSteerVsSpeedIndex); + } + const PxFixedSizeLookupTable steerVsForwardSpeed(steerVsForwardSpeedData, 4); + + if (wheelVehicle->UseAnalogSteering) + { + switch (wheelVehicle->_driveTypeCurrent) + { + case WheeledVehicle::DriveTypes::Drive4W: + { + PxVehicleDrive4WRawInputData rawInputData; + rawInputData.setAnalogAccel(throttle); + rawInputData.setAnalogBrake(brake); + rawInputData.setAnalogSteer(wheelVehicle->_steering); + rawInputData.setAnalogHandbrake(wheelVehicle->_handBrake); + PxVehicleDrive4WSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, steerVsForwardSpeed, rawInputData, dt, false, *(PxVehicleDrive4W*)drive); + break; + } + case WheeledVehicle::DriveTypes::DriveNW: + { + PxVehicleDriveNWRawInputData rawInputData; + rawInputData.setAnalogAccel(throttle); + rawInputData.setAnalogBrake(brake); + rawInputData.setAnalogSteer(wheelVehicle->_steering); + 
rawInputData.setAnalogHandbrake(wheelVehicle->_handBrake); + PxVehicleDriveNWSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, steerVsForwardSpeed, rawInputData, dt, false, *(PxVehicleDriveNW*)drive); + break; + } + case WheeledVehicle::DriveTypes::Tank: + { + PxVehicleDriveTankRawInputData driveMode = vehicleDriveMode == WheeledVehicle::DriveModes::Standard ? PxVehicleDriveTankControlModel::eSTANDARD : PxVehicleDriveTankControlModel::eSPECIAL; + PxVehicleDriveTankRawInputData rawInputData = PxVehicleDriveTankRawInputData(driveMode); + rawInputData.setAnalogAccel(Math::Max(Math::Abs(leftThrottle), Math::Abs(rightThrottle))); + rawInputData.setAnalogLeftBrake(leftBrake); + rawInputData.setAnalogRightBrake(rightBrake); + rawInputData.setAnalogLeftThrust(leftThrottle); + rawInputData.setAnalogRightThrust(rightThrottle); + PxVehicleDriveTankSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, rawInputData, dt, *(PxVehicleDriveTank*)drive); + break; + } + } + } + else + { + switch (wheelVehicle->_driveTypeCurrent) + { + case WheeledVehicle::DriveTypes::Drive4W: + { + PxVehicleDrive4WRawInputData rawInputData; + rawInputData.setDigitalAccel(throttle > deadZone); + rawInputData.setDigitalBrake(brake > deadZone); + rawInputData.setDigitalSteerLeft(wheelVehicle->_steering < -deadZone); + rawInputData.setDigitalSteerRight(wheelVehicle->_steering > deadZone); + rawInputData.setDigitalHandbrake(wheelVehicle->_handBrake > deadZone); + PxVehicleDrive4WSmoothDigitalRawInputsAndSetAnalogInputs(keySmoothing, steerVsForwardSpeed, rawInputData, dt, false, *(PxVehicleDrive4W*)drive); + break; + } + case WheeledVehicle::DriveTypes::DriveNW: + { + PxVehicleDriveNWRawInputData rawInputData; + rawInputData.setDigitalAccel(throttle > deadZone); + rawInputData.setDigitalBrake(brake > deadZone); + rawInputData.setDigitalSteerLeft(wheelVehicle->_steering < -deadZone); + rawInputData.setDigitalSteerRight(wheelVehicle->_steering > deadZone); + 
rawInputData.setDigitalHandbrake(wheelVehicle->_handBrake > deadZone); + PxVehicleDriveNWSmoothDigitalRawInputsAndSetAnalogInputs(keySmoothing, steerVsForwardSpeed, rawInputData, dt, false, *(PxVehicleDriveNW*)drive); + break; + } + case WheeledVehicle::DriveTypes::Tank: + { + // Convert analog inputs to digital inputs + leftThrottle = Math::Round(leftThrottle); + rightThrottle = Math::Round(rightThrottle); + leftBrake = Math::Round(leftBrake); + rightBrake = Math::Round(rightBrake); + + PxVehicleDriveTankRawInputData driveMode = vehicleDriveMode == WheeledVehicle::DriveModes::Standard ? PxVehicleDriveTankControlModel::eSTANDARD : PxVehicleDriveTankControlModel::eSPECIAL; + PxVehicleDriveTankRawInputData rawInputData = PxVehicleDriveTankRawInputData(driveMode); + rawInputData.setAnalogAccel(Math::Max(Math::Abs(leftThrottle), Math::Abs(rightThrottle))); + rawInputData.setAnalogLeftBrake(leftBrake); + rawInputData.setAnalogRightBrake(rightBrake); + rawInputData.setAnalogLeftThrust(leftThrottle); + rawInputData.setAnalogRightThrust(rightThrottle); + + // Needs to pass analog values to vehicle to maintain current movement direction because digital inputs accept only true/false values to tracks thrust instead of -1 to 1 + PxVehicleDriveTankSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, rawInputData, dt, *(PxVehicleDriveTank*)drive); + break; + } + } + } + } + + // Update batches queries cache + if (wheelsCount > WheelRaycastBatchQuerySize) + { + if (WheelRaycastBatchQuery) + WheelRaycastBatchQuery->release(); + WheelRaycastBatchQuerySize = wheelsCount; + WheelRaycastBatchQuery = PxCreateBatchQueryExt(*Scene, &WheelRaycastFilter, wheelsCount, wheelsCount, 0, 0, 0, 0); + } + + // Update lookup table that maps wheel type into the surface friction + if (!WheelTireFrictions || WheelTireFrictionsDirty) + { + WheelTireFrictionsDirty = false; + RELEASE_PHYSX(WheelTireFrictions); + Array> materials; + materials.Resize(Math::Min((int32)PhysX->getNbMaterials(), 
PxVehicleDrivableSurfaceToTireFrictionPairs::eMAX_NB_SURFACE_TYPES)); + PxMaterial** materialsPtr = materials.Get(); + PhysX->getMaterials(materialsPtr, materials.Count(), 0); + Array> tireTypes; + tireTypes.Resize(materials.Count()); + PxVehicleDrivableSurfaceType* tireTypesPtr = tireTypes.Get(); + for (int32 i = 0; i < tireTypes.Count(); i++) + tireTypesPtr[i].mType = i; + WheelTireFrictions = PxVehicleDrivableSurfaceToTireFrictionPairs::allocate(WheelTireTypes.Count(), materials.Count()); + WheelTireFrictions->setup(WheelTireTypes.Count(), materials.Count(), (const PxMaterial**)materialsPtr, tireTypesPtr); + for (int32 material = 0; material < materials.Count(); material++) + { + float friction = materialsPtr[material]->getStaticFriction(); + for (int32 tireType = 0; tireType < WheelTireTypes.Count(); tireType++) + { + float scale = WheelTireTypes[tireType]; + WheelTireFrictions->setTypePairFriction(material, tireType, friction * scale); + } + } + } + + // Setup cache for wheel states + WheelVehiclesResultsPerVehicle.Resize(WheelVehiclesCache.Count(), false); + WheelVehiclesResultsPerWheel.Resize(wheelsCount, false); + wheelsCount = 0; + for (int32 i = 0, ii = 0; i < WheelVehicles.Count(); i++) + { + auto wheelVehicle = WheelVehicles[i]; + if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation()) + continue; + auto drive = (PxVehicleWheels*)WheelVehicles[ii]->_vehicle; + auto& perVehicle = WheelVehiclesResultsPerVehicle[ii]; + ii++; + perVehicle.nbWheelQueryResults = drive->mWheelsSimData.getNbWheels(); + perVehicle.wheelQueryResults = WheelVehiclesResultsPerWheel.Get() + wheelsCount; + wheelsCount += perVehicle.nbWheelQueryResults; + } + + // Update vehicles + if (WheelVehiclesCache.Count() != 0) + { + PxVehicleSuspensionRaycasts(WheelRaycastBatchQuery, WheelVehiclesCache.Count(), WheelVehiclesCache.Get()); + PxVehicleUpdates(dt, Scene->getGravity(), *WheelTireFrictions, WheelVehiclesCache.Count(), WheelVehiclesCache.Get(), 
WheelVehiclesResultsPerVehicle.Get()); + } + + // Synchronize state + for (int32 i = 0, ii = 0; i < WheelVehicles.Count(); i++) + { + auto wheelVehicle = WheelVehicles[i]; + if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation()) + continue; + auto drive = WheelVehiclesCache[ii]; + auto& perVehicle = WheelVehiclesResultsPerVehicle[ii]; + ii++; +#if PHYSX_VEHICLE_DEBUG_TELEMETRY + LOG(Info, "Vehicle[{}] Gear={}, RPM={}", ii, wheelVehicle->GetCurrentGear(), (int32)wheelVehicle->GetEngineRotationSpeed()); +#endif + + // Update wheels + for (int32 j = 0; j < wheelVehicle->_wheelsData.Count(); j++) + { + auto& wheelData = wheelVehicle->_wheelsData[j]; + auto& perWheel = perVehicle.wheelQueryResults[j]; +#if PHYSX_VEHICLE_DEBUG_TELEMETRY + LOG(Info, "Vehicle[{}] Wheel[{}] longitudinalSlip={}, lateralSlip={}, suspSpringForce={}", ii, j, Utilities::RoundTo2DecimalPlaces(perWheel.longitudinalSlip), Utilities::RoundTo2DecimalPlaces(perWheel.lateralSlip), (int32)perWheel.suspSpringForce); +#endif + + auto& state = wheelData.State; + state.IsInAir = perWheel.isInAir; + state.TireContactCollider = perWheel.tireContactShape ? 
static_cast(perWheel.tireContactShape->userData) : nullptr; + state.TireContactPoint = P2C(perWheel.tireContactPoint) + Origin; + state.TireContactNormal = P2C(perWheel.tireContactNormal); + state.TireFriction = perWheel.tireFriction; + state.SteerAngle = RadiansToDegrees * perWheel.steerAngle; + state.RotationAngle = -RadiansToDegrees * drive->mWheelsDynData.getWheelRotationAngle(j); + state.SuspensionOffset = perWheel.suspJounce; +#if USE_EDITOR + state.SuspensionTraceStart = P2C(perWheel.suspLineStart) + Origin; + state.SuspensionTraceEnd = P2C(perWheel.suspLineStart + perWheel.suspLineDir * perWheel.suspLineLength) + Origin; +#endif + + if (!wheelData.Collider) + continue; + auto shape = (PxShape*)wheelData.Collider->GetPhysicsShape(); + + // Update wheel collider transformation + auto localPose = shape->getLocalPose(); + Transform t = wheelData.Collider->GetLocalTransform(); + t.Orientation = Quaternion::Euler(-state.RotationAngle, state.SteerAngle, 0) * wheelData.LocalOrientation; + t.Translation = P2C(localPose.p) / wheelVehicle->GetScale() - t.Orientation * wheelData.Collider->GetCenter(); + wheelData.Collider->SetLocalTransform(t); + } + } +} + #endif #if WITH_CLOTH @@ -928,6 +1377,58 @@ void ScenePhysX::SimulateCloth(int32 i) ClothSolver->simulateChunk(i); } +void ScenePhysX::UpdateCloths(float dt) +{ + nv::cloth::Solver* clothSolver = ClothSolver; + if (!clothSolver || ClothsList.IsEmpty()) + return; + PROFILE_CPU_NAMED("Physics.Cloth"); + + { + PROFILE_CPU_NAMED("Pre"); + Function job; + job.Bind(this); + JobSystem::Execute(job, ClothsList.Count()); + } + + { + PROFILE_CPU_NAMED("Simulation"); + if (clothSolver->beginSimulation(dt)) + { + Function job; + job.Bind(this); + JobSystem::Execute(job, clothSolver->getSimulationChunkCount()); + clothSolver->endSimulation(); + } + } + + { + PROFILE_CPU_NAMED("Post"); + ScopeLock lock(ClothLocker); + Array brokenCloths; + for (auto clothPhysX : ClothsList) + { + const auto& clothSettings = Cloths[clothPhysX]; + 
if (clothSettings.Culled) + continue; + if (clothSettings.UpdateBounds(clothPhysX)) + brokenCloths.Add(clothSettings.Actor); + clothSettings.Actor->OnPostUpdate(); + } + for (auto cloth : brokenCloths) + { + // Rebuild cloth object but keep fabric ref to prevent fabric recook + auto fabric = &((nv::cloth::Cloth*)cloth->GetPhysicsCloth())->getFabric(); + Fabrics[fabric].Refs++; + fabric->incRefCount(); + cloth->Rebuild(); + fabric->decRefCount(); + if (--Fabrics[fabric].Refs == 0) + Fabrics.Remove(fabric); + } + } +} + #endif void* PhysicalMaterial::GetPhysicsMaterial() @@ -1395,448 +1896,7 @@ void PhysicsBackend::EndSimulateScene(void* scene) } #if WITH_VEHICLE - if (scenePhysX->WheelVehicles.HasItems()) - { - PROFILE_CPU_NAMED("Physics.Vehicles"); - - // Update vehicles steering - WheelVehiclesCache.Clear(); - WheelVehiclesCache.EnsureCapacity(scenePhysX->WheelVehicles.Count()); - int32 wheelsCount = 0; - for (auto wheelVehicle : scenePhysX->WheelVehicles) - { - if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation()) - continue; - auto drive = (PxVehicleWheels*)wheelVehicle->_vehicle; - ASSERT(drive); - WheelVehiclesCache.Add(drive); - wheelsCount += drive->mWheelsSimData.getNbWheels(); - - const float deadZone = 0.1f; - bool isTank = wheelVehicle->_driveType == WheeledVehicle::DriveTypes::Tank; - float throttle = wheelVehicle->_throttle; - float steering = wheelVehicle->_steering; - float brake = wheelVehicle->_brake; - float leftThrottle = wheelVehicle->_tankLeftThrottle; - float rightThrottle = wheelVehicle->_tankRightThrottle; - float leftBrake = Math::Max(wheelVehicle->_tankLeftBrake, wheelVehicle->_handBrake); - float rightBrake = Math::Max(wheelVehicle->_tankRightBrake, wheelVehicle->_handBrake); - WheeledVehicle::DriveModes vehicleDriveMode = wheelVehicle->_driveControl.DriveMode; - - if (isTank) - { - // Converting default vehicle controls to tank controls. 
- if (throttle != 0 || steering != 0) - { - leftThrottle = Math::Clamp(throttle + steering, -1.0f, 1.0f); - rightThrottle = Math::Clamp(throttle - steering, -1.0f, 1.0f); - } - } - - // Converting special tank drive mode to standard tank mode when is turning. - if (isTank && vehicleDriveMode == WheeledVehicle::DriveModes::Standard) - { - // Special inputs when turning vehicle -1 1 to left or 1 -1 to turn right - // to: - // Standard inputs when turning vehicle 0 1 to left or 1 0 to turn right - - if (leftThrottle < -deadZone && rightThrottle > deadZone) - { - leftThrottle = 0; - leftBrake = 1; - } - else if (leftThrottle > deadZone && rightThrottle < -deadZone) - { - rightThrottle = 0; - rightBrake = 1; - } - } - - if (wheelVehicle->UseReverseAsBrake) - { - const float invalidDirectionThreshold = 80.0f; - const float breakThreshold = 8.0f; - const float forwardSpeed = wheelVehicle->GetForwardSpeed(); - int currentGear = wheelVehicle->GetCurrentGear(); - // Tank tracks direction: 1 forward -1 backward 0 neutral - bool toForward = false; - toForward |= throttle > deadZone; - toForward |= (leftThrottle > deadZone) && (rightThrottle > deadZone); // 1 1 - - bool toBackward = false; - toBackward |= throttle < -deadZone; - toBackward |= (leftThrottle < -deadZone) && (rightThrottle < -deadZone); // -1 -1 - toBackward |= (leftThrottle < -deadZone) && (rightThrottle < deadZone); // -1 0 - toBackward |= (leftThrottle < deadZone) && (rightThrottle < -deadZone); // 0 -1 - - bool isTankTurning = false; - - if (isTank) - { - isTankTurning |= leftThrottle > deadZone && rightThrottle < -deadZone; // 1 -1 - isTankTurning |= leftThrottle < -deadZone && rightThrottle > deadZone; // -1 1 - isTankTurning |= leftThrottle < deadZone && rightThrottle > deadZone; // 0 1 - isTankTurning |= leftThrottle > deadZone && rightThrottle < deadZone; // 1 0 - isTankTurning |= leftThrottle < -deadZone && rightThrottle < deadZone; // -1 0 - isTankTurning |= leftThrottle < deadZone && rightThrottle < 
-deadZone; // 0 -1 - - if (toForward || toBackward) - { - isTankTurning = false; - } - } - - // Automatic gear change when changing driving direction - if (Math::Abs(forwardSpeed) < invalidDirectionThreshold) - { - int targetGear = wheelVehicle->GetTargetGear(); - if (toBackward && currentGear > 0 && targetGear >= 0) - { - currentGear = -1; - } - else if (!toBackward && currentGear <= 0 && targetGear <= 0) - { - currentGear = 1; - } - else if (isTankTurning && currentGear <= 0) - { - currentGear = 1; - } - - if (wheelVehicle->GetCurrentGear() != currentGear) - { - wheelVehicle->SetCurrentGear(currentGear); - } - } - - // Automatic break when changing driving direction - if (toForward) - { - if (forwardSpeed < -invalidDirectionThreshold) - { - brake = 1.0f; - leftBrake = 1.0f; - rightBrake = 1.0f; - } - } - else if (toBackward) - { - if (forwardSpeed > invalidDirectionThreshold) - { - brake = 1.0f; - leftBrake = 1.0f; - rightBrake = 1.0f; - } - } - else - { - if (forwardSpeed < breakThreshold && forwardSpeed > -breakThreshold && !isTankTurning) // not accelerating, very slow speed -> stop - { - brake = 1.0f; - leftBrake = 1.0f; - rightBrake = 1.0f; - } - } - - // Block throttle if user is changing driving direction - if ((toForward && currentGear < 0) || (toBackward && currentGear > 0)) - { - throttle = 0.0f; - leftThrottle = 0; - rightThrottle = 0; - } - - throttle = Math::Abs(throttle); - - if (isTank) - { - // invert acceleration when moving to backward because tank inputs can be < 0 - if (currentGear < 0) - { - float lt = -leftThrottle; - float rt = -rightThrottle; - float lb = leftBrake; - float rb = rightBrake; - leftThrottle = rt; - rightThrottle = lt; - leftBrake = rb; - rightBrake = lb; - } - } - } - else - { - throttle = Math::Max(throttle, 0.0f); - } - - // Force brake the another side track to turn faster - if (Math::Abs(leftThrottle) > deadZone && Math::Abs(rightThrottle) < deadZone) - { - rightBrake = 1.0f; - } - if (Math::Abs(rightThrottle) > deadZone 
&& Math::Abs(leftThrottle) < deadZone) - { - leftBrake = 1.0f; - } - - // Smooth input controls - // @formatter:off - PxVehiclePadSmoothingData padSmoothing = - { - { - wheelVehicle->_driveControl.RiseRateAcceleration, // rise rate eANALOG_INPUT_ACCEL - wheelVehicle->_driveControl.RiseRateBrake, // rise rate eANALOG_INPUT_BRAKE - wheelVehicle->_driveControl.RiseRateHandBrake, // rise rate eANALOG_INPUT_HANDBRAKE - wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_LEFT - wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_RIGHT - }, - { - wheelVehicle->_driveControl.FallRateAcceleration, // fall rate eANALOG_INPUT_ACCEL - wheelVehicle->_driveControl.FallRateBrake, // fall rate eANALOG_INPUT_BRAKE - wheelVehicle->_driveControl.FallRateHandBrake, // fall rate eANALOG_INPUT_HANDBRAKE - wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_LEFT - wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_RIGHT - } - }; - PxVehicleKeySmoothingData keySmoothing = - { - { - wheelVehicle->_driveControl.RiseRateAcceleration, // rise rate eANALOG_INPUT_ACCEL - wheelVehicle->_driveControl.RiseRateBrake, // rise rate eANALOG_INPUT_BRAKE - wheelVehicle->_driveControl.RiseRateHandBrake, // rise rate eANALOG_INPUT_HANDBRAKE - wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_LEFT - wheelVehicle->_driveControl.RiseRateSteer, // rise rate eANALOG_INPUT_STEER_RIGHT - }, - { - wheelVehicle->_driveControl.FallRateAcceleration, // fall rate eANALOG_INPUT_ACCEL - wheelVehicle->_driveControl.FallRateBrake, // fall rate eANALOG_INPUT_BRAKE - wheelVehicle->_driveControl.FallRateHandBrake, // fall rate eANALOG_INPUT_HANDBRAKE - wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_LEFT - wheelVehicle->_driveControl.FallRateSteer, // fall rate eANALOG_INPUT_STEER_RIGHT - } - }; - // @formatter:on - - // Reduce steer by speed to make vehicle easier to 
maneuver - constexpr int steerVsSpeedN = 8; - PxF32 steerVsForwardSpeedData[steerVsSpeedN]; - const int lastSteerVsSpeedIndex = wheelVehicle->_driveControl.SteerVsSpeed.Count() - 1; - int steerVsSpeedIndex = 0; - - // Steer vs speed data structure example: - // array: - // speed, steer - // 1000, 1.0, - // 2000, 0.7, - // 5000, 0.5, - // .. - - // fill the steerVsForwardSpeedData with the speed and steer - for (int32 i = 0; i < 8; i += 2) - { - steerVsForwardSpeedData[i] = wheelVehicle->_driveControl.SteerVsSpeed[steerVsSpeedIndex].Speed; - steerVsForwardSpeedData[i + 1] = wheelVehicle->_driveControl.SteerVsSpeed[steerVsSpeedIndex].Steer; - steerVsSpeedIndex = Math::Min(steerVsSpeedIndex + 1, lastSteerVsSpeedIndex); - } - const PxFixedSizeLookupTable steerVsForwardSpeed(steerVsForwardSpeedData, 4); - - if (wheelVehicle->UseAnalogSteering) - { - switch (wheelVehicle->_driveTypeCurrent) - { - case WheeledVehicle::DriveTypes::Drive4W: - { - PxVehicleDrive4WRawInputData rawInputData; - rawInputData.setAnalogAccel(throttle); - rawInputData.setAnalogBrake(brake); - rawInputData.setAnalogSteer(wheelVehicle->_steering); - rawInputData.setAnalogHandbrake(wheelVehicle->_handBrake); - PxVehicleDrive4WSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, steerVsForwardSpeed, rawInputData, scenePhysX->LastDeltaTime, false, *(PxVehicleDrive4W*)drive); - break; - } - case WheeledVehicle::DriveTypes::DriveNW: - { - PxVehicleDriveNWRawInputData rawInputData; - rawInputData.setAnalogAccel(throttle); - rawInputData.setAnalogBrake(brake); - rawInputData.setAnalogSteer(wheelVehicle->_steering); - rawInputData.setAnalogHandbrake(wheelVehicle->_handBrake); - PxVehicleDriveNWSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, steerVsForwardSpeed, rawInputData, scenePhysX->LastDeltaTime, false, *(PxVehicleDriveNW*)drive); - break; - } - case WheeledVehicle::DriveTypes::Tank: - { - PxVehicleDriveTankRawInputData driveMode = vehicleDriveMode == WheeledVehicle::DriveModes::Standard ? 
PxVehicleDriveTankControlModel::eSTANDARD : PxVehicleDriveTankControlModel::eSPECIAL; - PxVehicleDriveTankRawInputData rawInputData = PxVehicleDriveTankRawInputData(driveMode); - rawInputData.setAnalogAccel(Math::Max(Math::Abs(leftThrottle), Math::Abs(rightThrottle))); - rawInputData.setAnalogLeftBrake(leftBrake); - rawInputData.setAnalogRightBrake(rightBrake); - rawInputData.setAnalogLeftThrust(leftThrottle); - rawInputData.setAnalogRightThrust(rightThrottle); - PxVehicleDriveTankSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, rawInputData, scenePhysX->LastDeltaTime, *(PxVehicleDriveTank*)drive); - break; - } - } - } - else - { - switch (wheelVehicle->_driveTypeCurrent) - { - case WheeledVehicle::DriveTypes::Drive4W: - { - PxVehicleDrive4WRawInputData rawInputData; - rawInputData.setDigitalAccel(throttle > deadZone); - rawInputData.setDigitalBrake(brake > deadZone); - rawInputData.setDigitalSteerLeft(wheelVehicle->_steering < -deadZone); - rawInputData.setDigitalSteerRight(wheelVehicle->_steering > deadZone); - rawInputData.setDigitalHandbrake(wheelVehicle->_handBrake > deadZone); - PxVehicleDrive4WSmoothDigitalRawInputsAndSetAnalogInputs(keySmoothing, steerVsForwardSpeed, rawInputData, scenePhysX->LastDeltaTime, false, *(PxVehicleDrive4W*)drive); - break; - } - case WheeledVehicle::DriveTypes::DriveNW: - { - PxVehicleDriveNWRawInputData rawInputData; - rawInputData.setDigitalAccel(throttle > deadZone); - rawInputData.setDigitalBrake(brake > deadZone); - rawInputData.setDigitalSteerLeft(wheelVehicle->_steering < -deadZone); - rawInputData.setDigitalSteerRight(wheelVehicle->_steering > deadZone); - rawInputData.setDigitalHandbrake(wheelVehicle->_handBrake > deadZone); - PxVehicleDriveNWSmoothDigitalRawInputsAndSetAnalogInputs(keySmoothing, steerVsForwardSpeed, rawInputData, scenePhysX->LastDeltaTime, false, *(PxVehicleDriveNW*)drive); - break; - } - case WheeledVehicle::DriveTypes::Tank: - { - // Convert analog inputs to digital inputs - leftThrottle = 
Math::Round(leftThrottle); - rightThrottle = Math::Round(rightThrottle); - leftBrake = Math::Round(leftBrake); - rightBrake = Math::Round(rightBrake); - - PxVehicleDriveTankRawInputData driveMode = vehicleDriveMode == WheeledVehicle::DriveModes::Standard ? PxVehicleDriveTankControlModel::eSTANDARD : PxVehicleDriveTankControlModel::eSPECIAL; - PxVehicleDriveTankRawInputData rawInputData = PxVehicleDriveTankRawInputData(driveMode); - rawInputData.setAnalogAccel(Math::Max(Math::Abs(leftThrottle), Math::Abs(rightThrottle))); - rawInputData.setAnalogLeftBrake(leftBrake); - rawInputData.setAnalogRightBrake(rightBrake); - rawInputData.setAnalogLeftThrust(leftThrottle); - rawInputData.setAnalogRightThrust(rightThrottle); - - // Needs to pass analog values to vehicle to maintain current movement direction because digital inputs accept only true/false values to tracks thrust instead of -1 to 1 - PxVehicleDriveTankSmoothAnalogRawInputsAndSetAnalogInputs(padSmoothing, rawInputData, scenePhysX->LastDeltaTime, *(PxVehicleDriveTank*)drive); - break; - } - } - } - } - - // Update batches queries cache - if (wheelsCount > scenePhysX->WheelRaycastBatchQuerySize) - { - if (scenePhysX->WheelRaycastBatchQuery) - scenePhysX->WheelRaycastBatchQuery->release(); - scenePhysX->WheelRaycastBatchQuerySize = wheelsCount; - scenePhysX->WheelRaycastBatchQuery = PxCreateBatchQueryExt(*scenePhysX->Scene, &WheelRaycastFilter, wheelsCount, wheelsCount, 0, 0, 0, 0); - } - - // Update lookup table that maps wheel type into the surface friction - if (!WheelTireFrictions || WheelTireFrictionsDirty) - { - WheelTireFrictionsDirty = false; - RELEASE_PHYSX(WheelTireFrictions); - Array> materials; - materials.Resize(Math::Min((int32)PhysX->getNbMaterials(), PxVehicleDrivableSurfaceToTireFrictionPairs::eMAX_NB_SURFACE_TYPES)); - PxMaterial** materialsPtr = materials.Get(); - PhysX->getMaterials(materialsPtr, materials.Count(), 0); - Array> tireTypes; - tireTypes.Resize(materials.Count()); - 
PxVehicleDrivableSurfaceType* tireTypesPtr = tireTypes.Get(); - for (int32 i = 0; i < tireTypes.Count(); i++) - tireTypesPtr[i].mType = i; - WheelTireFrictions = PxVehicleDrivableSurfaceToTireFrictionPairs::allocate(WheelTireTypes.Count(), materials.Count()); - WheelTireFrictions->setup(WheelTireTypes.Count(), materials.Count(), (const PxMaterial**)materialsPtr, tireTypesPtr); - for (int32 material = 0; material < materials.Count(); material++) - { - float friction = materialsPtr[material]->getStaticFriction(); - for (int32 tireType = 0; tireType < WheelTireTypes.Count(); tireType++) - { - float scale = WheelTireTypes[tireType]; - WheelTireFrictions->setTypePairFriction(material, tireType, friction * scale); - } - } - } - - // Setup cache for wheel states - WheelVehiclesResultsPerVehicle.Resize(WheelVehiclesCache.Count(), false); - WheelVehiclesResultsPerWheel.Resize(wheelsCount, false); - wheelsCount = 0; - for (int32 i = 0, ii = 0; i < scenePhysX->WheelVehicles.Count(); i++) - { - auto wheelVehicle = scenePhysX->WheelVehicles[i]; - if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation()) - continue; - auto drive = (PxVehicleWheels*)scenePhysX->WheelVehicles[ii]->_vehicle; - auto& perVehicle = WheelVehiclesResultsPerVehicle[ii]; - ii++; - perVehicle.nbWheelQueryResults = drive->mWheelsSimData.getNbWheels(); - perVehicle.wheelQueryResults = WheelVehiclesResultsPerWheel.Get() + wheelsCount; - wheelsCount += perVehicle.nbWheelQueryResults; - } - - // Update vehicles - if (WheelVehiclesCache.Count() != 0) - { - PxVehicleSuspensionRaycasts(scenePhysX->WheelRaycastBatchQuery, WheelVehiclesCache.Count(), WheelVehiclesCache.Get()); - PxVehicleUpdates(scenePhysX->LastDeltaTime, scenePhysX->Scene->getGravity(), *WheelTireFrictions, WheelVehiclesCache.Count(), WheelVehiclesCache.Get(), WheelVehiclesResultsPerVehicle.Get()); - } - - // Synchronize state - for (int32 i = 0, ii = 0; i < scenePhysX->WheelVehicles.Count(); i++) - { - auto wheelVehicle = 
scenePhysX->WheelVehicles[i]; - if (!wheelVehicle->IsActiveInHierarchy() || !wheelVehicle->GetEnableSimulation()) - continue; - auto drive = WheelVehiclesCache[ii]; - auto& perVehicle = WheelVehiclesResultsPerVehicle[ii]; - ii++; -#if PHYSX_VEHICLE_DEBUG_TELEMETRY - LOG(Info, "Vehicle[{}] Gear={}, RPM={}", ii, wheelVehicle->GetCurrentGear(), (int32)wheelVehicle->GetEngineRotationSpeed()); -#endif - - // Update wheels - for (int32 j = 0; j < wheelVehicle->_wheelsData.Count(); j++) - { - auto& wheelData = wheelVehicle->_wheelsData[j]; - auto& perWheel = perVehicle.wheelQueryResults[j]; -#if PHYSX_VEHICLE_DEBUG_TELEMETRY - LOG(Info, "Vehicle[{}] Wheel[{}] longitudinalSlip={}, lateralSlip={}, suspSpringForce={}", ii, j, Utilities::RoundTo2DecimalPlaces(perWheel.longitudinalSlip), Utilities::RoundTo2DecimalPlaces(perWheel.lateralSlip), (int32)perWheel.suspSpringForce); -#endif - - auto& state = wheelData.State; - state.IsInAir = perWheel.isInAir; - state.TireContactCollider = perWheel.tireContactShape ? 
static_cast(perWheel.tireContactShape->userData) : nullptr; - state.TireContactPoint = P2C(perWheel.tireContactPoint) + scenePhysX->Origin; - state.TireContactNormal = P2C(perWheel.tireContactNormal); - state.TireFriction = perWheel.tireFriction; - state.SteerAngle = RadiansToDegrees * perWheel.steerAngle; - state.RotationAngle = -RadiansToDegrees * drive->mWheelsDynData.getWheelRotationAngle(j); - state.SuspensionOffset = perWheel.suspJounce; -#if USE_EDITOR - state.SuspensionTraceStart = P2C(perWheel.suspLineStart) + scenePhysX->Origin; - state.SuspensionTraceEnd = P2C(perWheel.suspLineStart + perWheel.suspLineDir * perWheel.suspLineLength) + scenePhysX->Origin; -#endif - - if (!wheelData.Collider) - continue; - auto shape = (PxShape*)wheelData.Collider->GetPhysicsShape(); - - // Update wheel collider transformation - auto localPose = shape->getLocalPose(); - Transform t = wheelData.Collider->GetLocalTransform(); - t.Orientation = Quaternion::Euler(-state.RotationAngle, state.SteerAngle, 0) * wheelData.LocalOrientation; - t.Translation = P2C(localPose.p) / wheelVehicle->GetScale() - t.Orientation * wheelData.Collider->GetCenter(); - wheelData.Collider->SetLocalTransform(t); - } - } - } + scenePhysX->UpdateVehicles(scenePhysX->LastDeltaTime); #endif { @@ -1860,55 +1920,8 @@ void PhysicsBackend::EndSimulateScene(void* scene) } #if WITH_CLOTH - nv::cloth::Solver* clothSolver = scenePhysX->ClothSolver; - if (clothSolver && scenePhysX->ClothsList.Count() != 0) - { - PROFILE_CPU_NAMED("Physics.Cloth"); - - { - PROFILE_CPU_NAMED("Pre"); - Function job; - job.Bind(scenePhysX); - JobSystem::Execute(job, scenePhysX->ClothsList.Count()); - } - - { - PROFILE_CPU_NAMED("Simulation"); - if (clothSolver->beginSimulation(scenePhysX->LastDeltaTime)) - { - Function job; - job.Bind(scenePhysX); - JobSystem::Execute(job, clothSolver->getSimulationChunkCount()); - clothSolver->endSimulation(); - } - } - - { - PROFILE_CPU_NAMED("Post"); - ScopeLock lock(ClothLocker); - Array 
brokenCloths; - for (auto clothPhysX : scenePhysX->ClothsList) - { - const auto& clothSettings = Cloths[clothPhysX]; - if (clothSettings.Culled) - continue; - if (clothSettings.UpdateBounds(clothPhysX)) - brokenCloths.Add(clothSettings.Actor); - clothSettings.Actor->OnPostUpdate(); - } - for (auto cloth : brokenCloths) - { - // Rebuild cloth object but keep fabric ref to prevent fabric recook - auto fabric = &((nv::cloth::Cloth*)cloth->_cloth)->getFabric(); - Fabrics[fabric].Refs++; - fabric->incRefCount(); - cloth->Rebuild(); - fabric->decRefCount(); - if (--Fabrics[fabric].Refs == 0) - Fabrics.Remove(fabric); - } - } - } + scenePhysX->UpdateCloths(scenePhysX->LastDeltaTime); +#endif { PROFILE_CPU_NAMED("Physics.SendEvents"); @@ -1916,7 +1929,9 @@ void PhysicsBackend::EndSimulateScene(void* scene) scenePhysX->EventsCallback.SendCollisionEvents(); scenePhysX->EventsCallback.SendJointEvents(); } -#endif + + // Clear delta after simulation ended + scenePhysX->LastDeltaTime = 0.0f; } Vector3 PhysicsBackend::GetSceneGravity(void* scene) From ab7de525314c8b5bfb5a955f3d80d79124591c82 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 13:51:41 +0200 Subject: [PATCH 37/53] Fix compilation regression --- Source/Engine/Level/Scene/Scene.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Level/Scene/Scene.cpp b/Source/Engine/Level/Scene/Scene.cpp index 5830cb95f..a85011e2e 100644 --- a/Source/Engine/Level/Scene/Scene.cpp +++ b/Source/Engine/Level/Scene/Scene.cpp @@ -179,7 +179,7 @@ void Scene::CreateCsgCollider() // Create collider auto result = New(); result->SetStaticFlags(StaticFlags::FullyStatic); - result->SetName(CSG_COLLIDER_NAME); + result->SetName(String(CSG_COLLIDER_NAME)); result->CollisionData = CSGData.CollisionData; result->HideFlags |= HideFlags::DontSelect; @@ -202,7 +202,7 @@ void Scene::CreateCsgModel() // Create model auto result = New(); result->SetStaticFlags(StaticFlags::FullyStatic); - 
result->SetName(CSG_MODEL_NAME); + result->SetName(String(CSG_MODEL_NAME)); result->Model = CSGData.Model; result->HideFlags |= HideFlags::DontSelect; From 2a1706decb5afc48f3d87ae6a15e20ade760c66b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 14:19:01 +0200 Subject: [PATCH 38/53] Fix vehicles simulation instability when physics substepping is enabled #2796 --- .../Physics/PhysX/PhysicsBackendPhysX.cpp | 22 +++++++++++++++---- .../Physics/PhysX/PhysicsBackendPhysX.h | 1 + .../Physics/PhysX/PhysicsStepperPhysX.cpp | 4 ++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp index 93ee09295..d6c5e82c8 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.cpp @@ -1606,6 +1606,24 @@ PxMaterial* PhysicsBackendPhysX::GetDefaultMaterial() return DefaultMaterial; } +void PhysicsBackendPhysX::SimulationStepDone(PxScene* scene, float dt) +{ +#if WITH_VEHICLE + ScenePhysX* scenePhysX = nullptr; + for (auto e : Physics::Scenes) + { + if (((ScenePhysX*)e->GetPhysicsScene())->Scene == scene) + { + scenePhysX = (ScenePhysX*)e->GetPhysicsScene(); + break; + } + } + if (!scenePhysX) + return; + scenePhysX->UpdateVehicles(dt); +#endif +} + bool PhysicsBackend::Init() { #define CHECK_INIT(value, msg) if (!value) { LOG(Error, msg); return true; } @@ -1895,10 +1913,6 @@ void PhysicsBackend::EndSimulateScene(void* scene) scenePhysX->Stepper.wait(scenePhysX->Scene); } -#if WITH_VEHICLE - scenePhysX->UpdateVehicles(scenePhysX->LastDeltaTime); -#endif - { PROFILE_CPU_NAMED("Physics.FlushActiveTransforms"); diff --git a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.h b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.h index 7267058f9..f7f245abd 100644 --- a/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.h +++ b/Source/Engine/Physics/PhysX/PhysicsBackendPhysX.h @@ -18,6 +18,7 @@ public: 
static PxCooking* GetCooking(); #endif static PxMaterial* GetDefaultMaterial(); + static void SimulationStepDone(PxScene* scene, float dt); }; #endif diff --git a/Source/Engine/Physics/PhysX/PhysicsStepperPhysX.cpp b/Source/Engine/Physics/PhysX/PhysicsStepperPhysX.cpp index dc303c89f..155642ba6 100644 --- a/Source/Engine/Physics/PhysX/PhysicsStepperPhysX.cpp +++ b/Source/Engine/Physics/PhysX/PhysicsStepperPhysX.cpp @@ -3,7 +3,7 @@ #if COMPILE_WITH_PHYSX #include "PhysicsStepperPhysX.h" -#include "Engine/Physics/Physics.h" +#include "PhysicsBackendPhysX.h" #include "Engine/Profiler/ProfilerCPU.h" #include #include @@ -66,7 +66,7 @@ void MultiThreadStepper::substepDone(StepperTask* ownerTask) } // -> OnSubstep - //Physics::OnSubstep(); + PhysicsBackendPhysX::SimulationStepDone(mScene, mSubStepSize); if (mCurrentSubStep >= mNbSubSteps) { From 41fd7b724ec003064fbca0bfcca56eefb4aafc33 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 15:32:05 +0200 Subject: [PATCH 39/53] Fix test build regression --- Source/Engine/Tests/TestPrefabs.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Tests/TestPrefabs.cpp b/Source/Engine/Tests/TestPrefabs.cpp index 02af56a8e..8c6e29249 100644 --- a/Source/Engine/Tests/TestPrefabs.cpp +++ b/Source/Engine/Tests/TestPrefabs.cpp @@ -189,7 +189,7 @@ TEST_CASE("Prefabs") Guid newChildId; Guid::Parse(TEXT("123456a04cc60d56a2f024bfeef57723"), newChildId); auto newChild = EmptyActor::Spawn(ScriptingObject::SpawnParams(newChildId, EmptyActor::TypeInitializer)); - newChild->SetName(TEXT("Prefab B.Child")); + newChild->SetName(String(TEXT("Prefab B.Child"))); newChild->SetParent(instanceB); // Apply nested prefab changes @@ -213,7 +213,7 @@ TEST_CASE("Prefabs") // Add another child Guid::Parse(TEXT("678906a04cc60d56a2f024bfeef57723"), newChildId); newChild = EmptyActor::Spawn(ScriptingObject::SpawnParams(newChildId, EmptyActor::TypeInitializer)); - newChild->SetName(TEXT("Prefab B.Child 2")); + 
newChild->SetName(String(TEXT("Prefab B.Child 2"))); newChild->SetParent(instanceB); // Apply nested prefab changes From 1e43b031ba32ed8d7d0784039b503c614d1bda8e Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 16:50:01 +0200 Subject: [PATCH 40/53] Fix unpacking marshaled field value #2790 --- Source/Engine/Engine/NativeInterop.Unmanaged.cs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Source/Engine/Engine/NativeInterop.Unmanaged.cs b/Source/Engine/Engine/NativeInterop.Unmanaged.cs index 52218c2f4..62fd95117 100644 --- a/Source/Engine/Engine/NativeInterop.Unmanaged.cs +++ b/Source/Engine/Engine/NativeInterop.Unmanaged.cs @@ -850,11 +850,7 @@ namespace FlaxEngine.Interop { object fieldOwner = fieldOwnerHandle.Target; FieldHolder field = Unsafe.As(fieldHandle.Target); - object value = null; - if (field.field.FieldType.IsValueType) - value = Marshal.PtrToStructure(valuePtr, field.field.FieldType); - else if (valuePtr != IntPtr.Zero) - value = ManagedHandle.FromIntPtr(valuePtr).Target; + object value = MarshalToManaged(valuePtr, field.field.FieldType); field.field.SetValue(fieldOwner, value); } From d14a4f1f66c5bda617ae616fa342286d98bf1987 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 16:50:16 +0200 Subject: [PATCH 41/53] Minor codestyle fixes --- Source/Engine/AI/BehaviorKnowledge.h | 4 ++-- Source/Engine/AI/BehaviorTreeNodes.cpp | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Source/Engine/AI/BehaviorKnowledge.h b/Source/Engine/AI/BehaviorKnowledge.h index aef894c73..e89b3de4b 100644 --- a/Source/Engine/AI/BehaviorKnowledge.h +++ b/Source/Engine/AI/BehaviorKnowledge.h @@ -83,13 +83,13 @@ public: /// Checks if knowledge has a given goal (exact type match without base class check). /// /// The goal type. - /// True if has a given goal, otherwise false. + /// True if knowledge has a given goal, otherwise false. 
API_FUNCTION() bool HasGoal(ScriptingTypeHandle type) const; /// /// Checks if knowledge has a given goal (exact type match without base class check). /// - /// True if has a given goal, otherwise false. + /// True if knowledge has a given goal, otherwise false. template FORCE_INLINE bool HasGoal() { diff --git a/Source/Engine/AI/BehaviorTreeNodes.cpp b/Source/Engine/AI/BehaviorTreeNodes.cpp index 02080da7d..e5b9d8d60 100644 --- a/Source/Engine/AI/BehaviorTreeNodes.cpp +++ b/Source/Engine/AI/BehaviorTreeNodes.cpp @@ -140,7 +140,7 @@ void BehaviorTreeNode::Deserialize(DeserializeStream& stream, ISerializeModifier { SerializableScriptingObject::Deserialize(stream, modifier); - Name.Clear(); // Missing Name is assumes as unnamed node + Name.Clear(); // Missing Name is assumed as unnamed node DESERIALIZE(Name); } @@ -197,7 +197,6 @@ BehaviorUpdateResult BehaviorTreeSequenceNode::Update(const BehaviorUpdateContex return BehaviorUpdateResult::Failed; auto result = Children[state->CurrentChildIndex]->InvokeUpdate(context); - switch (result) { case BehaviorUpdateResult::Success: @@ -232,7 +231,6 @@ BehaviorUpdateResult BehaviorTreeSelectorNode::Update(const BehaviorUpdateContex return BehaviorUpdateResult::Failed; auto result = Children[state->CurrentChildIndex]->InvokeUpdate(context); - switch (result) { case BehaviorUpdateResult::Success: From 9fbc7a5f097c39cda48d4b82dc16bee0b16abc44 Mon Sep 17 00:00:00 2001 From: Chandler Cox Date: Thu, 25 Jul 2024 10:16:26 -0500 Subject: [PATCH 42/53] Add missing comments for spawnable json proxy ctors. 
--- Source/Editor/Content/Proxy/JsonAssetProxy.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Source/Editor/Content/Proxy/JsonAssetProxy.cs b/Source/Editor/Content/Proxy/JsonAssetProxy.cs index e9caa96a4..27d0e2347 100644 --- a/Source/Editor/Content/Proxy/JsonAssetProxy.cs +++ b/Source/Editor/Content/Proxy/JsonAssetProxy.cs @@ -168,11 +168,18 @@ namespace FlaxEditor.Content private SpriteHandle _thumbnail; + /// + /// Default Constructor. + /// public SpawnableJsonAssetProxy() { _thumbnail = SpriteHandle.Invalid; } + /// + /// Constructor with overriden thumbnail. + /// + /// The thumbnail to use. public SpawnableJsonAssetProxy(SpriteHandle thumbnail) { _thumbnail = thumbnail; From b4d1e6197c3a566383a94892a7956aa86eaf5a25 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 17:16:50 +0200 Subject: [PATCH 43/53] Fix crash when removing Visual Script asset that's instance is selected in Properties window --- Source/Editor/Content/Items/VisualScriptItem.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Editor/Content/Items/VisualScriptItem.cs b/Source/Editor/Content/Items/VisualScriptItem.cs index b99125f9b..54133e6e2 100644 --- a/Source/Editor/Content/Items/VisualScriptItem.cs +++ b/Source/Editor/Content/Items/VisualScriptItem.cs @@ -100,12 +100,16 @@ namespace FlaxEditor.Content /// public object GetValue(object obj) { + if (!_type.Asset) + throw new TargetException("Missing Visual Script asset."); return _type.Asset.GetScriptInstanceParameterValue(_parameter.Name, (Object)obj); } /// public void SetValue(object obj, object value) { + if (!_type.Asset) + throw new TargetException("Missing Visual Script asset."); _type.Asset.SetScriptInstanceParameterValue(_parameter.Name, (Object)obj, value); } } From 9f078a6e3ce7bd09da8280b42bbdecc4dd33d1ab Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Thu, 25 Jul 2024 17:17:28 +0200 Subject: [PATCH 44/53] Fix crash when using and saving scene with Visual Script object instance which 
asset was deleted --- Source/Engine/Content/Assets/VisualScript.cpp | 15 ++++++++++++--- Source/Engine/Content/Assets/VisualScript.h | 1 + Source/Engine/Scripting/BinaryModule.h | 8 ++++---- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Content/Assets/VisualScript.cpp b/Source/Engine/Content/Assets/VisualScript.cpp index 857e288fc..616fbe91a 100644 --- a/Source/Engine/Content/Assets/VisualScript.cpp +++ b/Source/Engine/Content/Assets/VisualScript.cpp @@ -1522,12 +1522,16 @@ void VisualScript::unload(bool isReloading) if (_scriptingTypeHandle) { VisualScriptingBinaryModule::Locker.Lock(); - auto& type = VisualScriptingModule.Types[_scriptingTypeHandle.TypeIndex]; + ScriptingType& type = VisualScriptingModule.Types[_scriptingTypeHandle.TypeIndex]; if (type.Script.DefaultInstance) { Delete(type.Script.DefaultInstance); type.Script.DefaultInstance = nullptr; } + char* typeName = (char*)Allocator::Allocate(sizeof(_typenameChars)); + Platform::MemoryCopy(typeName, _typenameChars, sizeof(_typenameChars)); + ((StringAnsiView&)type.Fullname) = StringAnsiView(typeName, 32); + VisualScriptingModule._unloadedScriptTypeNames.Add(typeName); VisualScriptingModule.TypeNameToTypeIndex.RemoveValue(_scriptingTypeHandle.TypeIndex); VisualScriptingModule.Scripts[_scriptingTypeHandle.TypeIndex] = nullptr; _scriptingTypeHandleCached = _scriptingTypeHandle; @@ -1653,6 +1657,8 @@ VisualScriptingBinaryModule::VisualScriptingBinaryModule() ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const ScriptingObjectSpawnParams& params) { // Create native object (base type can be C++ or C#) + if (params.Type.Module == nullptr) + return nullptr; ScriptingType& visualScriptType = (ScriptingType&)params.Type.GetType(); ScriptingTypeHandle baseTypeHandle = visualScriptType.GetBaseType(); const ScriptingType* baseTypePtr = &baseTypeHandle.GetType(); @@ -1663,9 +1669,7 @@ ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const Scri } 
ScriptingObject* object = baseTypePtr->Script.Spawn(params); if (!object) - { return nullptr; - } // Beware! Hacking vtables incoming! Undefined behaviors exploits! Low-level programming! visualScriptType.HackObjectVTable(object, baseTypeHandle, 1); @@ -2060,6 +2064,11 @@ void VisualScriptingBinaryModule::Destroy(bool isReloading) return; BinaryModule::Destroy(isReloading); + + // Free cached script typenames table + for (char* str : _unloadedScriptTypeNames) + Allocator::Free(str); + _unloadedScriptTypeNames.Clear(); } ScriptingTypeHandle VisualScript::GetScriptingType() diff --git a/Source/Engine/Content/Assets/VisualScript.h b/Source/Engine/Content/Assets/VisualScript.h index dc63609a6..4fda11fef 100644 --- a/Source/Engine/Content/Assets/VisualScript.h +++ b/Source/Engine/Content/Assets/VisualScript.h @@ -305,6 +305,7 @@ class FLAXENGINE_API VisualScriptingBinaryModule : public BinaryModule friend VisualScript; private: StringAnsi _name; + Array _unloadedScriptTypeNames; public: /// diff --git a/Source/Engine/Scripting/BinaryModule.h b/Source/Engine/Scripting/BinaryModule.h index fa7540cf2..6d9baf5ce 100644 --- a/Source/Engine/Scripting/BinaryModule.h +++ b/Source/Engine/Scripting/BinaryModule.h @@ -118,7 +118,7 @@ public: /// /// Tries to find a method in a given scripting type by the method name and parameters count. /// - /// If the the type contains more than one method of the given name and parameters count the returned value can be non-deterministic (one of the matching methods). + /// If the type contains more than one method of the given name and parameters count the returned value can be non-deterministic (one of the matching methods). /// The type to find method inside it. /// The method name. /// The method parameters count. @@ -182,7 +182,7 @@ public: /// Gets the value of a given scripting field. /// /// The field. - /// The object instance to get it's member field. Unused for static fields. + /// The object instance to get its member field. 
Unused for static fields. /// The output field value. /// True if failed, otherwise false. virtual bool GetFieldValue(void* field, const Variant& instance, Variant& result) @@ -194,7 +194,7 @@ public: /// Sets the value of a given scripting field. /// /// The field. - /// The object instance to set it's member field. Unused for static fields. + /// The object instance to set its member field. Unused for static fields. /// The field value to assign. /// True if failed, otherwise false. virtual bool SetFieldValue(void* field, const Variant& instance, Variant& value) @@ -242,7 +242,7 @@ public: /// /// Unloads the module (native library and C# assembly and any other scripting data). Unregisters the module. /// - /// If true module is during reloading and should force release the runtime data. Used for C# assembly to cleanup it's runtime data in Mono (or other scripting runtime). + /// If true module is during reloading and should force release the runtime data. Used for C# assembly to clean up it's runtime data in Mono (or other scripting runtime). 
virtual void Destroy(bool isReloading); }; From 7a89e78f4395cc459f7e53270d56bfaf45ffb522 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 26 Jul 2024 01:13:52 +0200 Subject: [PATCH 45/53] Fix incorrect `JsonAssetReference` serialization #2774 --- Source/Engine/Content/JsonAssetReference.cs | 1 + Source/Engine/Serialization/JsonConverters.cs | 54 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/Source/Engine/Content/JsonAssetReference.cs b/Source/Engine/Content/JsonAssetReference.cs index a3a17fe2b..59cf55cc1 100644 --- a/Source/Engine/Content/JsonAssetReference.cs +++ b/Source/Engine/Content/JsonAssetReference.cs @@ -12,6 +12,7 @@ namespace FlaxEngine #if FLAX_EDITOR [CustomEditor(typeof(FlaxEditor.CustomEditors.Editors.AssetRefEditor))] #endif + [Newtonsoft.Json.JsonConverter(typeof(Json.JsonAssetReferenceConverter))] public struct JsonAssetReference : IComparable, IComparable>, IEquatable> { /// diff --git a/Source/Engine/Serialization/JsonConverters.cs b/Source/Engine/Serialization/JsonConverters.cs index c7d937abc..5be6a6a53 100644 --- a/Source/Engine/Serialization/JsonConverters.cs +++ b/Source/Engine/Serialization/JsonConverters.cs @@ -429,6 +429,60 @@ namespace FlaxEngine.Json } } + internal sealed class JsonAssetReferenceConverter : JsonConverter + { + /// + public override unsafe void WriteJson(JsonWriter writer, object value, Newtonsoft.Json.JsonSerializer serializer) + { + var asset = (JsonAsset)value.GetType().GetField("Asset").GetValue(value); + var id = asset?.ID ?? 
Guid.Empty; + writer.WriteValue(JsonSerializer.GetStringID(&id)); + } + + /// + public override object ReadJson(JsonReader reader, Type objectType, object existingValue, Newtonsoft.Json.JsonSerializer serializer) + { + var result = Activator.CreateInstance(objectType); + if (reader.TokenType == JsonToken.String) + { + JsonSerializer.ParseID((string)reader.Value, out var id); + var asset = Content.LoadAsync(id); + objectType.GetField("Asset").SetValue(result, asset); + } + else if (reader.TokenType == JsonToken.StartObject) + { + // [Deprecated on 26.07.2024, expires on 26.07.2026] + while (reader.Read() && reader.TokenType != JsonToken.EndObject) + { + switch (reader.TokenType) + { + case JsonToken.PropertyName: + { + var propertyName = (string)reader.Value; + reader.Read(); + if (propertyName == "Asset" && reader.TokenType == JsonToken.String) + { + JsonSerializer.ParseID((string)reader.Value, out var id); + var asset = Content.LoadAsync(id); + objectType.GetField("Asset").SetValue(result, asset); + } + + break; + } + } + } + } + + return result; + } + + /// + public override bool CanConvert(Type objectType) + { + return objectType.Name.StartsWith("JsonAssetReference"); + } + } + /* /// /// Serialize Guid values using `N` format From 00cb2e25eb98194f0892f08de631f9703444d687 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 26 Jul 2024 23:14:04 +0200 Subject: [PATCH 46/53] Update OpenFBX to `Jun 22, 2024` --- Source/ThirdParty/OpenFBX/ofbx.cpp | 38 ++++++++++++++++++------------ Source/ThirdParty/OpenFBX/ofbx.h | 7 +++--- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/Source/ThirdParty/OpenFBX/ofbx.cpp b/Source/ThirdParty/OpenFBX/ofbx.cpp index ddcb70e68..67f34bfeb 100644 --- a/Source/ThirdParty/OpenFBX/ofbx.cpp +++ b/Source/ThirdParty/OpenFBX/ofbx.cpp @@ -12,7 +12,7 @@ #include #include -#if __cplusplus >= 202002L +#if __cplusplus >= 202002L && defined(__cpp_lib_bit_cast) #include // for std::bit_cast (C++20 and later) #endif #include @@ 
-20,6 +20,12 @@ namespace ofbx { +template static T read_value(const u8* value_ptr) { + T value; + memcpy(&value, value_ptr, sizeof(T)); + return value; +} + static int decodeIndex(int idx) { return (idx < 0) ? (-idx - 1) : idx; @@ -47,11 +53,12 @@ struct Allocator { Page* first = nullptr; ~Allocator() { - while (first) { - Page* page = first; - first = first->header.next; - delete page; - } + Page* p = first; + while (p) { + Page* n = p->header.next; + delete p; + p = n; + } } template T* allocate(Args&&... args) @@ -416,7 +423,7 @@ bool DataView::operator==(const char* rhs) const ++c; ++c2; } - return *c2 == '\0' || c2 == (const char*)end && *c == '\0'; + return (*c2 == '\0' || c2 == (const char*)end) && *c == '\0'; } @@ -585,7 +592,7 @@ static bool decompress(const u8* in, size_t in_size, u8* out, size_t out_size) template static OptionalError read(Cursor* cursor) { if (cursor->current + sizeof(T) > cursor->end) return Error("Reading past the end"); - T value = *(const T*)cursor->current; + T value = read_value(cursor->current); cursor->current += sizeof(T); return value; } @@ -774,7 +781,8 @@ static OptionalError readElement(Cursor* cursor, u32 version, Allocato static bool isEndLine(const Cursor& cursor) { - return *cursor.current == '\n' || *cursor.current == '\r' && cursor.current + 1 < cursor.end && *(cursor.current + 1) != '\n'; + return (*cursor.current == '\n') + || (*cursor.current == '\r' && cursor.current + 1 < cursor.end && *(cursor.current + 1) != '\n'); } @@ -1043,7 +1051,7 @@ static OptionalError tokenize(const u8* data, size_t size, u32& versio cursor.current = data; cursor.end = data + size; -#if __cplusplus >= 202002L +#if __cplusplus >= 202002L && defined(__cpp_lib_bit_cast) const Header* header = std::bit_cast(cursor.current); #else Header header_temp; @@ -1939,7 +1947,7 @@ struct Scene : IScene int getGeometryCount() const override { return (int)m_geometries.size(); } int getMeshCount() const override { return (int)m_meshes.size(); } float 
getSceneFrameRate() const override { return m_scene_frame_rate; } - const GlobalInfo* getGlobalInfo() const override { return &m_info; } + const GlobalInfo* getGlobalInfo() const override { return &m_info; } const GlobalSettings* getGlobalSettings() const override { return &m_settings; } const Object* const* getAllObjects() const override { return m_all_objects.empty() ? nullptr : &m_all_objects[0]; } @@ -2904,8 +2912,8 @@ static bool parseMemory(const Property& property, T* out, int max_size_bytes) { const u8* data = property.value.begin + sizeof(u32) * 3; if (data > property.value.end) return false; - u32 enc = *(const u32*)(property.value.begin + 4); - u32 len = *(const u32*)(property.value.begin + 8); + u32 enc = read_value(property.value.begin + 4); + u32 len = read_value(property.value.begin + 8); if (enc == 0) { if ((int)len > max_size_bytes) return false; @@ -3518,7 +3526,7 @@ static bool parseObjects(const Element& root, Scene& scene, u16 flags, Allocator { obj = allocator.allocate(scene, *iter.second.element); } - else if (iter.second.element->id == "Deformer" && !ignore_blend_shapes) + else if (iter.second.element->id == "Deformer") { IElementProperty* class_prop = iter.second.element->getProperty(2); if (!class_prop) class_prop = iter.second.element->getProperty(1); @@ -3571,7 +3579,7 @@ static bool parseObjects(const Element& root, Scene& scene, u16 flags, Allocator obj = mesh; } } - else if (class_prop->getValue() == "LimbNode" && !ignore_limbs) + else if ((class_prop->getValue() == "LimbNode" || class_prop->getValue() == "Root") && !ignore_limbs) obj = allocator.allocate(scene, *iter.second.element); else obj = allocator.allocate(scene, *iter.second.element); diff --git a/Source/ThirdParty/OpenFBX/ofbx.h b/Source/ThirdParty/OpenFBX/ofbx.h index 06a694f33..bff12365d 100644 --- a/Source/ThirdParty/OpenFBX/ofbx.h +++ b/Source/ThirdParty/OpenFBX/ofbx.h @@ -8,7 +8,7 @@ namespace ofbx typedef unsigned char u8; typedef unsigned short u16; typedef unsigned 
int u32; -#ifdef _WIN32 +#if defined(_WIN32) || defined(__ANDROID__) typedef long long i64; typedef unsigned long long u64; #else @@ -70,8 +70,7 @@ struct FVec4 { float x, y, z, w; }; struct FMatrix { float m[16]; }; struct FQuat{ float x, y, z, w; }; -#define OFBX_SINGLE_PRECISION -#ifdef OFBX_SINGLE_PRECISION +#ifndef OFBX_DOUBLE_PRECISION // use floats for vertices, normals, uvs, ... using Vec2 = FVec2; using Vec3 = FVec3; @@ -770,7 +769,7 @@ struct IScene virtual const TakeInfo* getTakeInfo(const char* name) const = 0; virtual float getSceneFrameRate() const = 0; virtual const GlobalSettings* getGlobalSettings() const = 0; - virtual const GlobalInfo* getGlobalInfo() const = 0; + virtual const GlobalInfo* getGlobalInfo() const = 0; virtual ~IScene() {} }; From e3bb38f13b2958f7905a157b4cfc962ff5b16500 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 26 Jul 2024 23:15:07 +0200 Subject: [PATCH 47/53] Move arrays caching to be local for import and setup ignore flags for OpenFBX #2672 --- .../Tools/ModelTool/ModelTool.OpenFBX.cpp | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp index eb13dcb48..6e94f73d3 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp @@ -104,6 +104,10 @@ struct OpenFbxImporterData Array Materials; Array ImportedMaterials; + Array TriangulatedIndicesCache; + Array BlendIndicesCache; + Array BlendWeightsCache; + OpenFbxImporterData(const String& path, const ModelTool::Options& options, ofbx::IScene* scene) : Scene(scene) , ScenePtr(scene) @@ -685,10 +689,8 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* const ofbx::Skin* skin = aMesh->getSkin(); const ofbx::BlendShape* blendShape = aMesh->getBlendShape(); - static Array triangulatedIndices; + auto& triangulatedIndices = 
data.TriangulatedIndicesCache; triangulatedIndices.Resize(vertexCount, false); - static Array blendIndices; - static Array blendWeights; // Properties const ofbx::Material* aMaterial = nullptr; @@ -833,6 +835,8 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* // Blend Indices and Blend Weights if (skin && skin->getClusterCount() > 0 && EnumHasAnyFlags(data.Options.ImportTypes, ImportDataTypes::Skeleton)) { + auto& blendIndices = data.BlendIndicesCache; + auto& blendWeights = data.BlendWeightsCache; blendIndices.Resize(positions.values_count, false); blendWeights.Resize(positions.values_count, false); blendIndices.SetAll(Int4::Zero); @@ -1261,20 +1265,26 @@ bool ModelTool::ImportDataOpenFBX(const String& path, ModelData& data, Options& errorMsg = TEXT("Cannot load file."); return true; } - ofbx::u16 loadFlags = 0; + ofbx::LoadFlags loadFlags = ofbx::LoadFlags::NONE; if (EnumHasAnyFlags(options.ImportTypes, ImportDataTypes::Geometry)) { if (!options.ImportBlendShapes) - loadFlags |= (ofbx::u64)ofbx::LoadFlags::IGNORE_BLEND_SHAPES; + loadFlags |= ofbx::LoadFlags::IGNORE_BLEND_SHAPES; } else { - loadFlags |= (ofbx::u64)ofbx::LoadFlags::IGNORE_GEOMETRY | (ofbx::u64)ofbx::LoadFlags::IGNORE_BLEND_SHAPES; + loadFlags |= ofbx::LoadFlags::IGNORE_GEOMETRY | ofbx::LoadFlags::IGNORE_BLEND_SHAPES; } + if (EnumHasNoneFlags(options.ImportTypes, ImportDataTypes::Materials)) + loadFlags |= ofbx::LoadFlags::IGNORE_MATERIALS; + if (EnumHasNoneFlags(options.ImportTypes, ImportDataTypes::Textures)) + loadFlags |= ofbx::LoadFlags::IGNORE_TEXTURES; + if (EnumHasNoneFlags(options.ImportTypes, ImportDataTypes::Animations)) + loadFlags |= ofbx::LoadFlags::IGNORE_ANIMATIONS; ofbx::IScene* scene; { PROFILE_CPU_NAMED("ofbx::load"); - scene = ofbx::load(fileData.Get(), fileData.Count(), loadFlags); + scene = ofbx::load(fileData.Get(), fileData.Count(), (ofbx::u16)loadFlags); } if (!scene) { From c4f37741b7bddbd8dad276cca682b407b029fd62 Mon Sep 17 00:00:00 2001 
From: Wojtek Figat Date: Fri, 26 Jul 2024 23:54:27 +0200 Subject: [PATCH 48/53] Fix blend shapes importing #2672 --- .../Engine/Graphics/Models/ModelData.Tool.cpp | 17 +++---- .../Tools/ModelTool/ModelTool.OpenFBX.cpp | 51 ++++++++++--------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/Source/Engine/Graphics/Models/ModelData.Tool.cpp b/Source/Engine/Graphics/Models/ModelData.Tool.cpp index e8a412550..a62fa6bae 100644 --- a/Source/Engine/Graphics/Models/ModelData.Tool.cpp +++ b/Source/Engine/Graphics/Models/ModelData.Tool.cpp @@ -304,18 +304,15 @@ void MeshData::BuildIndexBuffer() dstBlendShape.Name = srcBlendShape.Name; dstBlendShape.Weight = srcBlendShape.Weight; - dstBlendShape.Vertices.Resize(newVertexCounter); - for (int32 i = 0, j = 0; i < srcBlendShape.Vertices.Count(); i++) + dstBlendShape.Vertices.EnsureCapacity(srcBlendShape.Vertices.Count()); + for (int32 i = 0; i < srcBlendShape.Vertices.Count(); i++) { - const auto idx = mapping[i]; - if (idx != INVALID_INDEX) + auto& v = srcBlendShape.Vertices[i]; + int32 newVertexIndex = v.VertexIndex < (uint32)vertexCount ? 
mapping[v.VertexIndex] : INVALID_INDEX; + if (newVertexIndex != INVALID_INDEX) { - auto& v = srcBlendShape.Vertices[i]; - ASSERT_LOW_LAYER(v.VertexIndex < (uint32)vertexCount); - ASSERT_LOW_LAYER(mapping[v.VertexIndex] != INVALID_INDEX); - v.VertexIndex = mapping[v.VertexIndex]; - ASSERT_LOW_LAYER(v.VertexIndex < (uint32)newVertexCounter); - dstBlendShape.Vertices[j++] = v; + v.VertexIndex = newVertexIndex; + dstBlendShape.Vertices.Add(v); } } } diff --git a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp index 6e94f73d3..7e56879a2 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp @@ -915,44 +915,46 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* { const ofbx::BlendShapeChannel* channel = blendShape->getBlendShapeChannel(channelIndex); - // Use last shape + // Use the last shape const int targetShapeCount = channel->getShapeCount(); if (targetShapeCount == 0) continue; const ofbx::Shape* shape = channel->getShape(targetShapeCount - 1); - - if (shape->getVertexCount() != vertexCount) + const ofbx::Vec3* shapeVertices = shape->getVertices(); + const ofbx::Vec3* shapeNormals = shape->getNormals(); + const int* shapeIndices = shape->getIndices(); + const int shapeVertexCount = shape->getVertexCount(); + const int shapeIndexCount = shape->getIndexCount(); + if (shapeVertexCount != shapeIndexCount) { - LOG(Error, "Blend shape '{0}' in mesh '{1}' has different amount of vertices ({2}) than mesh ({3})", String(shape->name), mesh.Name, shape->getVertexCount(), vertexCount); + LOG(Error, "Blend shape '{0}' in mesh '{1}' has different amount of vertices ({2}) and indices ({3})", String(shape->name), mesh.Name, shapeVertexCount, shapeIndexCount); continue; } BlendShape& blendShapeData = mesh.BlendShapes.AddOne(); blendShapeData.Name = shape->name; blendShapeData.Weight = channel->getShapeCount() > 1 ? 
(float)(channel->getDeformPercent() / 100.0) : 1.0f; + blendShapeData.Vertices.EnsureCapacity(shapeIndexCount); - blendShapeData.Vertices.Resize(vertexCount); - for (int32 i = 0; i < blendShapeData.Vertices.Count(); i++) - blendShapeData.Vertices.Get()[i].VertexIndex = i; - - auto shapeVertices = shape->getVertices(); - for (int32 i = 0; i < blendShapeData.Vertices.Count(); i++) + for (int32 i = 0; i < shapeIndexCount; i++) { - auto delta = ToFloat3(shapeVertices[i]) - mesh.Positions.Get()[i]; - blendShapeData.Vertices.Get()[i].PositionDelta = delta; - } - - auto shapeNormals = shape->getNormals(); - for (int32 i = 0; i < blendShapeData.Vertices.Count(); i++) - { - auto delta = ToFloat3(shapeNormals[i]); - if (data.ConvertRH) + int shapeIndex = shapeIndices[i]; + Float3 positionDelta = ToFloat3(shapeVertices[i]); + Float3 normalDelta = shapeNormals ? ToFloat3(shapeNormals[i]) : Float3::Zero; + for (int32 vertexIndex = 0; vertexIndex < vertexCount; vertexIndex++) { - // Mirror normals along the Z axis - delta.Z *= -1.0f; + int sourceIndex = triangulatedIndices[vertexIndex]; + sourceIndex = positions.indices[sourceIndex]; + if (sourceIndex == shapeIndex) + { + // Add blend shape vertex + BlendShapeVertex v; + v.VertexIndex = vertexIndex; + v.PositionDelta = positionDelta; + v.NormalDelta = normalDelta; + blendShapeData.Vertices.Add(v); + } } - delta = delta - mesh.Normals.Get()[i]; - blendShapeData.Vertices.Get()[i].NormalDelta = delta; } } } @@ -965,7 +967,10 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* for (auto& blendShapeData : mesh.BlendShapes) { for (auto& v : blendShapeData.Vertices) + { v.PositionDelta.Z *= -1.0f; + v.NormalDelta.Z *= -1.0f; + } } } From 1843606074f8fce5a466cd41b769185a7ad958c5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 27 Jul 2024 12:28:30 +0200 Subject: [PATCH 49/53] Minor tweaks --- Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp | 8 +++----- Source/Engine/Tools/ModelTool/ModelTool.cpp 
| 2 ++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp index 7e56879a2..088d11c33 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp @@ -939,8 +939,9 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* for (int32 i = 0; i < shapeIndexCount; i++) { int shapeIndex = shapeIndices[i]; - Float3 positionDelta = ToFloat3(shapeVertices[i]); - Float3 normalDelta = shapeNormals ? ToFloat3(shapeNormals[i]) : Float3::Zero; + BlendShapeVertex v; + v.PositionDelta = ToFloat3(shapeVertices[i]); + v.NormalDelta = shapeNormals ? ToFloat3(shapeNormals[i]) : Float3::Zero; for (int32 vertexIndex = 0; vertexIndex < vertexCount; vertexIndex++) { int sourceIndex = triangulatedIndices[vertexIndex]; @@ -948,10 +949,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* if (sourceIndex == shapeIndex) { // Add blend shape vertex - BlendShapeVertex v; v.VertexIndex = vertexIndex; - v.PositionDelta = positionDelta; - v.NormalDelta = normalDelta; blendShapeData.Vertices.Add(v); } } diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index bfbd9421b..bb2179ecb 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -1035,6 +1035,7 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option if (min < 0 || max >= data.Skeleton.Bones.Count()) { LOG(Warning, "Imported mesh \'{0}\' has invalid blend indices. It may result in invalid rendering.", mesh->Name); + break; } } @@ -1045,6 +1046,7 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option if (Math::Abs(sum - 1.0f) > ZeroTolerance) { LOG(Warning, "Imported mesh \'{0}\' has invalid blend weights. 
It may result in invalid rendering.", mesh->Name); + break; } } } From 6081a159e35bd576915af1aee731d04b93584b42 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 4 Aug 2024 20:18:05 +0200 Subject: [PATCH 50/53] Fix new skinned mesh importing to properly handle vertex indices #2672 --- Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp index 088d11c33..20e2aca8e 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp @@ -870,7 +870,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* { int vtxIndex = clusterIndices[j]; float vtxWeight = (float)clusterWeights[j]; - if (vtxWeight <= 0 || vtxIndex < 0 || vtxIndex >= vertexCount) + if (vtxWeight <= 0 || vtxIndex < 0 || vtxIndex >= positions.values_count) continue; Int4& indices = blendIndices.Get()[vtxIndex]; Float4& weights = blendWeights.Get()[vtxIndex]; @@ -898,10 +898,11 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* // Remap blend values to triangulated data mesh.BlendIndices.Resize(vertexCount, false); mesh.BlendWeights.Resize(vertexCount, false); - for (int i = 0; i < triangulatedIndices.Count(); i++) + for (int i = 0; i < vertexCount; i++) { - mesh.BlendIndices.Get()[i] = blendIndices[positions.indices[triangulatedIndices[i]]]; - mesh.BlendWeights.Get()[i] = blendWeights[positions.indices[triangulatedIndices[i]]]; + const int idx = positions.indices[triangulatedIndices[i]]; + mesh.BlendIndices.Get()[i] = blendIndices[idx]; + mesh.BlendWeights.Get()[i] = blendWeights[idx]; } mesh.NormalizeBlendWeights(); @@ -944,8 +945,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* v.NormalDelta = shapeNormals ? 
ToFloat3(shapeNormals[i]) : Float3::Zero; for (int32 vertexIndex = 0; vertexIndex < vertexCount; vertexIndex++) { - int sourceIndex = triangulatedIndices[vertexIndex]; - sourceIndex = positions.indices[sourceIndex]; + int sourceIndex = positions.indices[triangulatedIndices[vertexIndex]]; if (sourceIndex == shapeIndex) { // Add blend shape vertex From 5171c33b72ce18873c8877bf6cff82cf151e2afd Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 4 Aug 2024 20:20:26 +0200 Subject: [PATCH 51/53] Add shared memory cache for fbx importer mesh processing --- .../Tools/ModelTool/ModelTool.OpenFBX.cpp | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp index 20e2aca8e..b094763d7 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp @@ -14,6 +14,11 @@ #include "Engine/Platform/File.h" #define OPEN_FBX_CONVERT_SPACE 1 +#if BUILD_DEBUG +#define OPEN_FBX_GET_CACHE_LIST(arrayName, varName, size) data.arrayName.Resize(size, false); auto& varName = data.arrayName +#else +#define OPEN_FBX_GET_CACHE_LIST(arrayName, varName, size) data.arrayName.Resize(size, false); auto* varName = data.arrayName.Get() +#endif // Import OpenFBX library // Source: https://github.com/nem0/OpenFBX @@ -107,6 +112,9 @@ struct OpenFbxImporterData Array TriangulatedIndicesCache; Array BlendIndicesCache; Array BlendWeightsCache; + Array TriangulatePointsCache; + Array TriangulateIndicesCache; + Array TriangulateEarIndicesCache; OpenFbxImporterData(const String& path, const ModelTool::Options& options, ofbx::IScene* scene) : Scene(scene) @@ -528,7 +536,7 @@ bool ImportBones(OpenFbxImporterData& data, String& errorMsg) return false; } -int Triangulate(const ofbx::GeometryData& geom, const ofbx::GeometryPartition::Polygon& polygon, int* triangulatedIndices) +int Triangulate(OpenFbxImporterData& data, 
const ofbx::GeometryData& geom, const ofbx::GeometryPartition::Polygon& polygon, int* triangulatedIndices) { if (polygon.vertex_count < 3) return 0; @@ -592,9 +600,9 @@ int Triangulate(const ofbx::GeometryData& geom, const ofbx::GeometryPartition::P } // Setup arrays for temporary data (TODO: maybe double-linked list is more optimal?) - static Array points; - static Array indices; - static Array earIndices; + auto& points = data.TriangulatePointsCache; + auto& indices = data.TriangulateIndicesCache; + auto& earIndices = data.TriangulateEarIndicesCache; points.Clear(); indices.Clear(); earIndices.Clear(); @@ -688,9 +696,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* const ofbx::Vec4Attributes& colors = geometryData.getColors(); const ofbx::Skin* skin = aMesh->getSkin(); const ofbx::BlendShape* blendShape = aMesh->getBlendShape(); - - auto& triangulatedIndices = data.TriangulatedIndicesCache; - triangulatedIndices.Resize(vertexCount, false); + OPEN_FBX_GET_CACHE_LIST(TriangulatedIndicesCache, triangulatedIndices, vertexCount); // Properties const ofbx::Material* aMaterial = nullptr; @@ -704,7 +710,7 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* int numIndicesTotal = 0; for (int i = 0; i < partition.polygon_count; i++) { - int numIndices = Triangulate(geometryData, partition.polygons[i], &triangulatedIndices[numIndicesTotal]); + int numIndices = Triangulate(data, geometryData, partition.polygons[i], &triangulatedIndices[numIndicesTotal]); for (int j = numIndicesTotal; j < numIndicesTotal + numIndices; j++) mesh.Positions.Get()[j] = ToFloat3(positions.get(triangulatedIndices[j])); numIndicesTotal += numIndices; @@ -835,12 +841,10 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* // Blend Indices and Blend Weights if (skin && skin->getClusterCount() > 0 && EnumHasAnyFlags(data.Options.ImportTypes, ImportDataTypes::Skeleton)) { - auto& blendIndices = 
data.BlendIndicesCache; - auto& blendWeights = data.BlendWeightsCache; - blendIndices.Resize(positions.values_count, false); - blendWeights.Resize(positions.values_count, false); - blendIndices.SetAll(Int4::Zero); - blendWeights.SetAll(Float4::Zero); + OPEN_FBX_GET_CACHE_LIST(BlendIndicesCache, blendIndices, positions.values_count); + OPEN_FBX_GET_CACHE_LIST(BlendWeightsCache, blendWeights, positions.values_count); + data.BlendIndicesCache.SetAll(Int4::Zero); + data.BlendWeightsCache.SetAll(Float4::Zero); for (int clusterIndex = 0, clusterCount = skin->getClusterCount(); clusterIndex < clusterCount; clusterIndex++) { From 7fbf75c623d72636d91e0fcb493f0b2de5d2557d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 4 Aug 2024 20:20:51 +0200 Subject: [PATCH 52/53] Add skinned model import data checks to run in all build configs --- Source/Engine/Tools/ModelTool/ModelTool.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp index bb2179ecb..296a21827 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp @@ -1024,25 +1024,24 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option mesh->BlendIndices.SetAll(indices); mesh->BlendWeights.SetAll(weights); } -#if BUILD_DEBUG else { auto& indices = mesh->BlendIndices; for (int32 j = 0; j < indices.Count(); j++) { - const int32 min = indices[j].MinValue(); - const int32 max = indices[j].MaxValue(); + const Int4 ij = indices.Get()[j]; + const int32 min = ij.MinValue(); + const int32 max = ij.MaxValue(); if (min < 0 || max >= data.Skeleton.Bones.Count()) { LOG(Warning, "Imported mesh \'{0}\' has invalid blend indices. 
It may result in invalid rendering.", mesh->Name); break; } } - auto& weights = mesh->BlendWeights; for (int32 j = 0; j < weights.Count(); j++) { - const float sum = weights[j].SumValues(); + const float sum = weights.Get()[j].SumValues(); if (Math::Abs(sum - 1.0f) > ZeroTolerance) { LOG(Warning, "Imported mesh \'{0}\' has invalid blend weights. It may result in invalid rendering.", mesh->Name); @@ -1050,7 +1049,6 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option } } } -#endif } } if (EnumHasAnyFlags(options.ImportTypes, ImportDataTypes::Animations)) From a49751c31dc6001e5e6edc991cb6a2abbc573ea8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 5 Aug 2024 10:54:46 +0200 Subject: [PATCH 53/53] Fix regression --- Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp index b094763d7..dcb0b6c5f 100644 --- a/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp +++ b/Source/Engine/Tools/ModelTool/ModelTool.OpenFBX.cpp @@ -876,8 +876,8 @@ bool ProcessMesh(ModelData& result, OpenFbxImporterData& data, const ofbx::Mesh* float vtxWeight = (float)clusterWeights[j]; if (vtxWeight <= 0 || vtxIndex < 0 || vtxIndex >= positions.values_count) continue; - Int4& indices = blendIndices.Get()[vtxIndex]; - Float4& weights = blendWeights.Get()[vtxIndex]; + Int4& indices = blendIndices[vtxIndex]; + Float4& weights = blendWeights[vtxIndex]; for (int32 k = 0; k < 4; k++) {