From 77b21de534eeb14cba16984f85ab2eeff986fc4f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 7 Mar 2021 21:30:01 +0100 Subject: [PATCH] Fix UTF-8 and UTF-16 encoding support usage in Json resources Closes #310 Closes #27 --- .../Editor/Managed/ManagedEditor.Internal.cpp | 8 +- .../Content/Storage/JsonStorageProxy.cpp | 12 +- Source/Engine/ContentImporters/CreateJson.cpp | 10 +- Source/Engine/ContentImporters/CreateJson.h | 4 +- Source/Engine/Core/Types/String.cpp | 100 +++++++++++++++++ Source/Engine/Core/Types/String.h | 105 +++--------------- Source/Engine/Level/Actor.cpp | 5 +- Source/Engine/Platform/Base/FileBase.cpp | 7 +- Source/Engine/Platform/Win32/Win32Network.cpp | 4 +- .../Engine/Scripting/ManagedSerialization.cpp | 6 +- Source/Engine/Serialization/JsonTools.h | 4 +- Source/Engine/Serialization/JsonWriter.h | 13 +-- Source/Engine/UI/UICanvas.cpp | 3 +- Source/Engine/UI/UIControl.cpp | 5 +- Source/Engine/Utilities/StringConverter.h | 63 +++++++---- Source/ThirdParty/rapidjson/document.h | 4 +- 16 files changed, 186 insertions(+), 167 deletions(-) diff --git a/Source/Editor/Managed/ManagedEditor.Internal.cpp b/Source/Editor/Managed/ManagedEditor.Internal.cpp index cd99f3f98..7b178b956 100644 --- a/Source/Editor/Managed/ManagedEditor.Internal.cpp +++ b/Source/Editor/Managed/ManagedEditor.Internal.cpp @@ -578,15 +578,11 @@ public: MUtils::ToString(outputPathObj, outputPath); FileSystem::NormalizePath(outputPath); - DataContainer data; - const auto dataObjLength = mono_string_length(dataObj); const auto dataObjPtr = mono_string_to_utf8(dataObj); - data.Link(dataObjPtr, dataObjLength); + StringAnsiView data(dataObjPtr); - DataContainer dataTypeName; - const auto dataTypeNameObjLength = mono_string_length(dataTypeNameObj); const auto dataTypeNameObjPtr = mono_string_to_utf8(dataTypeNameObj); - dataTypeName.Link(dataTypeNameObjPtr, dataTypeNameObjLength); + StringAnsiView dataTypeName(dataTypeNameObjPtr); const bool result = CreateJson::Create(outputPath, data, dataTypeName); diff --git a/Source/Engine/Content/Storage/JsonStorageProxy.cpp b/Source/Engine/Content/Storage/JsonStorageProxy.cpp index 7b2fef534..8b2f07944 100644 --- a/Source/Engine/Content/Storage/JsonStorageProxy.cpp +++ b/Source/Engine/Content/Storage/JsonStorageProxy.cpp @@ -3,10 +3,10 @@ #include "JsonStorageProxy.h" #include "Engine/Platform/File.h" #include "Engine/Core/Log.h" -#include "Engine/Level/Types.h" #include "Engine/Core/Utilities.h" #include "Engine/Serialization/JsonTools.h" #include "Engine/Serialization/JsonWriters.h" +#include "Engine/Level/Types.h" #include "Engine/Debug/Exceptions/JsonParseException.h" #include @@ -17,10 +17,6 @@ bool JsonStorageProxy::IsValidExtension(const StringView& extension) bool JsonStorageProxy::GetAssetInfo(const StringView& path, Guid& resultId, String& resultDataTypeName) { - // TODO: - -#if USE_EDITOR || true - // TODO: we could just open file and start reading until we find 'ID:..' without parsing whole file - could be much more faster // Load file @@ -51,12 +47,6 @@ bool JsonStorageProxy::GetAssetInfo(const StringView& path, Guid& resultId, Stri } return false; - -#else - - todo_loading_json_resources_in_builded_version - -#endif } #if USE_EDITOR diff --git a/Source/Engine/ContentImporters/CreateJson.cpp b/Source/Engine/ContentImporters/CreateJson.cpp index 49ccc6e90..6141cb03e 100644 --- a/Source/Engine/ContentImporters/CreateJson.cpp +++ b/Source/Engine/ContentImporters/CreateJson.cpp @@ -20,16 +20,12 @@ bool CreateJson::Create(const StringView& path, rapidjson_flax::StringBuffer& da bool CreateJson::Create(const StringView& path, rapidjson_flax::StringBuffer& data, const char* dataTypename) { - DataContainer data1; - DataContainer data2; - - data1.Link((char*)data.GetString(), (int32)data.GetSize()); - data2.Link((char*)dataTypename, StringUtils::Length(dataTypename)); - + StringAnsiView data1((char*)data.GetString(), (int32)data.GetSize()); + StringAnsiView data2((char*)dataTypename, StringUtils::Length(dataTypename)); return Create(path, data1, data2); } -bool CreateJson::Create(const StringView& path, DataContainer& data, DataContainer& dataTypename) +bool CreateJson::Create(const StringView& path, StringAnsiView& data, StringAnsiView& dataTypename) { Guid id = Guid::New(); diff --git a/Source/Engine/ContentImporters/CreateJson.h b/Source/Engine/ContentImporters/CreateJson.h index f280b262e..ccc15a792 100644 --- a/Source/Engine/ContentImporters/CreateJson.h +++ b/Source/Engine/ContentImporters/CreateJson.h @@ -9,7 +9,7 @@ #include "Engine/Serialization/Json.h" /// -/// Json resources factory. +/// Json resources factory. Ensure to keep data encoded in UTF-8. /// class CreateJson { @@ -17,7 +17,7 @@ public: static bool Create(const StringView& path, rapidjson_flax::StringBuffer& data, const String& dataTypename); static bool Create(const StringView& path, rapidjson_flax::StringBuffer& data, const char* dataTypename); - static bool Create(const StringView& path, DataContainer& data, DataContainer& dataTypename); + static bool Create(const StringView& path, StringAnsiView& data, StringAnsiView& dataTypename); }; #endif diff --git a/Source/Engine/Core/Types/String.cpp b/Source/Engine/Core/Types/String.cpp index 1cd9fde88..040bb9ab2 100644 --- a/Source/Engine/Core/Types/String.cpp +++ b/Source/Engine/Core/Types/String.cpp @@ -63,6 +63,12 @@ void String::Set(const char* chars, int32 length) StringUtils::ConvertANSI2UTF16(chars, _data, length); } +void String::SetUTF8(const char* chars, int32 length) +{ + Platform::Free(_data); + _data = StringUtils::ConvertUTF82UTF16(chars, length, _length); +} + void String::Append(const Char* chars, int32 count) { if (count == 0) @@ -338,6 +344,84 @@ StringAnsi::StringAnsi(const StringAnsiView& str) Set(str.Get(), str.Length()); } +void StringAnsi::Set(const char* chars, int32 length) +{ + if (length != _length) + { + ASSERT(length >= 0); + Platform::Free(_data); + if (length != 0) + { + _data = (char*)Platform::Allocate((length + 1) * sizeof(char), 16); + _data[length] = 0; + } + else + { + _data = nullptr; + } + _length = length; + } + + Platform::MemoryCopy(_data, chars, length * sizeof(char)); +} + +void StringAnsi::Set(const Char* chars, int32 length) +{ + if (length != _length) + { + Platform::Free(_data); + if (length != 0) + { + _data = (char*)Platform::Allocate((length + 1) * sizeof(char), 16); + _data[length] = 0; + } + else + { + _data = nullptr; + } + _length = length; + } + + if (_data) + StringUtils::ConvertUTF162ANSI(chars, _data, length); +} + +void StringAnsi::Append(const char* chars, int32 count) +{ + if (count == 0) + return; + + const auto oldData = _data; + const auto oldLength = _length; + + _length = oldLength + count; + _data = (char*)Platform::Allocate((_length + 1) * sizeof(char), 16); + + Platform::MemoryCopy(_data, oldData, oldLength * sizeof(char)); + Platform::MemoryCopy(_data + oldLength, chars, count * sizeof(char)); + _data[_length] = 0; + + Platform::Free(oldData); +} + +void StringAnsi::Append(const Char* chars, int32 count) +{ + if (count == 0) + return; + + const auto oldData = _data; + const auto oldLength = _length; + + _length = oldLength + count; + _data = (char*)Platform::Allocate((_length + 1) * sizeof(char), 16); + + Platform::MemoryCopy(_data, oldData, oldLength * sizeof(char)); + StringUtils::ConvertUTF162ANSI(chars, _data + oldLength, count * sizeof(char)); + _data[_length] = 0; + + Platform::Free(oldData); +} + StringAnsi& StringAnsi::operator+=(const StringAnsiView& str) { Append(str.Get(), str.Length()); @@ -368,6 +452,22 @@ bool StringAnsi::EndsWith(const StringAnsiView& suffix, StringSearchCase searchC return !StringUtils::Compare(&(*this)[Length() - suffix.Length()], *suffix); } +StringAnsi StringAnsi::ToLower() const +{ + StringAnsi result(*this); + for (int32 i = 0; i < result.Length(); i++) + result[i] = StringUtils::ToLower(result[i]); + return result; +} + +StringAnsi StringAnsi::ToUpper() const +{ + StringAnsi result(*this); + for (int32 i = 0; i < result.Length(); i++) + result[i] = StringUtils::ToUpper(result[i]); + return result; +} + void StringAnsi::Insert(int32 startIndex, const StringAnsi& other) { ASSERT(other._data != _data); diff --git a/Source/Engine/Core/Types/String.h b/Source/Engine/Core/Types/String.h index c42d82dde..87babc637 100644 --- a/Source/Engine/Core/Types/String.h +++ b/Source/Engine/Core/Types/String.h @@ -628,6 +628,13 @@ public: /// The number of characters to assign. void Set(const char* chars, int32 length); + /// + /// Sets an array of characters to the string. + /// + /// The pointer to the start of an array of characters to set (UTF-8). This array need not be null-terminated, and null characters are not treated specially. + /// The number of characters to assign. + void SetUTF8(const char* chars, int32 length); + /// /// Appends an array of characters to the string. /// @@ -1306,100 +1313,30 @@ public: /// /// Sets an array of characters to the string. /// - /// The pointer to the start of an array of characters to set. This array need not be null-terminated, and null characters are not treated specially. + /// The pointer to the start of an array of characters to set (ANSI). This array need not be null-terminated, and null characters are not treated specially. /// The number of characters to assign. - void Set(const char* chars, int32 length) - { - if (length != _length) - { - ASSERT(length >= 0); - Platform::Free(_data); - if (length != 0) - { - _data = (char*)Platform::Allocate((length + 1) * sizeof(char), 16); - _data[length] = 0; - } - else - { - _data = nullptr; - } - _length = length; - } - - Platform::MemoryCopy(_data, chars, length * sizeof(char)); - } + void Set(const char* chars, int32 length); /// /// Sets an array of characters to the string. /// - /// The pointer to the start of an array of characters to set. This array need not be null-terminated, and null characters are not treated specially. + /// The pointer to the start of an array of characters to set (UTF-16). This array need not be null-terminated, and null characters are not treated specially. /// The number of characters to assign. - void Set(const Char* chars, int32 length) - { - if (length != _length) - { - Platform::Free(_data); - if (length != 0) - { - _data = (char*)Platform::Allocate((length + 1) * sizeof(char), 16); - _data[length] = 0; - } - else - { - _data = nullptr; - } - _length = length; - } - - if (_data) - StringUtils::ConvertUTF162ANSI(chars, _data, length); - } + void Set(const Char* chars, int32 length); /// /// Appends an array of characters to the string. /// /// The array of characters to append. It does not need be null-terminated, and null characters are not treated specially. /// The number of characters to append. - void Append(const char* chars, int32 count) - { - if (count == 0) - return; - - const auto oldData = _data; - const auto oldLength = _length; - - _length = oldLength + count; - _data = (char*)Platform::Allocate((_length + 1) * sizeof(char), 16); - - Platform::MemoryCopy(_data, oldData, oldLength * sizeof(char)); - Platform::MemoryCopy(_data + oldLength, chars, count * sizeof(char)); - _data[_length] = 0; - - Platform::Free(oldData); - } + void Append(const char* chars, int32 count); /// /// Appends an array of characters to the string. /// /// The array of characters to append. It does not need be null-terminated, and null characters are not treated specially. /// The number of characters to append. - void Append(const Char* chars, int32 count) - { - if (count == 0) - return; - - const auto oldData = _data; - const auto oldLength = _length; - - _length = oldLength + count; - _data = (char*)Platform::Allocate((_length + 1) * sizeof(char), 16); - - Platform::MemoryCopy(_data, oldData, oldLength * sizeof(char)); - StringUtils::ConvertUTF162ANSI(chars, _data + oldLength, count * sizeof(char)); - _data[_length] = 0; - - Platform::Free(oldData); - } + void Append(const Char* chars, int32 count); /// /// Appends the specified text to this string. @@ -1663,25 +1600,13 @@ public: /// Converts all uppercase characters to lowercase. /// /// The lowercase string. - StringAnsi ToLower() const - { - StringAnsi result(*this); - for (int32 i = 0; i < result.Length(); i++) - result[i] = StringUtils::ToLower(result[i]); - return result; - } + StringAnsi ToLower() const; /// /// Converts all lowercase characters to uppercase. /// /// The uppercase string. - StringAnsi ToUpper() const - { - StringAnsi result(*this); - for (int32 i = 0; i < result.Length(); i++) - result[i] = StringUtils::ToUpper(result[i]); - return result; - } + StringAnsi ToUpper() const; /// /// Gets the left most given number of characters. diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 41e929da0..3753f6140 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -1710,7 +1710,10 @@ String Actor::ToJson() rapidjson_flax::StringBuffer buffer; CompactJsonWriter writer(buffer); writer.SceneObject(this); - return String(buffer.GetString()); + String result; + const char* c = buffer.GetString(); + result.SetUTF8(c, (int32)buffer.GetSize()); + return result; } void Actor::FromJson(const StringAnsiView& json) diff --git a/Source/Engine/Platform/Base/FileBase.cpp b/Source/Engine/Platform/Base/FileBase.cpp index 20260d6e4..d7308b0fd 100644 --- a/Source/Engine/Platform/Base/FileBase.cpp +++ b/Source/Engine/Platform/Base/FileBase.cpp @@ -113,10 +113,9 @@ bool FileBase::ReadAllText(const StringView& path, String& data) } // Convert to UTF-16 - auto utf16Data = (Char*)Allocator::Allocate(count * sizeof(Char)); - uint32 utf16Length; - StringUtils::ConvertUTF82UTF16(reinterpret_cast(bytes.Get()), utf16Data, count, &utf16Length); - data = utf16Data; + int32 utf16Length; + Char* utf16Data = StringUtils::ConvertUTF82UTF16(reinterpret_cast(bytes.Get()), count, utf16Length); + data.Set(utf16Data, utf16Length); Allocator::Free(utf16Data); } break; diff --git a/Source/Engine/Platform/Win32/Win32Network.cpp b/Source/Engine/Platform/Win32/Win32Network.cpp index 38b99a25e..c1d31395a 100644 --- a/Source/Engine/Platform/Win32/Win32Network.cpp +++ b/Source/Engine/Platform/Win32/Win32Network.cpp @@ -442,13 +442,13 @@ bool Win32Network::CreateEndPoint(String* address, String* port, NetworkIPVersio // consider using NUMERICHOST/NUMERICSERV if address is a valid Ipv4 or IPv6 so we can skip some look up ( potentially slow when resolving host names ) if ((status = GetAddrInfoW(address == nullptr ? nullptr : address->Get(), port->Get(), &hints, &info)) != 0) { - LOG(Error, "Unable to query info for address : {0} Error : {1}", address ? address->Get() : String("ANY"), gai_strerror(status)); + LOG(Error, "Unable to query info for address : {0} Error : {1}", address ? address->Get() : TEXT("ANY"), gai_strerror(status)); return true; } if (info == nullptr) { - LOG(Error, "Unable to resolve address! Address : {0}", address ? address->Get() : String("ANY")); + LOG(Error, "Unable to resolve address! Address : {0}", address ? address->Get() : TEXT("ANY")); return true; } diff --git a/Source/Engine/Scripting/ManagedSerialization.cpp b/Source/Engine/Scripting/ManagedSerialization.cpp index 4f971a842..d435fd0f2 100644 --- a/Source/Engine/Scripting/ManagedSerialization.cpp +++ b/Source/Engine/Scripting/ManagedSerialization.cpp @@ -41,9 +41,8 @@ void ManagedSerialization::Serialize(ISerializable::SerializeStream& stream, Mon } // Write result data - const auto length = mono_string_length(invokeResultStr); const auto invokeResultChars = mono_string_to_utf8(invokeResultStr); - stream.RawValue(invokeResultChars, length); + stream.RawValue(invokeResultChars); mono_free(invokeResultChars); } @@ -80,9 +79,8 @@ void ManagedSerialization::SerializeDiff(ISerializable::SerializeStream& stream, } // Write result data - auto length = mono_string_length(invokeResultStr); auto invokeResultChars = mono_string_to_utf8(invokeResultStr); - stream.RawValue(invokeResultChars, length); + stream.RawValue(invokeResultChars); mono_free(invokeResultChars); } diff --git a/Source/Engine/Serialization/JsonTools.h b/Source/Engine/Serialization/JsonTools.h index a20cc6584..5f1314595 100644 --- a/Source/Engine/Serialization/JsonTools.h +++ b/Source/Engine/Serialization/JsonTools.h @@ -358,9 +358,9 @@ public: FORCE_INLINE static void GetString(String& result, const Value& node, const char* name) { const auto member = node.FindMember(name); - if (member != node.MemberEnd() && member->value.IsString()) + if (member != node.MemberEnd()) { - result.Set(member->value.GetString(), member->value.GetStringLength()); + result = member->value.GetText(); } } diff --git a/Source/Engine/Serialization/JsonWriter.h b/Source/Engine/Serialization/JsonWriter.h index 275b2c778..7182ec69a 100644 --- a/Source/Engine/Serialization/JsonWriter.h +++ b/Source/Engine/Serialization/JsonWriter.h @@ -73,21 +73,20 @@ public: void String(const Char* str) { - const int32 length = StringUtils::Length(str); - const StringAsANSI<256> buf(str, length); - String(buf.Get(), length); + const StringAsUTF8<256> buf(str); + String(buf.Get()); } void String(const Char* str, const int32 length) { - const StringAsANSI<256> buf(str, length); - String(buf.Get(), length); + const StringAsUTF8<256> buf(str, length); + String(buf.Get()); } void String(const ::String& value) { - const StringAsANSI<256> buf(*value, value.Length()); - String(buf.Get(), value.Length()); + const StringAsUTF8<256> buf(*value, value.Length()); + String(buf.Get()); } FORCE_INLINE void RawValue(const StringAnsi& str) diff --git a/Source/Engine/UI/UICanvas.cpp b/Source/Engine/UI/UICanvas.cpp index 49d6ac420..ccd0561ad 100644 --- a/Source/Engine/UI/UICanvas.cpp +++ b/Source/Engine/UI/UICanvas.cpp @@ -81,9 +81,8 @@ void UICanvas::Serialize(SerializeStream& stream, const void* otherObj) else { // Write result data - auto length = mono_string_length(invokeResultStr); auto invokeResultChars = mono_string_to_utf8(invokeResultStr); - stream.RawValue(invokeResultChars, length); + stream.RawValue(invokeResultChars); mono_free(invokeResultChars); } } diff --git a/Source/Engine/UI/UIControl.cpp b/Source/Engine/UI/UIControl.cpp index 33fe8a93b..51312ae15 100644 --- a/Source/Engine/UI/UIControl.cpp +++ b/Source/Engine/UI/UIControl.cpp @@ -99,14 +99,13 @@ void UIControl::Serialize(SerializeStream& stream, const void* otherObj) { stream.JKEY("Control"); const auto controlTypeChars = mono_string_to_utf8(controlType); - stream.String(controlTypeChars, controlTypeLength); + stream.String(controlTypeChars); mono_free(controlTypeChars); } stream.JKEY("Data"); - const auto invokeResultLength = mono_string_length(invokeResultStr); const auto invokeResultChars = mono_string_to_utf8(invokeResultStr); - stream.RawValue(invokeResultChars, invokeResultLength); + stream.RawValue(invokeResultChars); mono_free(invokeResultChars); } diff --git a/Source/Engine/Utilities/StringConverter.h b/Source/Engine/Utilities/StringConverter.h index 6aff26199..5820a4990 100644 --- a/Source/Engine/Utilities/StringConverter.h +++ b/Source/Engine/Utilities/StringConverter.h @@ -50,19 +50,8 @@ public: } StringAsANSI(const Char* text) + : StringAsANSI(text, StringUtils::Length(text)) { - const int32 length = StringUtils::Length(text); - if (length + 1 < InlinedSize) - { - StringUtils::ConvertUTF162ANSI(text, this->_inlined, length); - this->_inlined[length] = 0; - } - else - { - this->_dynamic = (CharType*)Allocator::Allocate((length + 1) * sizeof(CharType)); - StringUtils::ConvertUTF162ANSI(text, this->_dynamic, length); - this->_dynamic[length] = 0; - } } StringAsANSI(const Char* text, const int32 length) @@ -81,6 +70,43 @@ public: } }; +template +class StringAsUTF8 : public StringAsBase +{ +public: + + typedef char CharType; + typedef StringAsBase Base; + +public: + + StringAsUTF8(const char* text) + { + this->_static = text; + } + + StringAsUTF8(const Char* text) + : StringAsUTF8(text, StringUtils::Length(text)) + { + } + + StringAsUTF8(const Char* text, const int32 length) + { + if (length + 1 < InlinedSize) + { + int32 lengthUtf8; + StringUtils::ConvertUTF162UTF8(text, this->_inlined, length, lengthUtf8); + this->_inlined[lengthUtf8] = 0; + } + else + { + int32 lengthUtf8; + this->_dynamic = StringUtils::ConvertUTF162UTF8(text, length, lengthUtf8); + this->_dynamic[lengthUtf8] = 0; + } + } +}; + template class StringAsUTF16 : public StringAsBase { @@ -92,19 +118,8 @@ public: public: StringAsUTF16(const char* text) + : StringAsUTF16(text, StringUtils::Length(text)) { - const int32 length = StringUtils::Length(text); - if (length + 1 < InlinedSize) - { - StringUtils::ConvertANSI2UTF16(text, this->_inlined, length); - this->_inlined[length] = 0; - } - else - { - this->_dynamic = (CharType*)Allocator::Allocate((length + 1) * sizeof(CharType)); - StringUtils::ConvertANSI2UTF16(text, this->_dynamic, length); - this->_dynamic[length] = 0; - } } StringAsUTF16(const char* text, const int32 length) diff --git a/Source/ThirdParty/rapidjson/document.h b/Source/ThirdParty/rapidjson/document.h index 2691a6386..b47115147 100644 --- a/Source/ThirdParty/rapidjson/document.h +++ b/Source/ThirdParty/rapidjson/document.h @@ -1685,9 +1685,9 @@ public: if (IsString()) { if (data_.f.flags & kInlineStrFlag) - result.Set(data_.ss.str, data_.ss.GetLength()); + result.SetUTF8(data_.ss.str, data_.ss.GetLength()); else - result.Set(GetStringPointer(), data_.s.length); + result.SetUTF8(GetStringPointer(), data_.s.length); } return result; }