Cleanup and optimize StringUtils::ConvertUTF82UTF16
This commit is contained in:
@@ -64,16 +64,25 @@ const char* StringUtils::FindIgnoreCase(const char* str, const char* toFind)
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void StringUtils::ConvertUTF82UTF16(const char* from, Char* to, uint32 fromLength, uint32* toLength)
|
void PrintUTF8Error(const char* from, uint32 fromLength)
|
||||||
{
|
{
|
||||||
Array<unsigned long> unicode;
|
LOG(Error, "Not a UTF-8 string. Length: {0}", fromLength);
|
||||||
uint32 i = 0;
|
for (uint32 i = 0; i < fromLength; i++)
|
||||||
*toLength = 0;
|
{
|
||||||
|
LOG(Error, "str[{0}] = {0}", i, from[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ConvertUTF82UTF16Helper(Array<uint32>& unicode, const char* from, int32 fromLength, int32& toLength)
|
||||||
|
{
|
||||||
|
// Reference: https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
|
||||||
|
unicode.EnsureCapacity(fromLength);
|
||||||
|
int32 i = 0, todo;
|
||||||
|
uint32 uni;
|
||||||
|
toLength = 0;
|
||||||
while (i < fromLength)
|
while (i < fromLength)
|
||||||
{
|
{
|
||||||
unsigned long uni;
|
byte ch = from[i++];
|
||||||
uint32 todo;
|
|
||||||
unsigned char ch = from[i++];
|
|
||||||
|
|
||||||
if (ch <= 0x7F)
|
if (ch <= 0x7F)
|
||||||
{
|
{
|
||||||
@@ -82,7 +91,7 @@ void StringUtils::ConvertUTF82UTF16(const char* from, Char* to, uint32 fromLengt
|
|||||||
}
|
}
|
||||||
else if (ch <= 0xBF)
|
else if (ch <= 0xBF)
|
||||||
{
|
{
|
||||||
LOG(Error, "Not a UTF-8 string.");
|
PrintUTF8Error(from, fromLength);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else if (ch <= 0xDF)
|
else if (ch <= 0xDF)
|
||||||
@@ -102,21 +111,21 @@ void StringUtils::ConvertUTF82UTF16(const char* from, Char* to, uint32 fromLengt
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
LOG(Error, "Not a UTF-8 string.");
|
PrintUTF8Error(from, fromLength);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32 j = 0; j < todo; j++)
|
for (int32 j = 0; j < todo; j++)
|
||||||
{
|
{
|
||||||
if (i == fromLength)
|
if (i == fromLength)
|
||||||
{
|
{
|
||||||
LOG(Error, "Not a UTF-8 string.");
|
PrintUTF8Error(from, fromLength);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ch = from[i++];
|
ch = from[i++];
|
||||||
if (ch < 0x80 || ch > 0xBF)
|
if (ch < 0x80 || ch > 0xBF)
|
||||||
{
|
{
|
||||||
LOG(Error, "Not a UTF-8 string.");
|
PrintUTF8Error(from, fromLength);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -126,28 +135,27 @@ void StringUtils::ConvertUTF82UTF16(const char* from, Char* to, uint32 fromLengt
|
|||||||
|
|
||||||
if ((uni >= 0xD800 && uni <= 0xDFFF) || uni > 0x10FFFF)
|
if ((uni >= 0xD800 && uni <= 0xDFFF) || uni > 0x10FFFF)
|
||||||
{
|
{
|
||||||
LOG(Error, "Not a UTF-8 string.");
|
PrintUTF8Error(from, fromLength);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
unicode.Add(uni);
|
unicode.Add(uni);
|
||||||
}
|
|
||||||
|
|
||||||
// Count chars
|
toLength++;
|
||||||
uint32 length = (uint32)unicode.Count();
|
if (uni > 0xFFFF)
|
||||||
for (i = 0; i < length; i++)
|
|
||||||
{
|
|
||||||
if (unicode[i] > 0xFFFF)
|
|
||||||
{
|
{
|
||||||
length++;
|
toLength++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Copy chars
|
void StringUtils::ConvertUTF82UTF16(const char* from, Char* to, int32 fromLength, int32& toLength)
|
||||||
*toLength = length;
|
{
|
||||||
for (i = 0; i < length; i++)
|
Array<uint32> unicode;
|
||||||
|
ConvertUTF82UTF16Helper(unicode, from, fromLength, toLength);
|
||||||
|
for (int32 i = 0; i < toLength; i++)
|
||||||
{
|
{
|
||||||
unsigned long uni = unicode[i];
|
uint32 uni = unicode[i];
|
||||||
if (uni <= 0xFFFF)
|
if (uni <= 0xFFFF)
|
||||||
{
|
{
|
||||||
to[i] = (Char)uni;
|
to[i] = (Char)uni;
|
||||||
@@ -161,6 +169,31 @@ void StringUtils::ConvertUTF82UTF16(const char* from, Char* to, uint32 fromLengt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts a UTF-8 string into a newly allocated, null-terminated UTF-16 string.
// @param from        UTF-8 encoded input bytes.
// @param fromLength  Amount of bytes to read from `from`.
// @param toLength    Receives the amount of UTF-16 code units stored in the result (terminator excluded).
// @returns Buffer of toLength + 1 Chars obtained from Allocator::Allocate (release it with Allocator::Free),
//          or nullptr when no characters were produced (toLength is 0).
Char* StringUtils::ConvertUTF82UTF16(const char* from, int32 fromLength, int32& toLength)
{
    // Decode the input into code points; the helper also reports the amount of UTF-16 code units needed.
    Array<uint32> unicode;
    ConvertUTF82UTF16Helper(unicode, from, fromLength, toLength);
    if (toLength == 0)
        return nullptr;

    Char* to = (Char*)Allocator::Allocate((toLength + 1) * sizeof(Char));

    // `unicode` holds one entry per code point while `to` receives one or two code units per code point,
    // so the read position (i) and the write position (written) have to advance independently.
    const int32 codePointCount = unicode.Count();
    int32 written = 0;
    for (int32 i = 0; i < codePointCount; i++)
    {
        uint32 uni = unicode[i];
        const int32 unitsNeeded = uni <= 0xFFFF ? 1 : 2;

        // Never write past the space reserved for characters (the last slot belongs to the terminator).
        if (written + unitsNeeded > toLength)
            break;

        if (uni <= 0xFFFF)
        {
            // Basic Multilingual Plane: a single code unit.
            to[written++] = (Char)uni;
        }
        else
        {
            // Supplementary plane: encode as a high/low surrogate pair.
            // Plain assignment is required because the freshly allocated buffer is uninitialized.
            uni -= 0x10000;
            to[written++] = (Char)((uni >> 10) + 0xD800);
            to[written++] = (Char)((uni & 0x3FF) + 0xDC00);
        }
    }

    // Report exactly what was written and terminate the string right after it.
    toLength = written;
    to[written] = 0;
    return to;
}
|
||||||
|
|
||||||
void RemoveLongPathPrefix(const String& path, String& result)
|
void RemoveLongPathPrefix(const String& path, String& result)
|
||||||
{
|
{
|
||||||
if (!path.StartsWith(TEXT("\\\\?\\"), StringSearchCase::CaseSensitive))
|
if (!path.StartsWith(TEXT("\\\\?\\"), StringSearchCase::CaseSensitive))
|
||||||
|
|||||||
@@ -184,14 +184,17 @@ public:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Convert characters from ANSI to UTF-16
|
// Converts characters from ANSI to UTF-16
|
||||||
static void ConvertANSI2UTF16(const char* from, Char* to, int32 len);
|
static void ConvertANSI2UTF16(const char* from, Char* to, int32 len);
|
||||||
|
|
||||||
// Convert characters from UTF-16 to ANSI
|
// Converts characters from UTF-16 to ANSI
|
||||||
static void ConvertUTF162ANSI(const Char* from, char* to, int32 len);
|
static void ConvertUTF162ANSI(const Char* from, char* to, int32 len);
|
||||||
|
|
||||||
// Convert characters from UTF-8 to UTF-16
|
// Convert characters from UTF-8 to UTF-16
|
||||||
static void ConvertUTF82UTF16(const char* from, Char* to, uint32 fromLength, uint32* toLength);
|
static void ConvertUTF82UTF16(const char* from, Char* to, int32 fromLength, int32& toLength);
|
||||||
|
|
||||||
|
// Converts characters from UTF-8 to UTF-16 into a newly allocated output buffer.
// The buffer is allocated with Allocator::Allocate and holds toLength + 1 characters (the extra one is the null terminator); call Allocator::Free after usage.
// Returns null when the resulting toLength is 0 (e.g. an empty string).
// NOTE(review): for an invalid string the conversion logs an error and stops early; null appears to be returned only when nothing was decoded before the error - confirm against the implementation.
|
||||||
|
static Char* ConvertUTF82UTF16(const char* from, int32 fromLength, int32& toLength);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user