Files
FlaxEngine/Source/ThirdParty/UniversalAnalytics/string/encode.cpp
2020-12-07 23:40:54 +01:00

179 lines
5.1 KiB
C++

/******************************************************************************
* Universal Analytics for C
* -- URL encoding module for UTF-8 compatibility with Google Analytics
* Copyright (c) 2013, Analytics Pros
*
* This project is free software, distributed under the BSD license.
* Analytics Pros offers consulting and integration services if your firm needs
* assistance in strategy, implementation, or auditing existing work.
******************************************************************************/
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <wctype.h>
#include "encode.h"
/* The following constants are symbolized for readability in later encoding phases */
#define ASCII_HIGH 0x7F
#define UTF8_INVALID 0xFFFF
#define UTF8_SWAPCHAR 0xFFFE
#define UTF8_HIGH_TWO_BYTES 0x7FF
#define UTF8_HIGH_THREE_BYTES 0xFFFD
#define UTF8_HIGH_FOUR_BYTES 0x1FFFFF
#define UTF8_HIGH_FIVE_BYTES 0x3FFFFFF
#define UTF8_HIGH_SIX_BYTES 0x7FFFFFFF
/* Smaller of two values. Every argument and the whole expansion are
 * parenthesized so that operands containing lower-precedence operators
 * (e.g. `x & 1`) compare as written. Each argument may be evaluated twice,
 * so do not pass expressions with side effects. */
#define _minimum(a, b) (((a) < (b)) ? (a) : (b))
/* Mapping for hexadecimal conversion: index with a nibble value (0-15) to get
 * the corresponding lowercase hexadecimal digit. */
static const char _hexchar[] = "0123456789abcdef";
// Write a hexidecimal value (32 bit) to a character buffer
unsigned int hexadecimal(char* output, unsigned int value){
assert(NULL != output);
__uint a = (value >> 28) & 0xF;
__uint b = (value >> 24) & 0xF;
__uint c = (value >> 20) & 0xF;
__uint d = (value >> 16) & 0xF;
__uint e = (value >> 12) & 0xF;
__uint f = (value >> 8) & 0xF;
__uint g = (value >> 4) & 0xF;
__uint h = (value & 0xF);
__uint i = 0;
if(a) output[i++] = _hexchar[ (int) a ];
if(b || a) output[i++] = _hexchar[ (int) b ];
if(c || b || a) output[i++] = _hexchar[ (int) c ];
if(d || c || b || a) output[i++] = _hexchar[ (int) d ];
if(e || d || c || b || a) output[i++] = _hexchar[ (int) e ];
if(f || e || d || c || b || a) output[i++] = _hexchar[ (int) f ];
if(g || f || e || d || c || b || a) output[i++] = _hexchar[ (int) g ];
output[i++] = _hexchar[ (int) h ];
return i;
}
/*
 * Render `binary_len` bytes from `binary` as lowercase hexadecimal text.
 * Primarily intended to aid the translation of binary MD5 digests.
 *
 * Every input byte yields exactly two digits (0x0a -> "0a", 0x00 -> "00"),
 * so the text has a fixed width and can be decoded unambiguously.
 *
 * `hex_output` needs room for 2 * binary_len characters; no NUL terminator
 * is appended. Returns the number of characters written.
 */
unsigned int hexdigest(char* hex_output, unsigned char* binary, unsigned int binary_len){
    static const char digits[] = "0123456789abcdef";
    unsigned int in;
    unsigned int out = 0;
    assert(NULL != hex_output || 0 == binary_len);
    assert(NULL != binary || 0 == binary_len);
    for(in = 0; in < binary_len; in++){
        hex_output[out++] = digits[binary[in] >> 4];
        hex_output[out++] = digits[binary[in] & 0xF];
    }
    return out;
}
/* Returns non-zero when `byte` may be copied to the output unescaped: ASCII
 * letters, ASCII digits, '-', '.' and '~'. Explicit ranges are used so the
 * answer does not depend on the current locale. */
static int is_literal_byte(unsigned char byte){
    return (byte >= 'a' && byte <= 'z')
        || (byte >= 'A' && byte <= 'Z')
        || (byte >= '0' && byte <= '9')
        || byte == '-' || byte == '.' || byte == '~';
}
/*
 * URL-encode up to `input_len` multibyte characters of `multibyte_input`
 * into `result`, writing at most `result_max` bytes.
 *
 *  - a space becomes '+'
 *  - ASCII letters, digits, '-', '.' and '~' are copied as-is
 *  - everything else is written as "%xx" (two lowercase hex digits) for each
 *    byte of the character, so UTF-8 input yields UTF-8 percent-encoding
 *
 * Encoding stops at the first NUL byte, after `input_len` characters, or
 * when the next character would not fit completely; a character is never
 * written partially. A byte sequence that mbtowc() rejects in the current
 * locale is handled as a single byte and percent-encoded.
 *
 * `multibyte_input` should be NUL-terminated: mbtowc() is allowed to inspect
 * up to MB_CUR_MAX bytes from the current position.
 *
 * No NUL terminator is appended. Returns the number of bytes written, which
 * never exceeds `result_max`.
 */
size_t urlencode_put(char* result, size_t result_max, const char *multibyte_input, size_t input_len){
    static const char hex_digits[] = "0123456789abcdef";
    assert(NULL != result);
    assert(NULL != multibyte_input);
    const unsigned char* cursor = (const unsigned char*) multibyte_input;
    size_t written = 0;
    size_t consumed;
    (void) mbtowc(NULL, NULL, 0); // Reset multibyte state
    for(consumed = 0; consumed < input_len && *cursor != 0; consumed++){
        wchar_t decoded;
        // mbtowc is used only to find out how many bytes form this character
        int char_bytes = mbtowc(&decoded, (const char*) cursor, MB_CUR_MAX);
        if(char_bytes < 1){
            // Not a valid sequence in this locale: clear the conversion state
            // and fall back to escaping the single offending byte.
            (void) mbtowc(NULL, NULL, 0);
            char_bytes = 1;
        }
        if(char_bytes == 1 && (*cursor == ' ' || is_literal_byte(*cursor))){
            if(written >= result_max){
                break;
            }
            // Spaces are encoded as "plus" (+); other literals pass through
            result[written++] = (*cursor == ' ') ? '+' : (char) *cursor;
        }
        else {
            int b;
            // Each byte needs three output bytes; `written` never exceeds
            // `result_max`, so the subtraction cannot wrap.
            if((size_t) char_bytes * 3 > result_max - written){
                break;
            }
            for(b = 0; b < char_bytes; b++){
                result[written++] = '%';
                result[written++] = hex_digits[cursor[b] >> 4];
                result[written++] = hex_digits[cursor[b] & 0xF];
            }
        }
        cursor += char_bytes;
    }
    return written;
}
/*
 * URL-encode the NUL-terminated string `mb_input` into the caller-supplied
 * buffer `output` of `output_limit` bytes.
 *
 * The buffer is zero-filled first and the last byte is kept in reserve, so
 * whenever `output_limit` is non-zero the result is a NUL-terminated string
 * even if the encoded text had to be cut short.
 *
 * Returns the number of encoded bytes written, not counting the terminator;
 * returns 0 without touching `output` when `output_limit` is 0.
 */
size_t urlencode_put_limit(const char* mb_input, char* output, size_t output_limit){
    assert(NULL != mb_input);
    assert(NULL != output);
    if(0 == output_limit){
        return 0;
    }
    // The byte length is an upper bound on the character count and, unlike
    // mbstowcs(), cannot fail on input that is invalid in the current locale.
    // The encoder stops at the terminating NUL regardless.
    const size_t input_len = strlen(mb_input) + 1;
    memset(output, 0, output_limit);
    return urlencode_put(output, output_limit - 1, mb_input, input_len);
}
/*
 * Create a new character buffer, and write the given input as URL-encoded (UTF-8).
 *
 * `mb_input` must be a NUL-terminated string. The returned buffer is
 * heap-allocated, zero-initialised and always NUL-terminated; the caller owns
 * it and must release it with free(). Returns NULL when the buffer cannot be
 * allocated.
 */
char* urlencode(const char* mb_input){
    assert(NULL != mb_input);
    // The byte length (plus terminator) is an upper bound on the character
    // count and, unlike mbstowcs(), cannot fail on input that is invalid in
    // the current locale.
    const size_t input_len = strlen(mb_input) + 1;
    // Prepare the output buffer; 12 bytes per input byte is a generous upper
    // bound on the encoded size. Refuse sizes that would overflow size_t.
    if(input_len > ((size_t) -1) / 12){
        return NULL;
    }
    const size_t output_allocation = input_len * 12;
    // The cast keeps this translation unit valid when compiled as C++.
    char* output = (char*) calloc(output_allocation, sizeof(char));
    if(NULL == output){
        return NULL;
    }
    // Keep the final zeroed byte out of the encoder's reach so the result is
    // always terminated.
    urlencode_put(output, output_allocation - 1, mb_input, input_len);
    return output;
}
/*
 * For compatibility with our former encoding model: same work as
 * urlencode_put(), but with the input buffer first and `unsigned int` sizes.
 * Returns the number of bytes written to `output`.
 */
unsigned int encodeURIComponent(char input[], char output[], const unsigned int input_len, const unsigned int output_max){
    const size_t written = urlencode_put(output, output_max, input, input_len);
    return (unsigned int) written;
}