mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
3rdparty: Update simpleini to 4.25
Signed-off-by: SternXD <stern@sidestore.io>
This commit is contained in:
parent
3d2ecafb01
commit
775f381685
2
3rdparty/simpleini/LICENCE.txt
vendored
2
3rdparty/simpleini/LICENCE.txt
vendored
@ -1,6 +1,6 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2006-2022 Brodie Thiesfield
|
||||
Copyright (c) 2006-2024 Brodie Thiesfield
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
|
||||
30
3rdparty/simpleini/include/ConvertUTF.h
vendored
30
3rdparty/simpleini/include/ConvertUTF.h
vendored
@ -1,4 +1,6 @@
|
||||
/*
|
||||
* https://web.archive.org/web/20090529064329/http://www.unicode.org:80/Public/PROGRAMS/CVTUTF/
|
||||
*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
@ -20,11 +22,11 @@
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Conversions between UTF32, UTF-16, and UTF-8. Header file.
|
||||
|
||||
Several functions are included here, forming a complete set of
|
||||
Several functions are included here, forming a complete set of
|
||||
conversions between the three formats. UTF-7 is not included
|
||||
here, but is handled in a separate source file.
|
||||
|
||||
@ -77,17 +79,17 @@
|
||||
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||
Fixes & updates, Sept 2001.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
/* ---------------------------------------------------------------------
|
||||
The following 4 definitions are compiler-specific.
|
||||
The C standard does not guarantee that wchar_t has at least
|
||||
16 bits, so wchar_t is no less portable than unsigned short!
|
||||
All should be unsigned values to avoid sign extension during
|
||||
bit mask & shift operations.
|
||||
------------------------------------------------------------------------ */
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
typedef unsigned int UTF32; /* at least 32 bits */
|
||||
typedef unsigned long UTF32; /* at least 32 bits */
|
||||
typedef unsigned short UTF16; /* at least 16 bits */
|
||||
typedef unsigned char UTF8; /* typically 8 bits */
|
||||
typedef unsigned char Boolean; /* 0 or 1 */
|
||||
@ -102,7 +104,7 @@ typedef unsigned char Boolean; /* 0 or 1 */
|
||||
typedef enum {
|
||||
conversionOK, /* conversion successful */
|
||||
sourceExhausted, /* partial character in source, but hit end */
|
||||
targetExhausted, /* insufficient room in target for conversion */
|
||||
targetExhausted, /* insuff. room in target for conversion */
|
||||
sourceIllegal /* source sequence is illegal/malformed */
|
||||
} ConversionResult;
|
||||
|
||||
@ -116,31 +118,31 @@ typedef enum {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (
|
||||
ConversionResult ConvertUTF8toUTF16(
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF16toUTF8 (
|
||||
ConversionResult ConvertUTF16toUTF8(
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF8toUTF32 (
|
||||
ConversionResult ConvertUTF8toUTF32(
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF32toUTF8 (
|
||||
ConversionResult ConvertUTF32toUTF8(
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF16toUTF32 (
|
||||
ConversionResult ConvertUTF16toUTF32(
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||
|
||||
ConversionResult ConvertUTF32toUTF16 (
|
||||
ConversionResult ConvertUTF32toUTF16(
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
||||
Boolean isLegalUTF8Sequence(const UTF8* source, const UTF8* sourceEnd);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
82
3rdparty/simpleini/include/SimpleIni.h
vendored
82
3rdparty/simpleini/include/SimpleIni.h
vendored
@ -5,7 +5,7 @@
|
||||
<tr><th>File <td>SimpleIni.h
|
||||
<tr><th>Author <td>Brodie Thiesfield
|
||||
<tr><th>Source <td>https://github.com/brofield/simpleini
|
||||
<tr><th>Version <td>4.22
|
||||
<tr><th>Version <td>4.25
|
||||
</table>
|
||||
|
||||
Jump to the @link CSimpleIniTempl CSimpleIni @endlink interface documentation.
|
||||
@ -53,7 +53,7 @@
|
||||
-# If you will only be using straight utf8 files and access the data via the
|
||||
char interface, then you do not need any conversion library and could define
|
||||
SI_NO_CONVERSION. Note that no conversion also means no validation of the data.
|
||||
If no converter is specified then the default converter is SI_CONVERT_GENERIC
|
||||
If no converter is specified then the default converter is SI_NO_CONVERSION
|
||||
on Mac/Linux and SI_CONVERT_WIN32 on Windows. If you need widechar support on
|
||||
Mac/Linux then use either SI_CONVERT_GENERIC or SI_CONVERT_ICU. These are also
|
||||
supported on all platforms.
|
||||
@ -161,6 +161,9 @@
|
||||
|
||||
@section notes NOTES
|
||||
|
||||
- The maximum supported file size is 1 GiB (SI_MAX_FILE_SIZE). Files larger
|
||||
than this will be rejected with SI_FILE error to prevent excessive memory
|
||||
allocation and potential denial of service attacks.
|
||||
- To load UTF-8 data on Windows 95, you need to use Microsoft Layer for
|
||||
Unicode, or SI_CONVERT_GENERIC, or SI_CONVERT_ICU.
|
||||
- When using SI_CONVERT_GENERIC, ConvertUTF.c must be compiled and linked.
|
||||
@ -261,6 +264,10 @@ constexpr int SI_FAIL = -1; //!< Generic failure
|
||||
constexpr int SI_NOMEM = -2; //!< Out of memory error
|
||||
constexpr int SI_FILE = -3; //!< File error (see errno for detail error)
|
||||
|
||||
//! Maximum supported file size (1 GiB). Files larger than this will be rejected
|
||||
//! to prevent excessive memory allocation and potential denial of service.
|
||||
constexpr size_t SI_MAX_FILE_SIZE = 1024ULL * 1024ULL * 1024ULL;
|
||||
|
||||
#define SI_UTF8_SIGNATURE "\xEF\xBB\xBF"
|
||||
|
||||
#ifdef _WIN32
|
||||
@ -357,7 +364,7 @@ public:
|
||||
if (lhs.nOrder != rhs.nOrder) {
|
||||
return lhs.nOrder < rhs.nOrder;
|
||||
}
|
||||
return KeyOrder()(lhs.pItem, rhs.pItem);
|
||||
return KeyOrder()(lhs, rhs);
|
||||
}
|
||||
};
|
||||
};
|
||||
@ -1463,8 +1470,13 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::LoadFile(
|
||||
return SI_OK;
|
||||
}
|
||||
|
||||
// check file size is within supported limits (SI_MAX_FILE_SIZE)
|
||||
if (static_cast<size_t>(lSize) > SI_MAX_FILE_SIZE) {
|
||||
return SI_FILE;
|
||||
}
|
||||
|
||||
// allocate and ensure NULL terminated
|
||||
char * pData = new(std::nothrow) char[lSize+static_cast<size_t>(1)];
|
||||
char * pData = new(std::nothrow) char[static_cast<size_t>(lSize) + 1];
|
||||
if (!pData) {
|
||||
return SI_NOMEM;
|
||||
}
|
||||
@ -1516,13 +1528,18 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::LoadData(
|
||||
return SI_FAIL;
|
||||
}
|
||||
|
||||
// check converted data size is within supported limits (SI_MAX_FILE_SIZE)
|
||||
if (uLen >= (SI_MAX_FILE_SIZE / sizeof(SI_CHAR))) {
|
||||
return SI_FILE;
|
||||
}
|
||||
|
||||
// allocate memory for the data, ensure that there is a NULL
|
||||
// terminator wherever the converted data ends
|
||||
SI_CHAR * pData = new(std::nothrow) SI_CHAR[uLen+1];
|
||||
SI_CHAR * pData = new(std::nothrow) SI_CHAR[uLen + 1];
|
||||
if (!pData) {
|
||||
return SI_NOMEM;
|
||||
}
|
||||
memset(pData, 0, sizeof(SI_CHAR)*(uLen+1));
|
||||
memset(pData, 0, sizeof(SI_CHAR) * (uLen + 1));
|
||||
|
||||
// convert the data
|
||||
if (!converter.ConvertFromStore(a_pData, a_uDataLen, pData, uLen)) {
|
||||
@ -1800,6 +1817,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::IsMultiLineData(
|
||||
}
|
||||
|
||||
// embedded newlines
|
||||
const SI_CHAR * pStart = a_pData;
|
||||
while (*a_pData) {
|
||||
if (IsNewLineChar(*a_pData)) {
|
||||
return true;
|
||||
@ -1807,8 +1825,8 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::IsMultiLineData(
|
||||
++a_pData;
|
||||
}
|
||||
|
||||
// check for suffix
|
||||
if (IsSpace(*--a_pData)) {
|
||||
// check for suffix (ensure we don't go before start of string)
|
||||
if (a_pData > pStart && IsSpace(*(a_pData - 1))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1835,6 +1853,7 @@ CSimpleIniTempl<SI_CHAR, SI_STRLESS, SI_CONVERTER>::IsSingleLineQuotedValue(
|
||||
}
|
||||
|
||||
// embedded newlines
|
||||
const SI_CHAR * pStart = a_pData;
|
||||
while (*a_pData) {
|
||||
if (IsNewLineChar(*a_pData)) {
|
||||
return false;
|
||||
@ -1842,8 +1861,8 @@ CSimpleIniTempl<SI_CHAR, SI_STRLESS, SI_CONVERTER>::IsSingleLineQuotedValue(
|
||||
++a_pData;
|
||||
}
|
||||
|
||||
// check for suffix
|
||||
if (IsSpace(*--a_pData)) {
|
||||
// check for suffix (ensure we don't go before start of string)
|
||||
if (a_pData > pStart && IsSpace(*(a_pData - 1))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2088,7 +2107,8 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::AddEntry(
|
||||
if (pComment) {
|
||||
DeleteString(a_pComment);
|
||||
a_pComment = pComment;
|
||||
CopyString(a_pComment);
|
||||
rc = CopyString(a_pComment);
|
||||
if (rc < 0) return rc;
|
||||
}
|
||||
Delete(a_pSection, a_pKey);
|
||||
iKey = keyval.end();
|
||||
@ -2257,11 +2277,12 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::GetDoubleValue(
|
||||
return a_nDefault;
|
||||
}
|
||||
|
||||
char * pszSuffix = NULL;
|
||||
char * pszSuffix = szValue;
|
||||
double nValue = strtod(szValue, &pszSuffix);
|
||||
|
||||
// any invalid strings will return the default value
|
||||
if (!pszSuffix || *pszSuffix) {
|
||||
// check if no conversion was performed or if there are trailing characters
|
||||
if (pszSuffix == szValue || *pszSuffix) {
|
||||
return a_nDefault;
|
||||
}
|
||||
|
||||
@ -2421,7 +2442,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::GetSectionSize(
|
||||
int nCount = 0;
|
||||
const SI_CHAR * pLastKey = NULL;
|
||||
typename TKeyVal::const_iterator iKeyVal = section.begin();
|
||||
for (int n = 0; iKeyVal != section.end(); ++iKeyVal, ++n) {
|
||||
for (; iKeyVal != section.end(); ++iKeyVal) {
|
||||
if (!pLastKey || IsLess(pLastKey, iKeyVal->first.pItem)) {
|
||||
++nCount;
|
||||
pLastKey = iKeyVal->first.pItem;
|
||||
@ -2464,7 +2485,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::GetAllSections(
|
||||
{
|
||||
a_names.clear();
|
||||
typename TSection::const_iterator i = m_data.begin();
|
||||
for (int n = 0; i != m_data.end(); ++i, ++n ) {
|
||||
for (; i != m_data.end(); ++i) {
|
||||
a_names.push_back(i->first);
|
||||
}
|
||||
}
|
||||
@ -2490,7 +2511,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::GetAllKeys(
|
||||
const TKeyVal & section = iSection->second;
|
||||
const SI_CHAR * pLastKey = NULL;
|
||||
typename TKeyVal::const_iterator iKeyVal = section.begin();
|
||||
for (int n = 0; iKeyVal != section.end(); ++iKeyVal, ++n ) {
|
||||
for (; iKeyVal != section.end(); ++iKeyVal) {
|
||||
if (!pLastKey || IsLess(pLastKey, iKeyVal->first.pItem)) {
|
||||
a_names.push_back(iKeyVal->first);
|
||||
pLastKey = iKeyVal->first.pItem;
|
||||
@ -2828,7 +2849,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::DeleteString(
|
||||
// strings may exist either inside the data block, or they will be
|
||||
// individually allocated and stored in m_strings. We only physically
|
||||
// delete those stored in m_strings.
|
||||
if (a_pString < m_pData || a_pString >= m_pData + m_uDataLen) {
|
||||
if (!m_pData || a_pString < m_pData || a_pString >= m_pData + m_uDataLen) {
|
||||
typename TNamesDepend::iterator i = m_strings.begin();
|
||||
for (;i != m_strings.end(); ++i) {
|
||||
if (a_pString == i->pItem) {
|
||||
@ -2850,17 +2871,19 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::DeleteString(
|
||||
//
|
||||
// SI_NO_CONVERSION Do not make the "W" wide character version of the
|
||||
// library available. Only CSimpleIniA etc is defined.
|
||||
// Default on Linux/MacOS/etc.
|
||||
// SI_CONVERT_WIN32 Use the Win32 API functions for conversion.
|
||||
// Default on Windows.
|
||||
// SI_CONVERT_GENERIC Use the Unicode reference conversion library in
|
||||
// the accompanying files ConvertUTF.h/c
|
||||
// SI_CONVERT_ICU Use the IBM ICU conversion library. Requires
|
||||
// ICU headers on include path and icuuc.lib
|
||||
// SI_CONVERT_WIN32 Use the Win32 API functions for conversion.
|
||||
|
||||
#if !defined(SI_NO_CONVERSION) && !defined(SI_CONVERT_GENERIC) && !defined(SI_CONVERT_WIN32) && !defined(SI_CONVERT_ICU)
|
||||
# ifdef _WIN32
|
||||
# define SI_CONVERT_WIN32
|
||||
# else
|
||||
# define SI_CONVERT_GENERIC
|
||||
# define SI_NO_CONVERSION
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ -3079,14 +3102,18 @@ public:
|
||||
return a_uInputDataLen;
|
||||
}
|
||||
|
||||
#if defined(SI_NO_MBSTOWCS_NULL) || (!defined(_MSC_VER) && !defined(_linux))
|
||||
// get the required buffer size
|
||||
#if defined(_MSC_VER)
|
||||
size_t uBufSiz;
|
||||
errno_t e = mbstowcs_s(&uBufSiz, NULL, 0, a_pInputData, a_uInputDataLen);
|
||||
return (e == 0) ? uBufSiz : (size_t) -1;
|
||||
#elif !defined(SI_NO_MBSTOWCS_NULL)
|
||||
return mbstowcs(NULL, a_pInputData, a_uInputDataLen);
|
||||
#else
|
||||
// fall back processing for platforms that don't support a NULL dest to mbstowcs
|
||||
// worst case scenario is 1:1, this will be a sufficient buffer size
|
||||
(void)a_pInputData;
|
||||
return a_uInputDataLen;
|
||||
#else
|
||||
// get the actual required buffer size
|
||||
return mbstowcs(NULL, a_pInputData, a_uInputDataLen);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -3135,9 +3162,18 @@ public:
|
||||
}
|
||||
|
||||
// convert to wchar_t
|
||||
#if defined(_MSC_VER)
|
||||
size_t uBufSiz;
|
||||
errno_t e = mbstowcs_s(&uBufSiz,
|
||||
a_pOutputData, a_uOutputDataSize,
|
||||
a_pInputData, a_uInputDataLen);
|
||||
(void)uBufSiz;
|
||||
return (e == 0);
|
||||
#else
|
||||
size_t retval = mbstowcs(a_pOutputData,
|
||||
a_pInputData, a_uOutputDataSize);
|
||||
return retval != (size_t)(-1);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** Calculate the number of char required by the storage format of this
|
||||
|
||||
151
3rdparty/simpleini/src/ConvertUTF.c
vendored
151
3rdparty/simpleini/src/ConvertUTF.c
vendored
@ -1,4 +1,6 @@
|
||||
/*
|
||||
* https://web.archive.org/web/20090529064329/http://www.unicode.org:80/Public/PROGRAMS/CVTUTF/
|
||||
*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
@ -20,7 +22,7 @@
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
/* ---------------------------------------------------------------------
|
||||
/* ---------------------------------------------------------------------
|
||||
|
||||
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
|
||||
Author: Mark E. Davis, 1994.
|
||||
@ -36,7 +38,7 @@
|
||||
|
||||
See the header file "ConvertUTF.h" for complete documentation.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
|
||||
#include "ConvertUTF.h"
|
||||
@ -58,7 +60,7 @@ static const UTF32 halfMask = 0x3FFUL;
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF32toUTF16 (
|
||||
ConversionResult ConvertUTF32toUTF16(
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
@ -77,19 +79,24 @@ ConversionResult ConvertUTF32toUTF16 (
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
*target++ = (UTF16)ch; /* normal case */
|
||||
}
|
||||
} else if (ch > UNI_MAX_LEGAL_UTF32) {
|
||||
}
|
||||
else if (ch > UNI_MAX_LEGAL_UTF32) {
|
||||
if (flags == strictConversion) {
|
||||
result = sourceIllegal;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
||||
if (target + 1 >= targetEnd) {
|
||||
--source; /* Back up source pointer! */
|
||||
@ -107,7 +114,7 @@ ConversionResult ConvertUTF32toUTF16 (
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF16toUTF32 (
|
||||
ConversionResult ConvertUTF16toUTF32(
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
@ -127,17 +134,20 @@ ConversionResult ConvertUTF16toUTF32 (
|
||||
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
|
||||
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
|
||||
++source;
|
||||
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
|
||||
}
|
||||
else if (flags == strictConversion) { /* it's an unpaired high surrogate */
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
} else { /* We don't have the 16 bits following the high surrogate. */
|
||||
}
|
||||
else { /* We don't have the 16 bits following the high surrogate. */
|
||||
--source; /* return to the high surrogate */
|
||||
result = sourceExhausted;
|
||||
break;
|
||||
}
|
||||
} else if (flags == strictConversion) {
|
||||
}
|
||||
else if (flags == strictConversion) {
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
|
||||
--source; /* return to the illegal value itself */
|
||||
@ -154,10 +164,10 @@ ConversionResult ConvertUTF16toUTF32 (
|
||||
*sourceStart = source;
|
||||
*targetStart = target;
|
||||
#ifdef CVTUTF_DEBUG
|
||||
if (result == sourceIllegal) {
|
||||
if (result == sourceIllegal) {
|
||||
fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
@ -194,7 +204,7 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
|
||||
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
|
||||
* into the first byte, depending on how many bytes follow. There are
|
||||
* as many entries in this table as there are UTF-8 sequence types.
|
||||
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
|
||||
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
|
||||
* for *legal* UTF-8 will be 4 or fewer bytes total.
|
||||
*/
|
||||
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
@ -209,9 +219,9 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
|
||||
* into an inline function.
|
||||
*/
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF16toUTF8 (
|
||||
ConversionResult ConvertUTF16toUTF8(
|
||||
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
@ -234,17 +244,20 @@ ConversionResult ConvertUTF16toUTF8 (
|
||||
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
|
||||
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
|
||||
++source;
|
||||
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
|
||||
}
|
||||
else if (flags == strictConversion) { /* it's an unpaired high surrogate */
|
||||
--source; /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
} else { /* We don't have the 16 bits following the high surrogate. */
|
||||
}
|
||||
else { /* We don't have the 16 bits following the high surrogate. */
|
||||
--source; /* return to the high surrogate */
|
||||
result = sourceExhausted;
|
||||
break;
|
||||
}
|
||||
} else if (flags == strictConversion) {
|
||||
}
|
||||
else if (flags == strictConversion) {
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
|
||||
--source; /* return to the illegal value itself */
|
||||
@ -253,11 +266,20 @@ ConversionResult ConvertUTF16toUTF8 (
|
||||
}
|
||||
}
|
||||
/* Figure out how many bytes the result will require */
|
||||
if (ch < (UTF32)0x80) { bytesToWrite = 1;
|
||||
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
|
||||
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
|
||||
} else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
|
||||
} else { bytesToWrite = 3;
|
||||
if (ch < (UTF32)0x80) {
|
||||
bytesToWrite = 1;
|
||||
}
|
||||
else if (ch < (UTF32)0x800) {
|
||||
bytesToWrite = 2;
|
||||
}
|
||||
else if (ch < (UTF32)0x10000) {
|
||||
bytesToWrite = 3;
|
||||
}
|
||||
else if (ch < (UTF32)0x110000) {
|
||||
bytesToWrite = 4;
|
||||
}
|
||||
else {
|
||||
bytesToWrite = 3;
|
||||
ch = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
|
||||
@ -292,9 +314,9 @@ ConversionResult ConvertUTF16toUTF8 (
|
||||
* definition of UTF-8 goes up to 4-byte sequences.
|
||||
*/
|
||||
|
||||
static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||
static Boolean isLegalUTF8(const UTF8* source, int length) {
|
||||
UTF8 a;
|
||||
const UTF8 *srcptr = source+length;
|
||||
const UTF8* srcptr = source + length;
|
||||
switch (length) {
|
||||
default: return false;
|
||||
/* Everything else falls through when "true"... */
|
||||
@ -323,9 +345,9 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||
* Exported function to return whether a UTF-8 sequence is legal or not.
|
||||
* This is not used here; it's just exported.
|
||||
*/
|
||||
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
|
||||
int length = trailingBytesForUTF8[*source]+1;
|
||||
if (source+length > sourceEnd) {
|
||||
Boolean isLegalUTF8Sequence(const UTF8* source, const UTF8* sourceEnd) {
|
||||
int length = trailingBytesForUTF8[*source] + 1;
|
||||
if (source + length > sourceEnd) {
|
||||
return false;
|
||||
}
|
||||
return isLegalUTF8(source, length);
|
||||
@ -333,7 +355,7 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF8toUTF16 (
|
||||
ConversionResult ConvertUTF8toUTF16(
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
@ -346,7 +368,7 @@ ConversionResult ConvertUTF8toUTF16 (
|
||||
result = sourceExhausted; break;
|
||||
}
|
||||
/* Do this check whether lenient or strict */
|
||||
if (! isLegalUTF8(source, extraBytesToRead+1)) {
|
||||
if (!isLegalUTF8(source, extraBytesToRead + 1)) {
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
@ -364,34 +386,39 @@ ConversionResult ConvertUTF8toUTF16 (
|
||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||
|
||||
if (target >= targetEnd) {
|
||||
source -= (extraBytesToRead+1); /* Back up source pointer! */
|
||||
source -= (extraBytesToRead + 1); /* Back up source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||
if (flags == strictConversion) {
|
||||
source -= (extraBytesToRead+1); /* return to the illegal value itself */
|
||||
source -= (extraBytesToRead + 1); /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
*target++ = (UTF16)ch; /* normal case */
|
||||
}
|
||||
} else if (ch > UNI_MAX_UTF16) {
|
||||
}
|
||||
else if (ch > UNI_MAX_UTF16) {
|
||||
if (flags == strictConversion) {
|
||||
result = sourceIllegal;
|
||||
source -= (extraBytesToRead+1); /* return to the start */
|
||||
source -= (extraBytesToRead + 1); /* return to the start */
|
||||
break; /* Bail out; shouldn't continue */
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
/* target is a character in range 0xFFFF - 0x10FFFF. */
|
||||
if (target + 1 >= targetEnd) {
|
||||
source -= (extraBytesToRead+1); /* Back up source pointer! */
|
||||
source -= (extraBytesToRead + 1); /* Back up source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
ch -= halfBase;
|
||||
@ -406,7 +433,7 @@ ConversionResult ConvertUTF8toUTF16 (
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF32toUTF8 (
|
||||
ConversionResult ConvertUTF32toUTF8(
|
||||
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
@ -418,7 +445,7 @@ ConversionResult ConvertUTF32toUTF8 (
|
||||
const UTF32 byteMask = 0xBF;
|
||||
const UTF32 byteMark = 0x80;
|
||||
ch = *source++;
|
||||
if (flags == strictConversion ) {
|
||||
if (flags == strictConversion) {
|
||||
/* UTF-16 surrogate values are illegal in UTF-32 */
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||
--source; /* return to the illegal value itself */
|
||||
@ -430,11 +457,20 @@ ConversionResult ConvertUTF32toUTF8 (
|
||||
* Figure out how many bytes the result will require. Turn any
|
||||
* illegally large UTF32 things (> Plane 17) into replacement chars.
|
||||
*/
|
||||
if (ch < (UTF32)0x80) { bytesToWrite = 1;
|
||||
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
|
||||
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
|
||||
} else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
|
||||
} else { bytesToWrite = 3;
|
||||
if (ch < (UTF32)0x80) {
|
||||
bytesToWrite = 1;
|
||||
}
|
||||
else if (ch < (UTF32)0x800) {
|
||||
bytesToWrite = 2;
|
||||
}
|
||||
else if (ch < (UTF32)0x10000) {
|
||||
bytesToWrite = 3;
|
||||
}
|
||||
else if (ch <= UNI_MAX_LEGAL_UTF32) {
|
||||
bytesToWrite = 4;
|
||||
}
|
||||
else {
|
||||
bytesToWrite = 3;
|
||||
ch = UNI_REPLACEMENT_CHAR;
|
||||
result = sourceIllegal;
|
||||
}
|
||||
@ -448,7 +484,7 @@ ConversionResult ConvertUTF32toUTF8 (
|
||||
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
|
||||
case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
|
||||
case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
|
||||
}
|
||||
target += bytesToWrite;
|
||||
}
|
||||
@ -459,7 +495,7 @@ ConversionResult ConvertUTF32toUTF8 (
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
ConversionResult ConvertUTF8toUTF32 (
|
||||
ConversionResult ConvertUTF8toUTF32(
|
||||
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
|
||||
ConversionResult result = conversionOK;
|
||||
@ -472,7 +508,7 @@ ConversionResult ConvertUTF8toUTF32 (
|
||||
result = sourceExhausted; break;
|
||||
}
|
||||
/* Do this check whether lenient or strict */
|
||||
if (! isLegalUTF8(source, extraBytesToRead+1)) {
|
||||
if (!isLegalUTF8(source, extraBytesToRead + 1)) {
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
}
|
||||
@ -490,7 +526,7 @@ ConversionResult ConvertUTF8toUTF32 (
|
||||
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||
|
||||
if (target >= targetEnd) {
|
||||
source -= (extraBytesToRead+1); /* Back up the source pointer! */
|
||||
source -= (extraBytesToRead + 1); /* Back up the source pointer! */
|
||||
result = targetExhausted; break;
|
||||
}
|
||||
if (ch <= UNI_MAX_LEGAL_UTF32) {
|
||||
@ -500,16 +536,19 @@ ConversionResult ConvertUTF8toUTF32 (
|
||||
*/
|
||||
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||
if (flags == strictConversion) {
|
||||
source -= (extraBytesToRead+1); /* return to the illegal value itself */
|
||||
source -= (extraBytesToRead + 1); /* return to the illegal value itself */
|
||||
result = sourceIllegal;
|
||||
break;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
*target++ = ch;
|
||||
}
|
||||
} else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
|
||||
}
|
||||
else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
|
||||
result = sourceIllegal;
|
||||
*target++ = UNI_REPLACEMENT_CHAR;
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user