mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-12-16 04:09:39 +00:00
Merge pull request #14020 from jordan-woyak/string-util-cleanups
StringUtil: Cleanups and add some character encoding conversion unit tests.
This commit is contained in:
commit
958db7c78c
@ -12,9 +12,7 @@
|
|||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <istream>
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <limits.h>
|
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -25,18 +23,18 @@
|
|||||||
#include <fmt/ranges.h>
|
#include <fmt/ranges.h>
|
||||||
|
|
||||||
#include "Common/CommonFuncs.h"
|
#include "Common/CommonFuncs.h"
|
||||||
#include "Common/CommonPaths.h"
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/Logging/Log.h"
|
#include "Common/Logging/Log.h"
|
||||||
#include "Common/Swap.h"
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
#include <shellapi.h>
|
#include <shellapi.h>
|
||||||
constexpr u32 CODEPAGE_SHIFT_JIS = 932;
|
constexpr u32 CODEPAGE_SHIFT_JIS = 932;
|
||||||
constexpr u32 CODEPAGE_WINDOWS_1252 = 1252;
|
constexpr u32 CODEPAGE_WINDOWS_1252 = 1252;
|
||||||
|
|
||||||
|
#include "Common/Swap.h"
|
||||||
#else
|
#else
|
||||||
#include <errno.h>
|
#include <cerrno>
|
||||||
#include <iconv.h>
|
#include <iconv.h>
|
||||||
#include <locale.h>
|
#include <locale.h>
|
||||||
#endif
|
#endif
|
||||||
@ -85,8 +83,6 @@ std::string HexDump(const u8* data, size_t size)
|
|||||||
|
|
||||||
bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args)
|
bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args)
|
||||||
{
|
{
|
||||||
int writtenCount;
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
// You would think *printf are simple, right? Iterate on each character,
|
// You would think *printf are simple, right? Iterate on each character,
|
||||||
// if it's a format specifier handle it properly, etc.
|
// if it's a format specifier handle it properly, etc.
|
||||||
@ -114,28 +110,26 @@ bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list ar
|
|||||||
static _locale_t c_locale = nullptr;
|
static _locale_t c_locale = nullptr;
|
||||||
if (!c_locale)
|
if (!c_locale)
|
||||||
c_locale = _create_locale(LC_ALL, "C");
|
c_locale = _create_locale(LC_ALL, "C");
|
||||||
writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args);
|
const int written_count = _vsnprintf_l(out, outsize, format, c_locale, args);
|
||||||
#else
|
#else
|
||||||
#if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
|
#if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
|
||||||
locale_t previousLocale = uselocale(GetCLocale());
|
locale_t previousLocale = uselocale(GetCLocale());
|
||||||
#endif
|
#endif
|
||||||
writtenCount = vsnprintf(out, outsize, format, args);
|
const int written_count = vsnprintf(out, outsize, format, args);
|
||||||
#if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
|
#if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
|
||||||
uselocale(previousLocale);
|
uselocale(previousLocale);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (writtenCount > 0 && writtenCount < outsize)
|
if (written_count > 0 && written_count < outsize)
|
||||||
{
|
{
|
||||||
out[writtenCount] = '\0';
|
out[written_count] = '\0';
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
out[outsize - 1] = '\0';
|
out[outsize - 1] = '\0';
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
std::string StringFromFormat(const char* format, ...)
|
std::string StringFromFormat(const char* format, ...)
|
||||||
{
|
{
|
||||||
@ -182,7 +176,7 @@ std::string ArrayToString(const u8* data, u32 size, int line_len, bool spaces)
|
|||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << std::setfill('0') << std::hex;
|
oss << std::setfill('0') << std::hex;
|
||||||
|
|
||||||
for (int line = 0; size; ++data, --size)
|
for (int line = 0; size != 0; ++data, --size)
|
||||||
{
|
{
|
||||||
oss << std::setw(2) << static_cast<int>(*data);
|
oss << std::setw(2) << static_cast<int>(*data);
|
||||||
|
|
||||||
@ -205,7 +199,7 @@ static std::string_view StripEnclosingChars(std::string_view str, T chars)
|
|||||||
|
|
||||||
if (str.npos != s)
|
if (str.npos != s)
|
||||||
return str.substr(s, str.find_last_not_of(chars) - s + 1);
|
return str.substr(s, str.find_last_not_of(chars) - s + 1);
|
||||||
else
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -227,7 +221,7 @@ std::string_view StripQuotes(std::string_view s)
|
|||||||
{
|
{
|
||||||
if (!s.empty() && '\"' == s[0] && '\"' == *s.rbegin())
|
if (!s.empty() && '\"' == s[0] && '\"' == *s.rbegin())
|
||||||
return s.substr(1, s.size() - 2);
|
return s.substr(1, s.size() - 2);
|
||||||
else
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -305,10 +299,12 @@ bool SplitPath(std::string_view full_path, std::string* path, std::string* filen
|
|||||||
if (full_path.empty())
|
if (full_path.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
size_t dir_end = full_path.find_last_of("/"
|
size_t dir_end = full_path.find_last_of(
|
||||||
// Windows needs the : included for something like just "C:" to be considered a directory
|
// Windows needs the : included for something like just "C:" to be considered a directory
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
":"
|
"/:"
|
||||||
|
#else
|
||||||
|
'/'
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
if (std::string::npos == dir_end)
|
if (std::string::npos == dir_end)
|
||||||
@ -351,7 +347,8 @@ std::string WithUnifiedPathSeparators(std::string path)
|
|||||||
|
|
||||||
std::string PathToFileName(std::string_view path)
|
std::string PathToFileName(std::string_view path)
|
||||||
{
|
{
|
||||||
std::string file_name, extension;
|
std::string file_name;
|
||||||
|
std::string extension;
|
||||||
SplitPath(path, nullptr, &file_name, &extension);
|
SplitPath(path, nullptr, &file_name, &extension);
|
||||||
return file_name + extension;
|
return file_name + extension;
|
||||||
}
|
}
|
||||||
@ -485,14 +482,15 @@ std::string UTF16BEToUTF8(const char16_t* str, size_t max_size)
|
|||||||
#else
|
#else
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_view<T> input)
|
static std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_view<T> input)
|
||||||
{
|
{
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
iconv_t const conv_desc = iconv_open(tocode, fromcode);
|
auto* const conv_desc = iconv_open(tocode, fromcode);
|
||||||
if ((iconv_t)-1 == conv_desc)
|
if ((iconv_t)-1 == conv_desc)
|
||||||
{
|
{
|
||||||
ERROR_LOG_FMT(COMMON, "Iconv initialization failure [{}]: {}", fromcode, strerror(errno));
|
ERROR_LOG_FMT(COMMON, "Iconv initialization failure [{}]: {}", fromcode,
|
||||||
|
Common::LastStrerrorString());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -502,9 +500,9 @@ std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_v
|
|||||||
std::string out_buffer;
|
std::string out_buffer;
|
||||||
out_buffer.resize(out_buffer_size);
|
out_buffer.resize(out_buffer_size);
|
||||||
|
|
||||||
auto src_buffer = input.data();
|
auto* src_buffer = input.data();
|
||||||
size_t src_bytes = in_bytes;
|
size_t src_bytes = in_bytes;
|
||||||
auto dst_buffer = out_buffer.data();
|
auto* dst_buffer = out_buffer.data();
|
||||||
size_t dst_bytes = out_buffer.size();
|
size_t dst_bytes = out_buffer.size();
|
||||||
|
|
||||||
while (src_bytes != 0)
|
while (src_bytes != 0)
|
||||||
@ -525,7 +523,7 @@ std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_v
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ERROR_LOG_FMT(COMMON, "iconv failure [{}]: {}", fromcode, strerror(errno));
|
ERROR_LOG_FMT(COMMON, "iconv failure [{}]: {}", fromcode, Common::LastStrerrorString());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -541,7 +539,7 @@ std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_v
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::string CodeToUTF8(const char* fromcode, std::basic_string_view<T> input)
|
static std::string CodeToUTF8(const char* fromcode, std::basic_string_view<T> input)
|
||||||
{
|
{
|
||||||
return CodeTo("UTF-8", fromcode, input);
|
return CodeTo("UTF-8", fromcode, input);
|
||||||
}
|
}
|
||||||
@ -566,11 +564,9 @@ std::string UTF8ToSHIFTJIS(std::string_view input)
|
|||||||
|
|
||||||
std::string WStringToUTF8(std::wstring_view input)
|
std::string WStringToUTF8(std::wstring_view input)
|
||||||
{
|
{
|
||||||
using codecvt = std::conditional_t<sizeof(wchar_t) == 2, std::codecvt_utf8_utf16<wchar_t>,
|
// Note: Without LE iconv expects a BOM.
|
||||||
std::codecvt_utf8<wchar_t>>;
|
// The "WCHAR_T" code would be appropriate, but it's apparently not in every iconv implementation.
|
||||||
|
return CodeToUTF8((sizeof(wchar_t) == 2) ? "UTF-16LE" : "UTF-32LE", input);
|
||||||
std::wstring_convert<codecvt, wchar_t> converter;
|
|
||||||
return converter.to_bytes(input.data(), input.data() + input.size());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string UTF16BEToUTF8(const char16_t* str, size_t max_size)
|
std::string UTF16BEToUTF8(const char16_t* str, size_t max_size)
|
||||||
|
|||||||
@ -201,13 +201,13 @@ std::string PathToFileName(std::string_view path);
|
|||||||
void StringPopBackIf(std::string* s, char c);
|
void StringPopBackIf(std::string* s, char c);
|
||||||
size_t StringUTF8CodePointCount(std::string_view str);
|
size_t StringUTF8CodePointCount(std::string_view str);
|
||||||
|
|
||||||
std::string CP1252ToUTF8(std::string_view str);
|
std::string CP1252ToUTF8(std::string_view input);
|
||||||
std::string SHIFTJISToUTF8(std::string_view str);
|
std::string SHIFTJISToUTF8(std::string_view input);
|
||||||
std::string UTF8ToSHIFTJIS(std::string_view str);
|
std::string UTF8ToSHIFTJIS(std::string_view input);
|
||||||
std::string WStringToUTF8(std::wstring_view str);
|
std::string WStringToUTF8(std::wstring_view input);
|
||||||
std::string UTF16BEToUTF8(const char16_t* str, size_t max_size); // Stops at \0
|
std::string UTF16BEToUTF8(const char16_t* str, size_t max_size); // Stops at \0
|
||||||
std::string UTF16ToUTF8(std::u16string_view str);
|
std::string UTF16ToUTF8(std::u16string_view input);
|
||||||
std::u16string UTF8ToUTF16(std::string_view str);
|
std::u16string UTF8ToUTF16(std::string_view input);
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
||||||
@ -311,7 +311,7 @@ std::string GetEscapedHtml(std::string html);
|
|||||||
void ToLower(std::string* str);
|
void ToLower(std::string* str);
|
||||||
void ToUpper(std::string* str);
|
void ToUpper(std::string* str);
|
||||||
bool CaseInsensitiveEquals(std::string_view a, std::string_view b);
|
bool CaseInsensitiveEquals(std::string_view a, std::string_view b);
|
||||||
bool CaseInsensitiveContains(std::string_view a, std::string_view b);
|
bool CaseInsensitiveContains(std::string_view haystack, std::string_view needle);
|
||||||
|
|
||||||
// 'std::less'-like comparison function object type for case-insensitive strings.
|
// 'std::less'-like comparison function object type for case-insensitive strings.
|
||||||
struct CaseInsensitiveLess
|
struct CaseInsensitiveLess
|
||||||
|
|||||||
@ -2,10 +2,12 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "Common/StringUtil.h"
|
#include "Common/StringUtil.h"
|
||||||
|
#include "Common/Swap.h"
|
||||||
|
|
||||||
TEST(StringUtil, StringPopBackIf)
|
TEST(StringUtil, StringPopBackIf)
|
||||||
{
|
{
|
||||||
@ -256,3 +258,26 @@ TEST(StringUtil, CaseInsensitiveContains_OverlappingMatches)
|
|||||||
EXPECT_TRUE(Common::CaseInsensitiveContains("aaaaaa", "aa"));
|
EXPECT_TRUE(Common::CaseInsensitiveContains("aaaaaa", "aa"));
|
||||||
EXPECT_TRUE(Common::CaseInsensitiveContains("ababababa", "bABa"));
|
EXPECT_TRUE(Common::CaseInsensitiveContains("ababababa", "bABa"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(StringUtil, CharacterEncodingConversion)
|
||||||
|
{
|
||||||
|
// wstring
|
||||||
|
EXPECT_EQ(WStringToUTF8(L"hello 🐬"), "hello 🐬");
|
||||||
|
|
||||||
|
// UTF-16
|
||||||
|
EXPECT_EQ(UTF16ToUTF8(u"hello 🐬"), "hello 🐬");
|
||||||
|
EXPECT_EQ(UTF8ToUTF16("hello 🐬"), u"hello 🐬");
|
||||||
|
|
||||||
|
// UTF-16BE
|
||||||
|
char16_t utf16be_str[] = u"hello 🐬";
|
||||||
|
for (auto& c : utf16be_str)
|
||||||
|
c = Common::swap16(c);
|
||||||
|
EXPECT_EQ(UTF16BEToUTF8(utf16be_str, 99), "hello 🐬");
|
||||||
|
|
||||||
|
// Shift JIS
|
||||||
|
EXPECT_EQ(SHIFTJISToUTF8("\x83\x43\x83\x8b\x83\x4a"), "イルカ");
|
||||||
|
EXPECT_EQ(UTF8ToSHIFTJIS("イルカ"), "\x83\x43\x83\x8b\x83\x4a");
|
||||||
|
|
||||||
|
// CP1252
|
||||||
|
EXPECT_EQ(CP1252ToUTF8("hello \xa5"), "hello ¥");
|
||||||
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user