Merge pull request #14020 from jordan-woyak/string-util-cleanups

StringUtil: Cleanups and add some character encoding conversion unit tests.
2025-12-16 04:09:39 +00:00 · 2025-11-08 17:36:54 +01:00 · 2025-11-08 17:36:54 +01:00 · 958db7c78c
commit 958db7c78c
parent 126bbcd72f 5650b2ef37
3 changed files with 64 additions and 43 deletions
--- a/Source/Core/Common/StringUtil.cpp
+++ b/Source/Core/Common/StringUtil.cpp
@ -12,9 +12,7 @@
 #include <cstdlib>
 #include <cstring>
 #include <iomanip>
 #include <istream>
 #include <iterator>
 #include <limits.h>
 #include <locale>
 #include <sstream>
 #include <string>
@ -25,18 +23,18 @@
 #include <fmt/ranges.h>
 #include "Common/CommonFuncs.h"
 #include "Common/CommonPaths.h"
 #include "Common/CommonTypes.h"
 #include "Common/Logging/Log.h"
+#include "Common/Swap.h"
 #ifdef _WIN32
 #include <Windows.h>
 #include <shellapi.h>
 constexpr u32 CODEPAGE_SHIFT_JIS = 932;
 constexpr u32 CODEPAGE_WINDOWS_1252 = 1252;
-#include "Common/Swap.h"
 #else
-#include <errno.h>
+#include <cerrno>
 #include <iconv.h>
 #include <locale.h>
 #endif
@ -85,8 +83,6 @@ std::string HexDump(const u8* data, size_t size)
 bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args)
 {
-  int writtenCount;
 #ifdef _WIN32
  // You would think *printf are simple, right? Iterate on each character,
  // if it's a format specifier handle it properly, etc.
@ -114,28 +110,26 @@ bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list ar
  static _locale_t c_locale = nullptr;
  if (!c_locale)
    c_locale = _create_locale(LC_ALL, "C");
-  writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args);
+  const int written_count = _vsnprintf_l(out, outsize, format, c_locale, args);
 #else
 #if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
  locale_t previousLocale = uselocale(GetCLocale());
 #endif
-  writtenCount = vsnprintf(out, outsize, format, args);
+  const int written_count = vsnprintf(out, outsize, format, args);
 #if !defined(ANDROID) && !defined(__HAIKU__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
  uselocale(previousLocale);
 #endif
 #endif
-  if (writtenCount > 0 && writtenCount < outsize)
+  if (written_count > 0 && written_count < outsize)
  {
-    out[writtenCount] = '\0';
+    out[written_count] = '\0';
    return true;
  }
-  else
+
-  {
  out[outsize - 1] = '\0';
  return false;
-  }
 }
 std::string StringFromFormat(const char* format, ...)
 {
@ -182,7 +176,7 @@ std::string ArrayToString(const u8* data, u32 size, int line_len, bool spaces)
  std::ostringstream oss;
  oss << std::setfill('0') << std::hex;
-  for (int line = 0; size; ++data, --size)
+  for (int line = 0; size != 0; ++data, --size)
  {
    oss << std::setw(2) << static_cast<int>(*data);
@ -205,7 +199,7 @@ static std::string_view StripEnclosingChars(std::string_view str, T chars)
  if (str.npos != s)
    return str.substr(s, str.find_last_not_of(chars) - s + 1);
-  else
+
  return "";
 }
@ -227,7 +221,7 @@ std::string_view StripQuotes(std::string_view s)
 {
  if (!s.empty() && '\"' == s[0] && '\"' == *s.rbegin())
    return s.substr(1, s.size() - 2);
-  else
+
  return s;
 }
@ -305,10 +299,12 @@ bool SplitPath(std::string_view full_path, std::string* path, std::string* filen
  if (full_path.empty())
    return false;
-  size_t dir_end = full_path.find_last_of("/"
+  size_t dir_end = full_path.find_last_of(
 // Windows needs the : included for something like just "C:" to be considered a directory
 #ifdef _WIN32
-                                          ":"
+      "/:"
+#else
+      '/'
 #endif
  );
  if (std::string::npos == dir_end)
@ -351,7 +347,8 @@ std::string WithUnifiedPathSeparators(std::string path)
 std::string PathToFileName(std::string_view path)
 {
-  std::string file_name, extension;
+  std::string file_name;
+  std::string extension;
  SplitPath(path, nullptr, &file_name, &extension);
  return file_name + extension;
 }
@ -485,14 +482,15 @@ std::string UTF16BEToUTF8(const char16_t* str, size_t max_size)
 #else
 template <typename T>
-std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_view<T> input)
+static std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_view<T> input)
 {
  std::string result;
-  iconv_t const conv_desc = iconv_open(tocode, fromcode);
+  auto* const conv_desc = iconv_open(tocode, fromcode);
  if ((iconv_t)-1 == conv_desc)
  {
-    ERROR_LOG_FMT(COMMON, "Iconv initialization failure [{}]: {}", fromcode, strerror(errno));
+    ERROR_LOG_FMT(COMMON, "Iconv initialization failure [{}]: {}", fromcode,
+                  Common::LastStrerrorString());
  }
  else
  {
@ -502,9 +500,9 @@ std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_v
    std::string out_buffer;
    out_buffer.resize(out_buffer_size);
-    auto src_buffer = input.data();
+    auto* src_buffer = input.data();
    size_t src_bytes = in_bytes;
-    auto dst_buffer = out_buffer.data();
+    auto* dst_buffer = out_buffer.data();
    size_t dst_bytes = out_buffer.size();
    while (src_bytes != 0)
@ -525,7 +523,7 @@ std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_v
        }
        else
        {
-          ERROR_LOG_FMT(COMMON, "iconv failure [{}]: {}", fromcode, strerror(errno));
+          ERROR_LOG_FMT(COMMON, "iconv failure [{}]: {}", fromcode, Common::LastStrerrorString());
          break;
        }
      }
@ -541,7 +539,7 @@ std::string CodeTo(const char* tocode, const char* fromcode, std::basic_string_v
 }
 template <typename T>
-std::string CodeToUTF8(const char* fromcode, std::basic_string_view<T> input)
+static std::string CodeToUTF8(const char* fromcode, std::basic_string_view<T> input)
 {
  return CodeTo("UTF-8", fromcode, input);
 }
@ -566,11 +564,9 @@ std::string UTF8ToSHIFTJIS(std::string_view input)
 std::string WStringToUTF8(std::wstring_view input)
 {
-  using codecvt = std::conditional_t<sizeof(wchar_t) == 2, std::codecvt_utf8_utf16<wchar_t>,
+  // Note: Without LE iconv expects a BOM.
-                                     std::codecvt_utf8<wchar_t>>;
+  // The "WCHAR_T" code would be appropriate, but it's apparently not in every iconv implementation.
-
+  return CodeToUTF8((sizeof(wchar_t) == 2) ? "UTF-16LE" : "UTF-32LE", input);
-  std::wstring_convert<codecvt, wchar_t> converter;
-  return converter.to_bytes(input.data(), input.data() + input.size());
 }
 std::string UTF16BEToUTF8(const char16_t* str, size_t max_size)
--- a/Source/Core/Common/StringUtil.h
+++ b/Source/Core/Common/StringUtil.h
@ -201,13 +201,13 @@ std::string PathToFileName(std::string_view path);
 void StringPopBackIf(std::string* s, char c);
 size_t StringUTF8CodePointCount(std::string_view str);
-std::string CP1252ToUTF8(std::string_view str);
+std::string CP1252ToUTF8(std::string_view input);
-std::string SHIFTJISToUTF8(std::string_view str);
+std::string SHIFTJISToUTF8(std::string_view input);
-std::string UTF8ToSHIFTJIS(std::string_view str);
+std::string UTF8ToSHIFTJIS(std::string_view input);
-std::string WStringToUTF8(std::wstring_view str);
+std::string WStringToUTF8(std::wstring_view input);
 std::string UTF16BEToUTF8(const char16_t* str, size_t max_size);  // Stops at \0
-std::string UTF16ToUTF8(std::u16string_view str);
+std::string UTF16ToUTF8(std::u16string_view input);
-std::u16string UTF8ToUTF16(std::string_view str);
+std::u16string UTF8ToUTF16(std::string_view input);
 #ifdef _WIN32
@ -311,7 +311,7 @@ std::string GetEscapedHtml(std::string html);
 void ToLower(std::string* str);
 void ToUpper(std::string* str);
 bool CaseInsensitiveEquals(std::string_view a, std::string_view b);
-bool CaseInsensitiveContains(std::string_view a, std::string_view b);
+bool CaseInsensitiveContains(std::string_view haystack, std::string_view needle);
 // 'std::less'-like comparison function object type for case-insensitive strings.
 struct CaseInsensitiveLess
--- a/Source/UnitTests/Common/StringUtilTest.cpp
+++ b/Source/UnitTests/Common/StringUtilTest.cpp
@ -2,10 +2,12 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <gtest/gtest.h>
 #include <string>
 #include <vector>
 #include "Common/StringUtil.h"
 #include "Common/Swap.h"
 TEST(StringUtil, StringPopBackIf)
 {
@ -256,3 +258,26 @@ TEST(StringUtil, CaseInsensitiveContains_OverlappingMatches)
  EXPECT_TRUE(Common::CaseInsensitiveContains("aaaaaa", "aa"));
  EXPECT_TRUE(Common::CaseInsensitiveContains("ababababa", "bABa"));
 }
+TEST(StringUtil, CharacterEncodingConversion)
+{
+  // wstring
+  EXPECT_EQ(WStringToUTF8(L"hello 🐬"), "hello 🐬");
+  // UTF-16
+  EXPECT_EQ(UTF16ToUTF8(u"hello 🐬"), "hello 🐬");
+  EXPECT_EQ(UTF8ToUTF16("hello 🐬"), u"hello 🐬");
+  // UTF-16BE
+  char16_t utf16be_str[] = u"hello 🐬";
+  for (auto& c : utf16be_str)
+    c = Common::swap16(c);
+  EXPECT_EQ(UTF16BEToUTF8(utf16be_str, 99), "hello 🐬");
+  // Shift JIS
+  EXPECT_EQ(SHIFTJISToUTF8("\x83\x43\x83\x8b\x83\x4a"), "イルカ");
+  EXPECT_EQ(UTF8ToSHIFTJIS("イルカ"), "\x83\x43\x83\x8b\x83\x4a");
+  // CP1252
+  EXPECT_EQ(CP1252ToUTF8("hello \xa5"), "hello ¥");
+}