VideoCommon: Don't create mipmap vector in TextureInfo

The TextureInfo constructor creates a vector of MipLevels. This could be
good for performance if MipLevels are accessed very often for each
TextureInfo, but that's not the case. Dolphin creates thousands of
TextureInfos per second whose mipmap levels are never accessed, because
the texture cache lookup is a hit. In the uncommon case of a texture
cache miss, the mipmap levels are only looped through once.

To make the common case of texture cache hits as fast as possible, let's
not create a vector in the TextureInfo constructor. This commit
implements a custom iterator for MipLevels instead.

In my testing on the Death Star level of Rogue Squadron 2, this speeds
up TextureInfo::FromStage by 200%, giving an overall emulation speedup
of a bit over 1%. Results on the Hoth level are even better, with
TextureInfo::FromStage being close to 300% faster and overall emulation
being over 4% faster. (Single core, no GPU texture decoding.)
This commit is contained in:
JosJuice 2026-01-17 17:43:06 +01:00
parent 2aee998a8e
commit f7b4d2738b
3 changed files with 152 additions and 79 deletions

View File

@ -1721,32 +1721,36 @@ RcTcacheEntry TextureCacheBase::CreateTextureEntry(
dst_buffer += decoded_texture_size;
}
for (u32 level = 1; level != texLevels; ++level)
for (const auto& mip_level : texture_info.GetMipMapLevels())
{
auto mip_level = texture_info.GetMipMapLevel(level - 1);
if (!mip_level)
if (!mip_level.IsDataValid())
{
ERROR_LOG_FMT(VIDEO, "Trying to use an invalid mipmap address {:#010x}",
texture_info.GetRawAddress());
continue;
}
if (!decode_on_gpu ||
!DecodeTextureOnGPU(entry, level, mip_level->GetData(), mip_level->GetTextureSize(),
texture_info.GetTextureFormat(), mip_level->GetRawWidth(),
mip_level->GetRawHeight(), mip_level->GetExpandedWidth(),
mip_level->GetExpandedHeight(),
!DecodeTextureOnGPU(entry, mip_level.GetLevel(), mip_level.GetData(),
mip_level.GetTextureSize(), texture_info.GetTextureFormat(),
mip_level.GetRawWidth(), mip_level.GetRawHeight(),
mip_level.GetExpandedWidth(), mip_level.GetExpandedHeight(),
creation_info.bytes_per_block *
(mip_level->GetExpandedWidth() / texture_info.GetBlockWidth()),
(mip_level.GetExpandedWidth() / texture_info.GetBlockWidth()),
texture_info.GetTlutAddress(), texture_info.GetTlutFormat()))
{
// No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning
const u32 decoded_mip_size =
mip_level->GetExpandedWidth() * sizeof(u32) * mip_level->GetExpandedHeight();
TexDecoder_Decode(dst_buffer, mip_level->GetData(), mip_level->GetExpandedWidth(),
mip_level->GetExpandedHeight(), texture_info.GetTextureFormat(),
mip_level.GetExpandedWidth() * sizeof(u32) * mip_level.GetExpandedHeight();
TexDecoder_Decode(dst_buffer, mip_level.GetData(), mip_level.GetExpandedWidth(),
mip_level.GetExpandedHeight(), texture_info.GetTextureFormat(),
texture_info.GetTlutAddress(), texture_info.GetTlutFormat());
entry->texture->Load(level, mip_level->GetRawWidth(), mip_level->GetRawHeight(),
mip_level->GetExpandedWidth(), dst_buffer, decoded_mip_size);
entry->texture->Load(mip_level.GetLevel(), mip_level.GetRawWidth(),
mip_level.GetRawHeight(), mip_level.GetExpandedWidth(), dst_buffer,
decoded_mip_size);
arbitrary_mip_detector.AddLevel(mip_level->GetRawWidth(), mip_level->GetRawHeight(),
mip_level->GetExpandedWidth(), dst_buffer);
arbitrary_mip_detector.AddLevel(mip_level.GetRawWidth(), mip_level.GetRawHeight(),
mip_level.GetExpandedWidth(), dst_buffer);
dst_buffer += decoded_mip_size;
}

View File

@ -61,9 +61,9 @@ TextureInfo::TextureInfo(u32 stage, std::span<const u8> data, std::span<const u8
u32 address, TextureFormat texture_format, TLUTFormat tlut_format,
u32 width, u32 height, bool from_tmem, std::span<const u8> tmem_odd,
std::span<const u8> tmem_even, std::optional<u32> mip_count)
: m_ptr(data.data()), m_tlut_ptr(tlut_data.data()), m_address(address), m_from_tmem(from_tmem),
m_tmem_odd(tmem_odd.data()), m_texture_format(texture_format), m_tlut_format(tlut_format),
m_raw_width(width), m_raw_height(height), m_stage(stage)
: m_data(data), m_tlut_data(tlut_data), m_address(address), m_from_tmem(from_tmem),
m_tmem_even(tmem_even), m_tmem_odd(tmem_odd), m_texture_format(texture_format),
m_tlut_format(tlut_format), m_raw_width(width), m_raw_height(height), m_stage(stage)
{
const bool is_palette_texture = IsColorIndexed(m_texture_format);
if (is_palette_texture)
@ -103,23 +103,8 @@ TextureInfo::TextureInfo(u32 stage, std::span<const u8> data, std::span<const u8
// the mipmap chain
// e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so
// we limit the mipmap count to 6 there
const u32 limited_mip_count =
m_limited_mip_count =
std::min<u32>(MathUtil::IntLog2(std::max(width, height)) + 1, raw_mip_count + 1) - 1;
// load mips
std::span<const u8> src_data = Common::SafeSubspan(data, GetTextureSize());
tmem_even = Common::SafeSubspan(tmem_even, GetTextureSize());
for (u32 i = 0; i < limited_mip_count; i++)
{
MipLevel mip_level(i + 1, *this, m_from_tmem, &src_data, &tmem_even, &tmem_odd);
if (!mip_level.IsDataValid())
{
ERROR_LOG_FMT(VIDEO, "Trying to use an invalid mipmap address {:#010x}", GetRawAddress());
break;
}
m_mip_levels.push_back(std::move(mip_level));
}
}
}
@ -133,7 +118,7 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const
if (!IsDataValid())
return NameDetails{};
const u8* tlut = m_tlut_ptr;
const u8* tlut = m_tlut_data.data();
size_t tlut_size = m_palette_size ? *m_palette_size : 0;
// checking for min/max on paletted textures
@ -146,8 +131,8 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const
case 16 * 2:
for (size_t i = 0; i < m_texture_size; i++)
{
const u32 low_nibble = m_ptr[i] & 0xf;
const u32 high_nibble = m_ptr[i] >> 4;
const u32 low_nibble = m_data[i] & 0xf;
const u32 high_nibble = m_data[i] >> 4;
min = std::min({min, low_nibble, high_nibble});
max = std::max({max, low_nibble, high_nibble});
@ -156,7 +141,7 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const
case 256 * 2:
for (size_t i = 0; i < m_texture_size; i++)
{
const u32 texture_byte = m_ptr[i];
const u32 texture_byte = m_data[i];
min = std::min(min, texture_byte);
max = std::max(max, texture_byte);
@ -165,7 +150,7 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const
case 16384 * 2:
for (size_t i = 0; i < m_texture_size; i += sizeof(u16))
{
const u32 texture_halfword = Common::swap16(m_ptr[i]) & 0x3fff;
const u32 texture_halfword = Common::swap16(m_data[i]) & 0x3fff;
min = std::min(min, texture_halfword);
max = std::max(max, texture_halfword);
@ -180,7 +165,7 @@ TextureInfo::NameDetails TextureInfo::CalculateTextureName() const
DEBUG_ASSERT(tlut_size <= m_palette_size.value_or(0));
const u64 tex_hash = XXH64(m_ptr, m_texture_size, 0);
const u64 tex_hash = XXH64(m_data.data(), m_texture_size, 0);
const u64 tlut_hash = tlut_size ? XXH64(tlut, tlut_size, 0) : 0;
return {.base_name = fmt::format("{}{}x{}{}", format_prefix, m_raw_width, m_raw_height,
@ -197,12 +182,12 @@ bool TextureInfo::IsDataValid() const
const u8* TextureInfo::GetData() const
{
return m_ptr;
return m_data.data();
}
const u8* TextureInfo::GetTlutAddress() const
{
return m_tlut_ptr;
return m_tlut_data.data();
}
u32 TextureInfo::GetRawAddress() const
@ -217,7 +202,7 @@ bool TextureInfo::IsFromTmem() const
const u8* TextureInfo::GetTmemOddAddress() const
{
return m_tmem_odd;
return m_tmem_odd.data();
}
TextureFormat TextureInfo::GetTextureFormat() const
@ -277,25 +262,43 @@ u32 TextureInfo::GetStage() const
bool TextureInfo::HasMipMaps() const
{
return !m_mip_levels.empty();
return m_limited_mip_count != 0;
}
u32 TextureInfo::GetLevelCount() const
{
return static_cast<u32>(m_mip_levels.size()) + 1;
return m_limited_mip_count + 1;
}
const TextureInfo::MipLevel* TextureInfo::GetMipMapLevel(u32 level) const
TextureInfo::MipLevels TextureInfo::GetMipMapLevels() const
{
if (level < m_mip_levels.size())
return &m_mip_levels[level];
MipLevelIterator begin;
begin.m_parent = this;
begin.m_from_tmem = m_from_tmem;
begin.m_data = Common::SafeSubspan(m_data, GetTextureSize());
begin.m_tmem_even = Common::SafeSubspan(m_tmem_even, GetTextureSize());
begin.m_tmem_odd = m_tmem_odd;
begin.CreateMipLevel();
return nullptr;
MipLevelIterator end;
end.m_level_index = m_limited_mip_count;
return MipLevels(begin, end);
}
TextureInfo::MipLevel::MipLevel(u32 level, const TextureInfo& parent, bool from_tmem,
std::span<const u8>* src_data, std::span<const u8>* tmem_even,
std::span<const u8>* tmem_odd)
// Returns the size of the base level plus the size of every mip level whose data is valid.
// Mip levels that report invalid data contribute nothing to the total.
u32 TextureInfo::GetFullLevelSize() const
{
  u32 total_size = m_texture_size;
  for (const auto& level : GetMipMapLevels())
  {
    if (!level.IsDataValid())
      continue;

    total_size += level.GetTextureSize();
  }
  return total_size;
}
TextureInfo::MipLevel::MipLevel(u32 level, const TextureInfo& parent, std::span<const u8>* data)
: m_level(level)
{
m_raw_width = std::max(parent.GetRawWidth() >> level, 1u);
m_raw_height = std::max(parent.GetRawHeight() >> level, 1u);
@ -305,23 +308,12 @@ TextureInfo::MipLevel::MipLevel(u32 level, const TextureInfo& parent, bool from_
m_texture_size = TexDecoder_GetTextureSizeInBytes(m_expanded_width, m_expanded_height,
parent.GetTextureFormat());
std::span<const u8>* data = from_tmem ? ((level % 2) ? tmem_odd : tmem_even) : src_data;
m_ptr = data->data();
m_data_valid = data->size() >= m_texture_size;
*data = Common::SafeSubspan(*data, m_texture_size);
}
u32 TextureInfo::GetFullLevelSize() const
{
u32 all_mips_size = 0;
for (const auto& mip_map : m_mip_levels)
{
all_mips_size += mip_map.GetTextureSize();
}
return m_texture_size + all_mips_size;
}
bool TextureInfo::MipLevel::IsDataValid() const
{
return m_data_valid;
@ -356,3 +348,24 @@ u32 TextureInfo::MipLevel::GetRawHeight() const
{
return m_raw_height;
}
// Returns the level number this MipLevel was constructed with.
u32 TextureInfo::MipLevel::GetLevel() const
{
  return m_level;
}
// Pre-increment: steps to the next level index and rebuilds the cached MipLevel for it.
TextureInfo::MipLevelIterator& TextureInfo::MipLevelIterator::operator++()
{
  m_level_index += 1;
  CreateMipLevel();
  return *this;
}
void TextureInfo::MipLevelIterator::CreateMipLevel()
{
const u32 level = m_level_index + 1;
std::span<const u8>* data = m_from_tmem ? ((level % 2) ? &m_tmem_odd : &m_tmem_even) : &m_data;
// The MipLevel constructor mutates the data argument so the next MipLevel gets the right data
m_mip_level = MipLevel(level, *m_parent, data);
}

View File

@ -7,7 +7,6 @@
#include <span>
#include <string>
#include <string_view>
#include <vector>
#include "Common/CommonTypes.h"
@ -61,11 +60,11 @@ public:
u32 GetStage() const;
class MipLevel
class MipLevel final
{
public:
MipLevel(u32 level, const TextureInfo& parent, bool from_tmem, std::span<const u8>* src_data,
std::span<const u8>* tmem_even, std::span<const u8>* tmem_odd);
MipLevel() = default;
MipLevel(u32 level, const TextureInfo& parent, std::span<const u8>* data);
bool IsDataValid() const;
@ -78,43 +77,100 @@ public:
u32 GetRawWidth() const;
u32 GetRawHeight() const;
private:
bool m_data_valid;
u32 GetLevel() const;
const u8* m_ptr;
private:
bool m_data_valid = false;
const u8* m_ptr = nullptr;
u32 m_texture_size = 0;
u32 m_expanded_width;
u32 m_raw_width;
u32 m_expanded_width = 0;
u32 m_raw_width = 0;
u32 m_expanded_height;
u32 m_raw_height;
u32 m_expanded_height = 0;
u32 m_raw_height = 0;
u32 m_level = 0;
};
// Iterator that builds each MipLevel on demand instead of reading it from a stored container.
//
// The iterator keeps its own copy of the current MipLevel, plus the data spans that
// CreateMipLevel() passes on to the MipLevel constructor.
class MipLevelIterator final
{
  friend TextureInfo;

public:
  // An iterator's difference_type has to be a signed integer type; C++20's
  // std::weakly_incrementable rejects unsigned types such as u32.
  // NOTE(review): std::indirectly_readable additionally expects *it to yield the same type for
  // const and non-const iterators, which the two operator* overloads below do not - confirm
  // whether modelling std::input_iterator is wanted before relying on <ranges> algorithms.
  using difference_type = int;
  using value_type = MipLevel;

  // Returns the MipLevel cached inside this iterator.
  MipLevel& operator*() { return m_mip_level; }
  const MipLevel& operator*() const { return m_mip_level; }

  // Pre-increment: advances to the next level and rebuilds the cached MipLevel.
  MipLevelIterator& operator++();

  // Post-increment: advances this iterator and returns a copy of its previous state.
  MipLevelIterator operator++(int)
  {
    auto tmp = *this;
    ++*this;
    return tmp;
  }

  // Only the level index takes part in the comparison, so an iterator that has nothing but
  // m_level_index set can serve as the end marker.
  bool operator==(const MipLevelIterator& other) const
  {
    return m_level_index == other.m_level_index;
  }

private:
  // Rebuilds m_mip_level for the level at m_level_index.
  void CreateMipLevel();

  MipLevel m_mip_level;
  const TextureInfo* m_parent = nullptr;
  // Zero-based position; the MipLevel built for it uses level m_level_index + 1.
  u32 m_level_index = 0;
  bool m_from_tmem = false;
  std::span<const u8> m_data;
  std::span<const u8> m_tmem_even;
  std::span<const u8> m_tmem_odd;
};
class MipLevels final
{
public:
MipLevels(MipLevelIterator begin, MipLevelIterator end) : m_begin(begin), m_end(end) {}
MipLevelIterator begin() const { return m_begin; }
MipLevelIterator end() const { return m_end; }
private:
MipLevelIterator m_begin;
MipLevelIterator m_end;
};
bool HasMipMaps() const;
u32 GetLevelCount() const;
const MipLevel* GetMipMapLevel(u32 level) const;
MipLevels GetMipMapLevels() const;
u32 GetFullLevelSize() const;
static constexpr std::string_view format_prefix{"tex1_"};
private:
const u8* m_ptr;
const u8* m_tlut_ptr;
std::span<const u8> m_data;
std::span<const u8> m_tlut_data;
u32 m_address;
bool m_data_valid;
bool m_from_tmem;
const u8* m_tmem_odd;
std::span<const u8> m_tmem_even;
std::span<const u8> m_tmem_odd;
TextureFormat m_texture_format;
TLUTFormat m_tlut_format;
bool m_mipmaps_enabled = false;
std::vector<MipLevel> m_mip_levels;
u32 m_limited_mip_count = 0;
u32 m_texture_size = 0;
std::optional<u32> m_palette_size;