This commit is contained in:
Jordan Woyak 2025-12-15 17:14:34 -06:00 committed by GitHub
commit 4291ad5c66
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 64 additions and 195 deletions

View File

@ -75,31 +75,6 @@ void PerfQuery::ResetQuery()
result.store(0, std::memory_order_relaxed);
}
// Returns the accumulated counter backing the requested query type, scaled down by 4.
// Query types that match none of the cases below yield 0.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
  u32 raw = 0;
  switch (type)
  {
  case PQ_ZCOMP_INPUT_ZCOMPLOC:
  case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
    raw = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
    break;
  case PQ_ZCOMP_INPUT:
  case PQ_ZCOMP_OUTPUT:
    raw = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
    break;
  case PQ_BLEND_INPUT:
    // Blend input is reported as the sum of both Z-compare groups.
    raw = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
          m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
    break;
  case PQ_EFB_COPY_CLOCKS:
    raw = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
    break;
  default:
    break;
  }
  return raw / 4;
}
void PerfQuery::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
@ -161,9 +136,4 @@ void PerfQuery::WeakFlush()
}
}
// True when the pending-query counter has dropped to zero.
bool PerfQuery::IsFlushed() const
{
  const auto pending = m_query_count.load(std::memory_order_relaxed);
  return pending == 0;
}
} // namespace DX11

View File

@ -9,7 +9,7 @@
namespace DX11
{
class PerfQuery : public PerfQueryBase
class PerfQuery : public HardwarePerfQueryBase
{
public:
PerfQuery();
@ -18,9 +18,7 @@ public:
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;
private:
struct ActiveQuery

View File

@ -109,41 +109,12 @@ void PerfQuery::ResetQuery()
}
}
// Maps a query type onto the result group(s) that back it and returns that value divided by 4.
// Unrecognised types return 0.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
  // Relaxed read of a single result group.
  const auto counter = [this](PerfQueryGroup group) -> u32 {
    return m_results[group].load(std::memory_order_relaxed);
  };

  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
    return counter(PQG_ZCOMP_ZCOMPLOC) / 4;

  if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
    return counter(PQG_ZCOMP) / 4;

  // Blend input combines both Z-compare groups before scaling.
  if (type == PQ_BLEND_INPUT)
    return (counter(PQG_ZCOMP) + counter(PQG_ZCOMP_ZCOMPLOC)) / 4;

  if (type == PQ_EFB_COPY_CLOCKS)
    return counter(PQG_EFB_COPY_CLOCKS) / 4;

  return 0;
}
// Keeps calling PartialFlush(true, true) until IsFlushed() reports true.
void PerfQuery::FlushResults()
{
  for (;;)
  {
    if (IsFlushed())
      return;
    PartialFlush(true, true);
  }
}
// Reports whether the pending-query counter currently reads zero.
bool PerfQuery::IsFlushed() const
{
  return 0 == m_query_count.load(std::memory_order_relaxed);
}
void PerfQuery::ResolveQueries()
{
// Do we need to split the resolve as it's wrapping around?

View File

@ -9,7 +9,7 @@
namespace DX12
{
class PerfQuery final : public PerfQueryBase
class PerfQuery final : public HardwarePerfQueryBase
{
public:
PerfQuery();
@ -23,9 +23,7 @@ public:
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;
private:
struct ActiveQuery

View File

@ -10,15 +10,13 @@
namespace Metal
{
class PerfQuery final : public PerfQueryBase
class PerfQuery final : public HardwarePerfQueryBase
{
public:
void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;
/// Notify PerfQuery of a new pending encoder
/// One call to ReturnResults should be made for every call to IncCount

View File

@ -25,30 +25,6 @@ void Metal::PerfQuery::ResetQuery()
result.store(0, std::memory_order_relaxed);
}
// Looks up the result group(s) behind the requested query type and returns the value divided
// by 4. Any other type returns 0.
u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)
{
  switch (type)
  {
  case PQ_ZCOMP_INPUT_ZCOMPLOC:
  case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
    return m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed) / 4;

  case PQ_ZCOMP_INPUT:
  case PQ_ZCOMP_OUTPUT:
    return m_results[PQG_ZCOMP].load(std::memory_order_relaxed) / 4;

  case PQ_BLEND_INPUT:
  {
    // Sum both Z-compare groups first, then scale the total.
    const u32 combined = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
                         m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
    return combined / 4;
  }

  case PQ_EFB_COPY_CLOCKS:
    return m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed) / 4;

  default:
    return 0;
  }
}
void Metal::PerfQuery::FlushResults()
{
if (IsFlushed())
@ -63,11 +39,6 @@ void Metal::PerfQuery::FlushResults()
m_cv.wait(lock);
}
// True when the pending-query counter reads zero; the counter is loaded with acquire ordering.
bool Metal::PerfQuery::IsFlushed() const
{
  const auto outstanding = m_query_count.load(std::memory_order_acquire);
  return outstanding == 0;
}
void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,
u32 query_id)
{

View File

@ -30,27 +30,6 @@ PerfQuery::PerfQuery() : m_query_read_pos()
ResetQuery();
}
// Forwards the request to the implementation object held in m_query.
void PerfQuery::EnableQuery(PerfQueryGroup group)
{
  auto& impl = *m_query;
  impl.EnableQuery(group);
}
// Forwards the request to the implementation object held in m_query.
void PerfQuery::DisableQuery(PerfQueryGroup group)
{
  auto& impl = *m_query;
  impl.DisableQuery(group);
}
// True once no queries remain pending (the counter reads zero).
bool PerfQuery::IsFlushed() const
{
  return !m_query_count.load(std::memory_order_relaxed);
}
// Delegates the flush to the implementation object held in m_query.
// TODO: could selectively flush things, but I don't think that will do much
void PerfQuery::FlushResults()
{
  auto& impl = *m_query;
  impl.FlushResults();
}
void PerfQuery::ResetQuery()
{
m_query_count.store(0, std::memory_order_relaxed);
@ -58,31 +37,6 @@ void PerfQuery::ResetQuery()
result.store(0, std::memory_order_relaxed);
}
// Returns the counter(s) associated with the requested query type, divided by 4.
// A type matching none of the known values contributes nothing and yields 0.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
  const bool wants_zcomploc =
      type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC;
  const bool wants_zcomp = type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT;
  // Blend input draws on both Z-compare groups.
  const bool wants_blend = type == PQ_BLEND_INPUT;
  const bool wants_copy_clocks = type == PQ_EFB_COPY_CLOCKS;

  u32 total = 0;
  if (wants_zcomp || wants_blend)
    total += m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
  if (wants_zcomploc || wants_blend)
    total += m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
  if (wants_copy_clocks)
    total += m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);

  return total / 4;
}
// Implementations
PerfQueryGL::PerfQueryGL(GLenum query_type) : m_query_type(query_type)
{

View File

@ -14,17 +14,12 @@ namespace OGL
{
std::unique_ptr<PerfQueryBase> GetPerfQuery(bool is_gles);
class PerfQuery : public PerfQueryBase
class PerfQuery : public HardwarePerfQueryBase
{
public:
PerfQuery();
~PerfQuery() override {}
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;
protected:
struct ActiveQuery
@ -39,10 +34,6 @@ protected:
// This contains gl query objects with unretrieved results.
std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer;
u32 m_query_read_pos;
private:
// Implementation
std::unique_ptr<PerfQuery> m_query;
};
// Implementations

View File

@ -102,30 +102,6 @@ void PerfQuery::ResetQuery()
std::memset(m_query_buffer.data(), 0, sizeof(ActiveQuery) * m_query_buffer.size());
}
// Resolves a query type to its backing result group(s) and returns the value divided by 4.
// Types that are not handled below return 0.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
  if (type == PQ_EFB_COPY_CLOCKS)
    return m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed) / 4;

  if (type == PQ_BLEND_INPUT)
  {
    // Both Z-compare groups are summed before the total is scaled.
    const u32 zcomp = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
    const u32 zcomploc = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
    return (zcomp + zcomploc) / 4;
  }

  if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
    return m_results[PQG_ZCOMP].load(std::memory_order_relaxed) / 4;

  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
    return m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed) / 4;

  return 0;
}
void PerfQuery::FlushResults()
{
if (!IsFlushed())
@ -134,11 +110,6 @@ void PerfQuery::FlushResults()
ASSERT(IsFlushed());
}
// Reports whether any queries are still pending; true means the counter is zero.
bool PerfQuery::IsFlushed() const
{
  const auto remaining = m_query_count.load(std::memory_order_relaxed);
  return remaining == 0;
}
bool PerfQuery::CreateQueryPool()
{
VkQueryPoolCreateInfo info = {

View File

@ -12,7 +12,7 @@
namespace Vulkan
{
class PerfQuery : public PerfQueryBase
class PerfQuery : public HardwarePerfQueryBase
{
public:
PerfQuery();
@ -25,9 +25,7 @@ public:
void EnableQuery(PerfQueryGroup group) override;
void DisableQuery(PerfQueryGroup group) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;
private:
// u32 is used for the sample counts.

View File

@ -9,7 +9,50 @@
// Definition of the global perf-query instance (declared `extern` alongside the class).
std::unique_ptr<PerfQueryBase> g_perf_query;
// Out-of-line definition of the virtual destructor declared in PerfQueryBase.
PerfQueryBase::~PerfQueryBase() = default;
// Returns the current value of the bPerfQueriesEnable setting in the active configuration.
bool PerfQueryBase::ShouldEmulate()
{
  const bool perf_queries_enabled = g_ActiveConfig.bPerfQueriesEnable;
  return perf_queries_enabled;
}
// Returns the measured value for the requested query type.
//
// Each type maps onto one or two entries of m_results. The raw counter is divided by 4 for
// every type, exactly as the former per-backend implementations did. Applying the scale in one
// place keeps PQ_BLEND_INPUT consistent with the PQ_ZCOMP_* and PQ_ZCOMP_*_ZCOMPLOC values it is
// built from. Types not listed below return 0.
u32 HardwarePerfQueryBase::GetQueryResult(PerfQueryType type)
{
  u32 result = 0;

  switch (type)
  {
  case PQ_ZCOMP_INPUT_ZCOMPLOC:
  case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
    // Need for Speed: Most Wanted uses this for the sun.
    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
    break;

  case PQ_ZCOMP_INPUT:
  case PQ_ZCOMP_OUTPUT:
    // Timesplitters Future Perfect uses this for the story mode opening cinematic lens flare.
    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
    break;

  case PQ_BLEND_INPUT:
    // Super Mario Sunshine uses this to complete "Scrubbing Sirena Beach".
    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
    break;

  case PQ_EFB_COPY_CLOCKS:
    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
    break;

  default:
    break;
  }

  // Uniform scale for all query types (previously only some cases were divided).
  return result / 4;
}
// True if there are no further pending query results, i.e. the pending counter reads zero.
bool HardwarePerfQueryBase::IsFlushed() const
{
  const auto pending = m_query_count.load(std::memory_order_relaxed);
  return pending == 0;
}

View File

@ -31,8 +31,7 @@ enum PerfQueryGroup
class PerfQueryBase
{
public:
PerfQueryBase() : m_query_count(0) {}
virtual ~PerfQueryBase() {}
virtual ~PerfQueryBase();
virtual bool Initialize() { return true; }
@ -44,29 +43,36 @@ public:
// The call to EnableQuery() should be placed immediately before the draw command, otherwise
// there is a risk of GPU resets if the query is left open and the buffer is submitted during
// resource binding (D3D12/Vulkan).
virtual void EnableQuery(PerfQueryGroup type) {}
virtual void EnableQuery(PerfQueryGroup type) = 0;
// Stop querying the specified value for the following host GPU commands
virtual void DisableQuery(PerfQueryGroup type) {}
virtual void DisableQuery(PerfQueryGroup type) = 0;
// Reset query counters to zero and drop any pending queries
virtual void ResetQuery() {}
virtual void ResetQuery() = 0;
// Return the measured value for the specified query type
// NOTE: Called from CPU thread
virtual u32 GetQueryResult(PerfQueryType type) { return 0; }
virtual u32 GetQueryResult(PerfQueryType type) = 0;
// Request the value of any pending queries - causes a pipeline flush and thus should be used
// carefully!
virtual void FlushResults() {}
virtual void FlushResults() = 0;
// True if there are no further pending query results
// NOTE: Called from CPU thread
virtual bool IsFlushed() const { return true; }
virtual bool IsFlushed() const = 0;
};
// Base class for backends that share the same result bookkeeping: it provides the final
// implementations of GetQueryResult() and IsFlushed() on top of the counters below.
class HardwarePerfQueryBase : public PerfQueryBase
{
public:
  // Return the measured value for the specified query type.
  u32 GetQueryResult(PerfQueryType type) final;

  // True if there are no further pending query results.
  bool IsFlushed() const final;

protected:
  // Count of pending queries; IsFlushed() reports true when this reads zero.
  std::atomic<u32> m_query_count{};

  // One accumulated counter per query group, read by GetQueryResult().
  std::array<std::atomic<u32>, PQG_NUM_MEMBERS> m_results{};
};
extern std::unique_ptr<PerfQueryBase> g_perf_query;