Merge da68aaaf50 into ed2fe134aa

2025-12-16 04:09:39 +00:00 · 2025-12-15 17:14:34 -06:00 · 2025-12-15 17:14:34 -06:00 · 4291ad5c66
commit 4291ad5c66
parent ed2fe134aa da68aaaf50
12 changed files with 64 additions and 195 deletions
--- a/Source/Core/VideoBackends/D3D/D3DPerfQuery.cpp
+++ b/Source/Core/VideoBackends/D3D/D3DPerfQuery.cpp
@@ -75,31 +75,6 @@ void PerfQuery::ResetQuery()
    result.store(0, std::memory_order_relaxed);
 }

-u32 PerfQuery::GetQueryResult(PerfQueryType type)
-{
-  u32 result = 0;
-
-  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-  {
-    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_BLEND_INPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
-             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_EFB_COPY_CLOCKS)
-  {
-    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
-  }
-
-  return result / 4;
-}
-
 void PerfQuery::FlushOne()
 {
  auto& entry = m_query_buffer[m_query_read_pos];
@@ -161,9 +136,4 @@ void PerfQuery::WeakFlush()
  }
 }

-bool PerfQuery::IsFlushed() const
-{
-  return m_query_count.load(std::memory_order_relaxed) == 0;
-}
-
 }  // namespace DX11
--- a/Source/Core/VideoBackends/D3D/D3DPerfQuery.h
+++ b/Source/Core/VideoBackends/D3D/D3DPerfQuery.h
@@ -9,7 +9,7 @@

 namespace DX11
 {
-class PerfQuery : public PerfQueryBase
+class PerfQuery : public HardwarePerfQueryBase
 {
 public:
  PerfQuery();
@@ -18,9 +18,7 @@ public:
  void EnableQuery(PerfQueryGroup group) override;
  void DisableQuery(PerfQueryGroup group) override;
  void ResetQuery() override;
-  u32 GetQueryResult(PerfQueryType type) override;
  void FlushResults() override;
-  bool IsFlushed() const override;

 private:
  struct ActiveQuery
--- a/Source/Core/VideoBackends/D3D12/D3D12PerfQuery.cpp
+++ b/Source/Core/VideoBackends/D3D12/D3D12PerfQuery.cpp
@@ -109,41 +109,12 @@ void PerfQuery::ResetQuery()
  }
 }

-u32 PerfQuery::GetQueryResult(PerfQueryType type)
-{
-  u32 result = 0;
-  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-  {
-    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_BLEND_INPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
-             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_EFB_COPY_CLOCKS)
-  {
-    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
-  }
-
-  return result / 4;
-}
-
 void PerfQuery::FlushResults()
 {
  while (!IsFlushed())
    PartialFlush(true, true);
 }

-bool PerfQuery::IsFlushed() const
-{
-  return m_query_count.load(std::memory_order_relaxed) == 0;
-}
-
 void PerfQuery::ResolveQueries()
 {
  // Do we need to split the resolve as it's wrapping around?
--- a/Source/Core/VideoBackends/D3D12/D3D12PerfQuery.h
+++ b/Source/Core/VideoBackends/D3D12/D3D12PerfQuery.h
@@ -9,7 +9,7 @@

 namespace DX12
 {
-class PerfQuery final : public PerfQueryBase
+class PerfQuery final : public HardwarePerfQueryBase
 {
 public:
  PerfQuery();
@@ -23,9 +23,7 @@ public:
  void EnableQuery(PerfQueryGroup group) override;
  void DisableQuery(PerfQueryGroup group) override;
  void ResetQuery() override;
-  u32 GetQueryResult(PerfQueryType type) override;
  void FlushResults() override;
-  bool IsFlushed() const override;

 private:
  struct ActiveQuery
--- a/Source/Core/VideoBackends/Metal/MTLPerfQuery.h
+++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.h
@@ -10,15 +10,13 @@

 namespace Metal
 {
-class PerfQuery final : public PerfQueryBase
+class PerfQuery final : public HardwarePerfQueryBase
 {
 public:
  void EnableQuery(PerfQueryGroup type) override;
  void DisableQuery(PerfQueryGroup type) override;
  void ResetQuery() override;
-  u32 GetQueryResult(PerfQueryType type) override;
  void FlushResults() override;
-  bool IsFlushed() const override;

  /// Notify PerfQuery of a new pending encoder
  /// One call to ReturnResults should be made for every call to IncCount
--- a/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm
+++ b/Source/Core/VideoBackends/Metal/MTLPerfQuery.mm
@@ -25,30 +25,6 @@ void Metal::PerfQuery::ResetQuery()
    result.store(0, std::memory_order_relaxed);
 }

-u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)
-{
-  u32 result = 0;
-  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-  {
-    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_BLEND_INPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
-             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_EFB_COPY_CLOCKS)
-  {
-    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
-  }
-
-  return result / 4;
-}
-
 void Metal::PerfQuery::FlushResults()
 {
  if (IsFlushed())
@@ -63,11 +39,6 @@ void Metal::PerfQuery::FlushResults()
    m_cv.wait(lock);
 }

-bool Metal::PerfQuery::IsFlushed() const
-{
-  return m_query_count.load(std::memory_order_acquire) == 0;
-}
-
 void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,
                                     u32 query_id)
 {
--- a/Source/Core/VideoBackends/OGL/OGLPerfQuery.cpp
+++ b/Source/Core/VideoBackends/OGL/OGLPerfQuery.cpp
@@ -30,27 +30,6 @@ PerfQuery::PerfQuery() : m_query_read_pos()
  ResetQuery();
 }

-void PerfQuery::EnableQuery(PerfQueryGroup group)
-{
-  m_query->EnableQuery(group);
-}
-
-void PerfQuery::DisableQuery(PerfQueryGroup group)
-{
-  m_query->DisableQuery(group);
-}
-
-bool PerfQuery::IsFlushed() const
-{
-  return m_query_count.load(std::memory_order_relaxed) == 0;
-}
-
-// TODO: could selectively flush things, but I don't think that will do much
-void PerfQuery::FlushResults()
-{
-  m_query->FlushResults();
-}
-
 void PerfQuery::ResetQuery()
 {
  m_query_count.store(0, std::memory_order_relaxed);
@@ -58,31 +37,6 @@ void PerfQuery::ResetQuery()
    result.store(0, std::memory_order_relaxed);
 }

-u32 PerfQuery::GetQueryResult(PerfQueryType type)
-{
-  u32 result = 0;
-
-  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-  {
-    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_BLEND_INPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
-             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_EFB_COPY_CLOCKS)
-  {
-    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
-  }
-
-  return result / 4;
-}
-
 // Implementations
 PerfQueryGL::PerfQueryGL(GLenum query_type) : m_query_type(query_type)
 {
--- a/Source/Core/VideoBackends/OGL/OGLPerfQuery.h
+++ b/Source/Core/VideoBackends/OGL/OGLPerfQuery.h
@@ -14,17 +14,12 @@ namespace OGL
 {
 std::unique_ptr<PerfQueryBase> GetPerfQuery(bool is_gles);

-class PerfQuery : public PerfQueryBase
+class PerfQuery : public HardwarePerfQueryBase
 {
 public:
  PerfQuery();
  ~PerfQuery() override {}
-  void EnableQuery(PerfQueryGroup group) override;
-  void DisableQuery(PerfQueryGroup group) override;
  void ResetQuery() override;
-  u32 GetQueryResult(PerfQueryType type) override;
-  void FlushResults() override;
-  bool IsFlushed() const override;

 protected:
  struct ActiveQuery
@@ -39,10 +34,6 @@ protected:
  // This contains gl query objects with unretrieved results.
  std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer;
  u32 m_query_read_pos;
-
-private:
-  // Implementation
-  std::unique_ptr<PerfQuery> m_query;
 };

 // Implementations
--- a/Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp
+++ b/Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp
@@ -102,30 +102,6 @@ void PerfQuery::ResetQuery()
  std::memset(m_query_buffer.data(), 0, sizeof(ActiveQuery) * m_query_buffer.size());
 }

-u32 PerfQuery::GetQueryResult(PerfQueryType type)
-{
-  u32 result = 0;
-  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
-  {
-    result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_BLEND_INPUT)
-  {
-    result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
-             m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
-  }
-  else if (type == PQ_EFB_COPY_CLOCKS)
-  {
-    result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
-  }
-
-  return result / 4;
-}
-
 void PerfQuery::FlushResults()
 {
  if (!IsFlushed())
@@ -134,11 +110,6 @@ void PerfQuery::FlushResults()
  ASSERT(IsFlushed());
 }

-bool PerfQuery::IsFlushed() const
-{
-  return m_query_count.load(std::memory_order_relaxed) == 0;
-}
-
 bool PerfQuery::CreateQueryPool()
 {
  VkQueryPoolCreateInfo info = {
--- a/Source/Core/VideoBackends/Vulkan/VKPerfQuery.h
+++ b/Source/Core/VideoBackends/Vulkan/VKPerfQuery.h
@@ -12,7 +12,7 @@

 namespace Vulkan
 {
-class PerfQuery : public PerfQueryBase
+class PerfQuery : public HardwarePerfQueryBase
 {
 public:
  PerfQuery();
@@ -25,9 +25,7 @@ public:
  void EnableQuery(PerfQueryGroup group) override;
  void DisableQuery(PerfQueryGroup group) override;
  void ResetQuery() override;
-  u32 GetQueryResult(PerfQueryType type) override;
  void FlushResults() override;
-  bool IsFlushed() const override;

 private:
  // u32 is used for the sample counts.
--- a/Source/Core/VideoCommon/PerfQueryBase.cpp
+++ b/Source/Core/VideoCommon/PerfQueryBase.cpp
@@ -9,7 +9,50 @@

 std::unique_ptr<PerfQueryBase> g_perf_query;

+PerfQueryBase::~PerfQueryBase() = default;  // out-of-line definition of the virtual destructor
+
 bool PerfQueryBase::ShouldEmulate()
 {
  return g_ActiveConfig.bPerfQueriesEnable;
 }
+
+u32 HardwarePerfQueryBase::GetQueryResult(PerfQueryType type)
+{
+  // Relaxed snapshot of one group's accumulated counter.
+  const auto read_group = [this](PerfQueryGroup group) -> u32 {
+    return m_results[group].load(std::memory_order_relaxed);
+  };
+
+  switch (type)
+  {
+  case PQ_ZCOMP_INPUT_ZCOMPLOC:
+  case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
+    // Need for Speed: Most Wanted uses this for the sun.
+    return read_group(PQG_ZCOMP_ZCOMPLOC) / 4;
+
+  case PQ_ZCOMP_INPUT:
+  case PQ_ZCOMP_OUTPUT:
+    // Timesplitters Future Perfect uses this for the story mode opening cinematic lens flare.
+    return read_group(PQG_ZCOMP);
+
+  case PQ_BLEND_INPUT:
+  {
+    // Super Mario Sunshine uses this to complete "Scrubbing Sirena Beach".
+    const u32 zcomp = read_group(PQG_ZCOMP);
+    const u32 zcomp_zcomploc = read_group(PQG_ZCOMP_ZCOMPLOC);
+    return (zcomp + zcomp_zcomploc) / 4;
+  }
+
+  case PQ_EFB_COPY_CLOCKS:
+    return read_group(PQG_EFB_COPY_CLOCKS);
+
+  default:
+    // Unhandled query types report zero.
+    return 0;
+  }
+}
+
+bool HardwarePerfQueryBase::IsFlushed() const
+{
+  return m_query_count.load(std::memory_order_acquire) == 0;  // acquire, as Metal's version did
+}
--- a/Source/Core/VideoCommon/PerfQueryBase.h
+++ b/Source/Core/VideoCommon/PerfQueryBase.h
@@ -31,8 +31,7 @@ enum PerfQueryGroup
 class PerfQueryBase
 {
 public:
-  PerfQueryBase() : m_query_count(0) {}
-  virtual ~PerfQueryBase() {}
+  virtual ~PerfQueryBase();

  virtual bool Initialize() { return true; }

@@ -44,29 +43,36 @@ public:
  // The call to EnableQuery() should be placed immediately before the draw command, otherwise
  // there is a risk of GPU resets if the query is left open and the buffer is submitted during
  // resource binding (D3D12/Vulkan).
-  virtual void EnableQuery(PerfQueryGroup type) {}
+  virtual void EnableQuery(PerfQueryGroup type) = 0;

  // Stop querying the specified value for the following host GPU commands
-  virtual void DisableQuery(PerfQueryGroup type) {}
+  virtual void DisableQuery(PerfQueryGroup type) = 0;

  // Reset query counters to zero and drop any pending queries
-  virtual void ResetQuery() {}
+  virtual void ResetQuery() = 0;

  // Return the measured value for the specified query type
  // NOTE: Called from CPU thread
-  virtual u32 GetQueryResult(PerfQueryType type) { return 0; }
+  virtual u32 GetQueryResult(PerfQueryType type) = 0;

  // Request the value of any pending queries - causes a pipeline flush and thus should be used
  // carefully!
-  virtual void FlushResults() {}
+  virtual void FlushResults() = 0;

  // True if there are no further pending query results
  // NOTE: Called from CPU thread
-  virtual bool IsFlushed() const { return true; }
+  virtual bool IsFlushed() const = 0;
+};
+
+class HardwarePerfQueryBase : public PerfQueryBase  // base of the backend PerfQuery classes
+{
+public:
+  u32 GetQueryResult(PerfQueryType type) final;  // computed from m_results; not overridable
+  bool IsFlushed() const final;                  // true when m_query_count is zero

 protected:
-  std::atomic<u32> m_query_count;
-  std::array<std::atomic<u32>, PQG_NUM_MEMBERS> m_results;
+  std::atomic<u32> m_query_count{};                           // value-initialized to zero
+  std::array<std::atomic<u32>, PQG_NUM_MEMBERS> m_results{};  // indexed by PerfQueryGroup
 };

 extern std::unique_ptr<PerfQueryBase> g_perf_query;