dolphin/Source/Core/VideoCommon/CPUCull.cpp
Martino Fontana a14c88ba67 Remove unused imports
Yellow squiggly lines begone!
Done automatically on .cpp files through `run-clang-tidy`, with manual corrections to the mistakes.
If an import is directly used, but is technically unnecessary since it's recursively imported by something else, it is *not* removed.
The tool doesn't touch .h files, so I did some of them by hand while fixing errors due to old recursive imports.
Not everything is removed, but the cleanup should be substantial enough.
Because this done on Linux, code that isn't used on it is mostly untouched.
(Hopefully no open PR is depending on these imports...)
2026-01-25 16:12:15 +01:00

175 lines
6.4 KiB
C++

// Copyright 2022 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoCommon/CPUCull.h"
#include "Common/Assert.h"
#include "Common/CPUDetect.h"
#include "Common/MathUtil.h"
#include "Common/MemoryUtil.h"
#include "Core/System.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/XFMemory.h"
// We really want things like c.w * a.x - a.w * c.x to stay symmetric, so they cancel to zero on
// degenerate triangles. Make sure the compiler doesn't optimize in fmas where not requested.
#ifdef _MSC_VER
#pragma fp_contract(off)
#else
// GCC doesn't support any in-file way to turn off fp contract yet
// Not ideal, but worst case scenario its cpu cull is worse at detecting degenerate triangles
// (Most likely to happen on arm, as we don't compile the cull code for x86 fma)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#pragma STDC FP_CONTRACT OFF
#pragma GCC diagnostic pop
#endif
#if defined(_M_X86) || defined(_M_X86_64)
#define USE_SSE
#elif defined(_M_ARM_64)
#define USE_NEON
#else
#define NO_SIMD
#endif
#if defined(USE_SSE)
#include <immintrin.h>
#elif defined(USE_NEON)
#include <arm_neon.h>
#endif
#include "VideoCommon/CPUCullImpl.h"
#ifdef USE_SSE
#define USE_SSE3
#include "VideoCommon/CPUCullImpl.h"
#define USE_SSE41
#include "VideoCommon/CPUCullImpl.h"
#define USE_AVX
#include "VideoCommon/CPUCullImpl.h"
#define USE_FMA
#include "VideoCommon/CPUCullImpl.h"
#endif
#if defined(USE_SSE)
#if defined(__AVX__) && defined(__FMA__)
static constexpr int MIN_SSE = 51;
#elif defined(__AVX__)
static constexpr int MIN_SSE = 50;
#elif defined(__SSE4_1__)
static constexpr int MIN_SSE = 41;
#elif defined(__SSE3__)
static constexpr int MIN_SSE = 30;
#else
static constexpr int MIN_SSE = 0;
#endif
#endif
template <bool PositionHas3Elems, bool PerVertexPosMtx>
static CPUCull::TransformFunction GetTransformFunction()
{
#if defined(USE_SSE)
if (MIN_SSE >= 51 || (cpu_info.bAVX && cpu_info.bFMA))
return CPUCull_FMA::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
else if (MIN_SSE >= 50 || cpu_info.bAVX)
return CPUCull_AVX::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
else if (PositionHas3Elems && PerVertexPosMtx && (MIN_SSE >= 41 || cpu_info.bSSE4_1))
return CPUCull_SSE41::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
else if (PositionHas3Elems && (MIN_SSE >= 30 || cpu_info.bSSE3))
return CPUCull_SSE3::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
else
return CPUCull_SSE::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
#elif defined(USE_NEON)
return CPUCull_NEON::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
#else
return CPUCull_Scalar::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
#endif
}
template <OpcodeDecoder::Primitive Primitive, CullMode Mode>
static CPUCull::CullFunction GetCullFunction0()
{
#if defined(USE_SSE)
// Note: AVX version only actually AVX on compilers that support __attribute__((target))
// Sorry, MSVC + Sandy Bridge. (Ivy+ and AMD see very little benefit thanks to mov elimination)
if (MIN_SSE >= 50 || cpu_info.bAVX)
return CPUCull_AVX::AreAllVerticesCulled<Primitive, Mode>;
else if (MIN_SSE >= 30 || cpu_info.bSSE3)
return CPUCull_SSE3::AreAllVerticesCulled<Primitive, Mode>;
else
return CPUCull_SSE::AreAllVerticesCulled<Primitive, Mode>;
#elif defined(USE_NEON)
return CPUCull_NEON::AreAllVerticesCulled<Primitive, Mode>;
#else
return CPUCull_Scalar::AreAllVerticesCulled<Primitive, Mode>;
#endif
}
template <OpcodeDecoder::Primitive Primitive>
static Common::EnumMap<CPUCull::CullFunction, CullMode::All> GetCullFunction1()
{
return {
GetCullFunction0<Primitive, CullMode::None>(),
GetCullFunction0<Primitive, CullMode::Back>(),
GetCullFunction0<Primitive, CullMode::Front>(),
GetCullFunction0<Primitive, CullMode::All>(),
};
}
CPUCull::~CPUCull() = default;
void CPUCull::Init()
{
m_transform_table[false][false] = GetTransformFunction<false, false>();
m_transform_table[false][true] = GetTransformFunction<false, true>();
m_transform_table[true][false] = GetTransformFunction<true, false>();
m_transform_table[true][true] = GetTransformFunction<true, true>();
using Prim = OpcodeDecoder::Primitive;
m_cull_table[Prim::GX_DRAW_QUADS] = GetCullFunction1<Prim::GX_DRAW_QUADS>();
m_cull_table[Prim::GX_DRAW_QUADS_2] = GetCullFunction1<Prim::GX_DRAW_QUADS>();
m_cull_table[Prim::GX_DRAW_TRIANGLES] = GetCullFunction1<Prim::GX_DRAW_TRIANGLES>();
m_cull_table[Prim::GX_DRAW_TRIANGLE_STRIP] = GetCullFunction1<Prim::GX_DRAW_TRIANGLE_STRIP>();
m_cull_table[Prim::GX_DRAW_TRIANGLE_FAN] = GetCullFunction1<Prim::GX_DRAW_TRIANGLE_FAN>();
}
bool CPUCull::AreAllVerticesCulled(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive,
const u8* src, u32 count)
{
ASSERT_MSG(VIDEO, primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES,
"CPUCull should not be called on lines or points");
const u32 stride = loader->m_native_vtx_decl.stride;
const bool posHas3Elems = loader->m_native_vtx_decl.position.components >= 3;
const bool perVertexPosMtx = loader->m_native_vtx_decl.posmtx.enable;
if (m_transform_buffer_size < count) [[unlikely]]
{
u32 new_size = MathUtil::NextPowerOf2(count);
m_transform_buffer_size = new_size;
m_transform_buffer.reset(static_cast<TransformedVertex*>(
Common::AllocateAlignedMemory(new_size * sizeof(TransformedVertex), 32)));
}
// transform functions need the projection matrix to transform to clip space
auto& system = Core::System::GetInstance();
system.GetVertexShaderManager().SetProjectionMatrix(system.GetXFStateManager());
static constexpr Common::EnumMap<CullMode, CullMode::All> cullmode_invert = {
CullMode::None, CullMode::Front, CullMode::Back, CullMode::All};
CullMode cull_mode = bpmem.genMode.cull_mode;
if (xfmem.viewport.ht > 0) // See videosoftware Clipper.cpp:IsBackface
cull_mode = cullmode_invert[cull_mode];
const TransformFunction transform = m_transform_table[posHas3Elems][perVertexPosMtx];
transform(m_transform_buffer.get(), src, stride, count);
const CullFunction cull = m_cull_table[primitive][cull_mode];
return cull(m_transform_buffer.get(), count);
}
template <typename T>
void CPUCull::BufferDeleter<T>::operator()(T* ptr)
{
Common::FreeAlignedMemory(ptr);
}