mirror of
https://github.com/cemu-project/Cemu.git
synced 2026-04-26 21:15:16 -06:00
This reverts commit 657d9f6f431c34ff905436c11817b76e97107a00. Revert "C++-ify GL default shader code" This reverts commit 81a6f92f8d43fdcc344346111fc1ccdf8a534d5d.
390 lines
13 KiB
C++
390 lines
13 KiB
C++
#include "Cafe/OS/common/OSCommon.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "GX2.h"
#include "Cafe/HW/Latte/Core/Latte.h"
#include "Cafe/HW/Latte/Core/LatteDraw.h"

#include "Cafe/HW/Latte/Core/LattePM4.h"

#include "GX2_Command.h"
#include "GX2_State.h"
#include "Cafe/CafeSystem.h"

#include <cstring>
|
|
|
|
// Number of registers in each GPU7 register area that is shadowed in a context state.
// These values are passed as register counts to _GX2Context_CalcShadowMemSize().
#define GPU7_REG_AREA_SIZE_CONFIG_REG		0xB00
#define GPU7_REG_AREA_SIZE_CONTEXT_REG		0x400
#define GPU7_REG_AREA_SIZE_ALU_CONST		0x800
#define GPU7_REG_AREA_SIZE_LOOP_CONST		0x60
#define GPU7_REG_AREA_SIZE_RESOURCE			0xD9E
#define GPU7_REG_AREA_SIZE_SAMPLER			0xA2 // (guessed)

// Number of uint32 elements needed to shadow __v registers:
// the byte size (__v * 4) is rounded up to a multiple of 0x100 and converted back to a uint32 count.
#define _GX2_CALC_SHADOWMEM_NUM_U32(__v) (((((__v)*4)+0xFF)&~0xFF)/4)
|
|
|
|
// Guest address (as received in gpr[3]) of the context state that was most recently set up or made current.
// Stays MPTR_NULL until GX2SetupContextStateEx or GX2SetContextState stores a context here.
MPTR gx2CurrentContextStateMPTR = MPTR_NULL;
|
|
|
|
typedef struct
|
|
{
|
|
uint32 regOffset;
|
|
uint32 regCount;
|
|
}GX2RegLoadPktEntry_t;
|
|
|
|
// Register ranges loaded by IT_LOAD_ALU_CONST (base: 0xC000).
// A single range covering all 0x800 registers.
GX2RegLoadPktEntry_t aluConst_loadPktEntries[1] =
{
	{ 0, 0x800 },
};
|
|
|
|
// Register ranges loaded by IT_LOAD_LOOP_CONST (base: 0xF880).
// A single range covering all 0x60 registers.
GX2RegLoadPktEntry_t loopConst_loadPktEntries[1] =
{
	{ 0, 0x60 },
};
|
|
|
|
// Register ranges loaded by IT_LOAD_SAMPLER (base: 0xF000).
// Three back-to-back ranges of 0x36 registers each (0x36 * 3 == 0xA2 == GPU7_REG_AREA_SIZE_SAMPLER).
GX2RegLoadPktEntry_t samplerReg_loadPktEntries[3] =
{
	{ 0x00, 0x36 },
	{ 0x36, 0x36 },
	{ 0x6C, 0x36 },
};
|
|
|
|
// Register ranges loaded by IT_LOAD_CONFIG_REG (base: 0x2000).
// The entries are emitted into the packet in exactly this order, so the order must be preserved.
GX2RegLoadPktEntry_t configReg_loadPktEntries[0xF] =
{
	{ 0x300, 0x06 },
	{ 0x900, 0x48 },
	{ 0x980, 0x48 },
	{ 0xA00, 0x48 },
	{ 0x310, 0x0C },
	{ 0x542, 0x01 },
	{ 0x235, 0x01 },
	{ 0x232, 0x02 },
	{ 0x23A, 0x01 },
	{ 0x256, 0x01 },
	{ 0x60C, 0x01 },
	{ 0x5C5, 0x01 },
	{ 0x2C8, 0x01 },
	{ 0x363, 0x01 },
	{ 0x404, 0x02 },
};
|
|
|
|
// Register ranges loaded by IT_LOAD_CONTEXT_REG (base: 0xA000).
// The entries are emitted into the packet in exactly this order (which is not sorted by offset),
// so the order must be preserved.
GX2RegLoadPktEntry_t contextReg_loadPktEntries[0x2D] =
{
	{ 0x000, 0x02 },
	{ 0x003, 0x03 },
	{ 0x00A, 0x04 },
	{ 0x010, 0x38 },
	{ 0x050, 0x34 },
	{ 0x08E, 0x04 },
	{ 0x094, 0x40 },
	{ 0x100, 0x09 },
	{ 0x10C, 0x03 },
	{ 0x10F, 0x60 },
	{ 0x185, 0x0A },
	{ 0x191, 0x27 },
	{ 0x1E0, 0x09 },
	{ 0x200, 0x01 },
	{ 0x202, 0x07 },
	{ 0x0E0, 0x20 },
	{ 0x210, 0x29 },
	{ 0x250, 0x34 },
	{ 0x290, 0x01 },
	{ 0x292, 0x02 },
	{ 0x2A1, 0x01 },
	{ 0x2A5, 0x01 },
	{ 0x2A8, 0x02 },
	{ 0x2AC, 0x03 },
	{ 0x2CA, 0x01 },
	{ 0x2CC, 0x01 },
	{ 0x2CE, 0x01 },
	{ 0x300, 0x09 },
	{ 0x30C, 0x01 },
	{ 0x312, 0x01 },
	{ 0x316, 0x02 },
	{ 0x343, 0x02 },
	{ 0x349, 0x03 },
	{ 0x34C, 0x02 },
	{ 0x351, 0x01 },
	{ 0x37E, 0x06 },
	{ 0x2B4, 0x03 },
	{ 0x2B8, 0x03 },
	{ 0x2BC, 0x03 },
	{ 0x2C0, 0x03 },
	{ 0x2C8, 0x01 },
	{ 0x29B, 0x01 },
	{ 0x08C, 0x01 },
	{ 0x0D5, 0x01 },
	{ 0x284, 0x0C },
};
|
|
|
|
// Register ranges loaded by IT_LOAD_RESOURCE (base: 0xE000).
// The entries are emitted into the packet in exactly this order, so the order must be preserved.
GX2RegLoadPktEntry_t resourceReg_loadPktEntries[9] =
{
	{ 0x000, 0x70 }, // ps tex
	{ 0x380, 0x70 },
	{ 0x460, 0x70 }, // vs tex
	{ 0x7E0, 0x70 },
	{ 0x8B9, 0x07 },
	{ 0x8C0, 0x70 },
	{ 0x930, 0x70 }, // gs tex
	{ 0xCB0, 0x70 },
	{ 0xD89, 0x07 },
};
|
|
|
|
typedef struct
|
|
{
|
|
// Hardware view of context state (register areas)
|
|
uint32 areaConfigReg[_GX2_CALC_SHADOWMEM_NUM_U32(GPU7_REG_AREA_SIZE_CONFIG_REG)];
|
|
uint32 areaContextReg[_GX2_CALC_SHADOWMEM_NUM_U32(GPU7_REG_AREA_SIZE_CONTEXT_REG)];
|
|
uint32 areaALUConst[_GX2_CALC_SHADOWMEM_NUM_U32(GPU7_REG_AREA_SIZE_ALU_CONST)];
|
|
uint32 areaLoopConst[_GX2_CALC_SHADOWMEM_NUM_U32(GPU7_REG_AREA_SIZE_LOOP_CONST)];
|
|
uint32 areaResource[_GX2_CALC_SHADOWMEM_NUM_U32(GPU7_REG_AREA_SIZE_RESOURCE)];
|
|
uint32 areaSampler[_GX2_CALC_SHADOWMEM_NUM_U32(GPU7_REG_AREA_SIZE_SAMPLER)];
|
|
}GX2HwContextState_t;
|
|
|
|
typedef struct
|
|
{
|
|
GX2HwContextState_t hwContext;
|
|
uint32 enableProfling;
|
|
/* + 0x9804 */ uint32be loadDL_size;
|
|
uint8 ukn9808[0x3FC-4];
|
|
uint8 ukn9C00[0x200];
|
|
/* +0x9E00 */ uint8 loadDL_buffer[0x300]; // this displaylist caches the IT_LOAD_* commands for swapping out context
|
|
}GX2ContextState_t;
|
|
|
|
static_assert(offsetof(GX2ContextState_t, loadDL_size) == 0x9804);
|
|
static_assert(sizeof(GX2ContextState_t) == 0xA100);
|
|
|
|
// Returns the shadow memory size in bytes for regCount registers:
// 4 bytes per register, rounded up to the next multiple of 0x100.
uint32 _GX2Context_CalcShadowMemSize(uint32 regCount)
{
	const uint32 unalignedByteSize = regCount * 4;
	const uint32 alignedByteSize = (unalignedByteSize + 0xFF) & ~0xFF;
	return alignedByteSize;
}
|
|
|
|
// Returns the total size in bytes of all register shadow areas, i.e. the sum of the
// 0x100-aligned sizes of the config, context, ALU const, loop const, resource and sampler areas.
uint32 _GX2Context_CalcStateSize()
{
	static const uint32 areaRegCounts[] =
	{
		GPU7_REG_AREA_SIZE_CONFIG_REG,
		GPU7_REG_AREA_SIZE_CONTEXT_REG,
		GPU7_REG_AREA_SIZE_ALU_CONST,
		GPU7_REG_AREA_SIZE_LOOP_CONST,
		GPU7_REG_AREA_SIZE_RESOURCE,
		GPU7_REG_AREA_SIZE_SAMPLER,
	};

	uint32 totalByteSize = 0;
	for (uint32 regCount : areaRegCounts)
		totalByteSize += _GX2Context_CalcShadowMemSize(regCount);
	return totalByteSize;
}
|
|
|
|
void _GX2Context_CreateLoadDL()
|
|
{
|
|
GX2::GX2ReserveCmdSpace(3);
|
|
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_CONTEXT_CONTROL, 2));
|
|
gx2WriteGather_submitU32AsBE(0x80000077);
|
|
gx2WriteGather_submitU32AsBE(0x80000077);
|
|
}
|
|
|
|
void _GX2Context_WriteCmdDisableStateShadowing()
|
|
{
|
|
GX2::GX2ReserveCmdSpace(3);
|
|
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_CONTEXT_CONTROL, 2));
|
|
gx2WriteGather_submitU32AsBE(0x80000000);
|
|
gx2WriteGather_submitU32AsBE(0x80000000);
|
|
}
|
|
|
|
void _GX2Context_cmdLoad(void* gx2ukn, uint32 pm4Header, MPTR physAddrRegArea, uint32 waitForIdle, uint32 numRegOffsetEntries, GX2RegLoadPktEntry_t* regOffsetEntries)
|
|
{
|
|
GX2::GX2ReserveCmdSpace(3 + numRegOffsetEntries*2);
|
|
gx2WriteGather_submitU32AsBE(pm4Header);
|
|
gx2WriteGather_submitU32AsBE(physAddrRegArea);
|
|
gx2WriteGather_submitU32AsBE(waitForIdle);
|
|
for(uint32 i=0; i<numRegOffsetEntries; i++)
|
|
{
|
|
gx2WriteGather_submitU32AsBE(regOffsetEntries[i].regOffset); // regOffset
|
|
gx2WriteGather_submitU32AsBE(regOffsetEntries[i].regCount); // regCount
|
|
}
|
|
}
|
|
|
|
// Convenience wrapper around _GX2Context_cmdLoad():
//  - builds the type-3 header for __pm4Command with a second argument of 2 + 2 * (number of entries)
//  - converts the host pointer __regArea to a guest virtual offset and then to a physical address
//  - derives the entry count from the array via sizeof, so __regLoadPktEntries must be an actual
//    array (not a pointer)
// __gx2State is not used by the expansion.
#define __cmdStateLoad(__gx2State, __pm4Command, __regArea, __waitForIdle, __regLoadPktEntries) _GX2Context_cmdLoad(NULL, pm4HeaderType3(__pm4Command, 2+sizeof(__regLoadPktEntries)/sizeof(__regLoadPktEntries[0])*2), memory_virtualToPhysical(memory_getVirtualOffsetFromPointer(__regArea)), __waitForIdle, sizeof(__regLoadPktEntries)/sizeof(__regLoadPktEntries[0]), __regLoadPktEntries)
|
|
|
|
// Writes the commands that reload all shadowed register areas from the given context state:
// first the IT_CONTEXT_CONTROL packet from _GX2Context_CreateLoadDL(), then one IT_LOAD_* packet
// per register area, in the order config, context, ALU const, loop const, resource, sampler.
// Only the config register load passes 0x80000000 as its waitForIdle word; all others pass 0.
// The ukn parameter is not used.
void _GX2Context_WriteCmdRestoreState(GX2ContextState_t* gx2ContextState, uint32 ukn)
{
	// computed like in the original code, although the value is not used afterwards
	MPTR physAddrContextState = memory_virtualToPhysical(memory_getVirtualOffsetFromPointer(gx2ContextState));

	_GX2Context_CreateLoadDL();

	GX2HwContextState_t& hwAreas = gx2ContextState->hwContext;
	__cmdStateLoad(NULL, IT_LOAD_CONFIG_REG,  hwAreas.areaConfigReg,  0x80000000, configReg_loadPktEntries);
	__cmdStateLoad(NULL, IT_LOAD_CONTEXT_REG, hwAreas.areaContextReg, 0,          contextReg_loadPktEntries);
	__cmdStateLoad(NULL, IT_LOAD_ALU_CONST,   hwAreas.areaALUConst,   0,          aluConst_loadPktEntries);
	__cmdStateLoad(NULL, IT_LOAD_LOOP_CONST,  hwAreas.areaLoopConst,  0,          loopConst_loadPktEntries);
	__cmdStateLoad(NULL, IT_LOAD_RESOURCE,    hwAreas.areaResource,   0,          resourceReg_loadPktEntries);
	__cmdStateLoad(NULL, IT_LOAD_SAMPLER,     hwAreas.areaSampler,    0,          samplerReg_loadPktEntries);
}
|
|
|
|
void GX2SetDefaultState()
|
|
{
|
|
GX2::GX2ReserveCmdSpace(0x100);
|
|
|
|
Latte::LATTE_PA_CL_VTE_CNTL reg{};
|
|
reg.set_VPORT_X_OFFSET_ENA(true).set_VPORT_X_SCALE_ENA(true);
|
|
reg.set_VPORT_Y_OFFSET_ENA(true).set_VPORT_Y_SCALE_ENA(true);
|
|
reg.set_VPORT_Z_OFFSET_ENA(true).set_VPORT_Z_SCALE_ENA(true);
|
|
reg.set_VTX_W0_FMT(true);
|
|
gx2WriteGather_submit(pm4HeaderType3(IT_SET_CONTEXT_REG, 1 + 1),
|
|
Latte::REGADDR::PA_CL_VTE_CNTL - 0xA000,
|
|
reg);
|
|
|
|
uint32 stencilTestEnable = GX2_FALSE;
|
|
uint32 backStencilEnable = GX2_FALSE;
|
|
uint32 frontStencilFunc = 0;
|
|
uint32 frontStencilZPass = 0;
|
|
uint32 frontStencilZFail = 0;
|
|
uint32 frontStencilFail = 0;
|
|
uint32 backStencilFunc = 0;
|
|
uint32 backStencilZPass = 0;
|
|
uint32 backStencilZFail = 0;
|
|
uint32 backStencilFail = 0;
|
|
|
|
uint32 depthControlReg = 0;
|
|
// depth stuff
|
|
depthControlReg |= (1<<1);
|
|
depthControlReg |= (1<<2);
|
|
depthControlReg |= ((1&7)<<4);
|
|
// stencil stuff
|
|
depthControlReg |= ((stencilTestEnable&1)<<0);
|
|
depthControlReg |= ((backStencilEnable&1)<<7);
|
|
depthControlReg |= ((frontStencilFunc&7)<<8);
|
|
depthControlReg |= ((frontStencilZPass&7)<<14);
|
|
depthControlReg |= ((frontStencilZFail&7)<<17);
|
|
depthControlReg |= ((frontStencilFail&7)<<11);
|
|
depthControlReg |= ((backStencilFunc&7)<<20);
|
|
depthControlReg |= ((backStencilZPass&7)<<26);
|
|
depthControlReg |= ((backStencilZFail&3)<<29);
|
|
depthControlReg |= ((backStencilFail&7)<<23);
|
|
|
|
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_SET_CONTEXT_REG, 1+1));
|
|
gx2WriteGather_submitU32AsBE(Latte::REGADDR::DB_DEPTH_CONTROL-0xA000);
|
|
gx2WriteGather_submitU32AsBE(depthControlReg);
|
|
|
|
GX2::GX2SetAlphaTest(GX2_DISABLE, GX2::GX2_ALPHAFUNC::LESS, 0.0f);
|
|
GX2::GX2SetPolygonControl(Latte::LATTE_PA_SU_SC_MODE_CNTL::E_FRONTFACE::CCW, GX2_DISABLE, GX2_DISABLE, Latte::LATTE_PA_SU_SC_MODE_CNTL::E_POLYGONMODE::UKN0, Latte::LATTE_PA_SU_SC_MODE_CNTL::E_PTYPE::TRIANGLES, Latte::LATTE_PA_SU_SC_MODE_CNTL::E_PTYPE::TRIANGLES, GX2_DISABLE, GX2_DISABLE, GX2_DISABLE);
|
|
GX2::GX2SetPolygonOffset(0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
|
|
GX2::GX2SetPrimitiveRestartIndex(0xffffffff);
|
|
GX2::GX2SetTargetChannelMasks(0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF);
|
|
GX2::GX2SetBlendConstantColor(0.0f, 0.0f, 0.0f, 0.0f);
|
|
GX2::GX2SetPointSize(1.0f, 1.0f);
|
|
GX2::GX2SetPointLimits(1.0f, 1.0f);
|
|
GX2::GX2SetColorControl(GX2::GX2_LOGICOP::COPY, GX2_DISABLE, GX2_DISABLE, GX2_ENABLE);
|
|
GX2::GX2SetRasterizerClipControlEx(true, true, false);
|
|
|
|
// Set clear depth to 1.0 (workaround for Darksiders 2. Investigate whether actual GX2 driver also sets this)
|
|
float depth = 1.0;
|
|
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_SET_CONTEXT_REG, 1+1));
|
|
gx2WriteGather_submitU32AsBE(mmDB_DEPTH_CLEAR - 0xA000);
|
|
gx2WriteGather_submitU32AsBE(*(uint32*)&depth); // depth (as float)
|
|
|
|
// reset HLE special states
|
|
for (sint32 i = 0; i <= GX2_SPECIAL_STATE_COUNT; i++)
|
|
{
|
|
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_HLE_SPECIAL_STATE, 2));
|
|
gx2WriteGather_submitU32AsBE(i); // state id
|
|
gx2WriteGather_submitU32AsBE(0); // disable
|
|
}
|
|
}
|
|
|
|
// Guest-callable export for GX2SetDefaultState: logs the call, writes the default state
// to the command stream and returns 0 to the caller.
void gx2Export_GX2SetDefaultState(PPCInterpreter_t* hCPU)
{
	cemuLog_log(LogType::GX2, "GX2SetDefaultState()");

	GX2SetDefaultState();

	osLib_returnFromFunction(hCPU, 0);
}
|
|
|
|
// Records the restore-state commands for gx2ContextState into the context's own loadDL_buffer
// as a display list and stores the resulting size in loadDL_size.
// Must not be called while another display list is being written.
void _GX2ContextCreateRestoreStateDL(GX2ContextState_t* gx2ContextState)
{
	// must not already be writing to a display list
	cemu_assert_debug(!GX2::GX2GetDisplayListWriteStatus());

	void* const dlStorage = (void*)gx2ContextState->loadDL_buffer;

	// record the IT_LOAD_* commands into the context's display list buffer
	GX2::GX2BeginDisplayList(dlStorage, sizeof(gx2ContextState->loadDL_buffer));
	_GX2Context_WriteCmdRestoreState(gx2ContextState, 0);
	uint32 recordedSize = GX2::GX2EndDisplayList(dlStorage);

	gx2ContextState->loadDL_size = recordedSize;
}
|
|
|
|
// Guest-callable export for GX2SetupContextStateEx.
//   gpr[3] - guest address of the GX2ContextState_t to initialize
//   gpr[4] - profiling flag, expected to be 0 or 1
// Zeroes the context state, stores the profiling flag, writes the restore-state commands,
// makes the context the current one, writes the default state and finally records the cached
// restore-state display list inside the context. Returns 0 to the caller.
void gx2Export_GX2SetupContextStateEx(PPCInterpreter_t* hCPU)
{
	cemuLog_log(LogType::GX2, "GX2SetupContextStateEx(0x{:08x})", hCPU->gpr[3]);
	cemu_assert_debug(hCPU->gpr[4] == 0 || hCPU->gpr[4] == 1);

	// layout checks that only depend on compile-time constants are verified at compile time
	static_assert(sizeof(GX2HwContextState_t) == 0x9800); // GX2 HW context size mismatch
	static_assert(sizeof(GX2ContextState_t) == 0xA100); // GX2 context size mismatch

	GX2ContextState_t* gx2ContextState = (GX2ContextState_t*)memory_getPointerFromVirtualOffset(hCPU->gpr[3]);

	// the size computed from the register area constants must match the struct layout
	uint32 hwContextSize = _GX2Context_CalcStateSize();
	if( hwContextSize != sizeof(GX2HwContextState_t) )
		assert_dbg();

	memset(gx2ContextState, 0x00, sizeof(GX2ContextState_t));
	gx2ContextState->enableProfling = _swapEndianU32(hCPU->gpr[4]&1);
	_GX2Context_WriteCmdRestoreState(gx2ContextState, 1);

	gx2CurrentContextStateMPTR = hCPU->gpr[3];

	_GX2Context_CreateLoadDL();
	GX2SetDefaultState();
	_GX2ContextCreateRestoreStateDL(gx2ContextState);

	osLib_returnFromFunction(hCPU, 0);
}
|
|
|
|
// Guest-callable export for GX2SetContextState.
//   gpr[3] - guest address of the context state to make current, or MPTR_NULL
// A null context disables state shadowing. Otherwise the context's registers are reloaded,
// either through its cached display list (if loadDL_size is non-zero) or by writing the
// restore commands directly. For one specific title (identified by RPX hash) the reload is
// skipped when the context is already the current one. Returns 0 to the caller.
void gx2Export_GX2SetContextState(PPCInterpreter_t* hCPU)
{
	cemuLog_log(LogType::GX2, "GX2SetContextState(0x{:08x})", hCPU->gpr[3]);

	if( hCPU->gpr[3] == MPTR_NULL )
	{
		// disable state shadowing
		_GX2Context_WriteCmdDisableStateShadowing();
		osLib_returnFromFunction(hCPU, 0);
		return;
	}

	// workaround for a bug in Binding of Isaac to avoid flicker
	const bool boiWorkaround = CafeSystem::GetRPXHashBase() == 0x6BCD618E;
	if( boiWorkaround )
	{
		if( hCPU->gpr[3] == gx2CurrentContextStateMPTR )
		{
			// dont reload same state
			// even if it's the same context, make sure state shadowing is enabled.
			_GX2Context_CreateLoadDL();
			osLib_returnFromFunction(hCPU, 0);
			return;
		}
		GX2ContextState_t* newContextState = (GX2ContextState_t*)memory_getPointerFromVirtualOffset(hCPU->gpr[3]);
		_GX2Context_WriteCmdRestoreState(newContextState, 0);
		_GX2Context_CreateLoadDL();
		// set new context state
		gx2CurrentContextStateMPTR = hCPU->gpr[3];
		osLib_returnFromFunction(hCPU, 0);
		return;
	}

	GX2ContextState_t* newContextState = (GX2ContextState_t*)memory_getPointerFromVirtualOffset(hCPU->gpr[3]);
	const bool hasCachedLoadDL = !(newContextState->loadDL_size == 0);
	_GX2Context_CreateLoadDL();
	if( hasCachedLoadDL )
		GX2::GX2CallDisplayList(memory_getVirtualOffsetFromPointer(newContextState->loadDL_buffer), newContextState->loadDL_size);
	else
		_GX2Context_WriteCmdRestoreState(newContextState, 0);
	// set new context state
	gx2CurrentContextStateMPTR = hCPU->gpr[3];

	// todo: Save/restore GX2 special state as well -> handle this by correctly emulating the state
	osLib_returnFromFunction(hCPU, 0);
}
|
|
|
|
// Guest-callable export for GX2GetContextStateDisplayList.
//   param 0 - context state
//   param 1 - out: receives the guest address of the context's cached display list buffer
//   param 2 - out: receives the stored size of that display list (loadDL_size)
// Returns 0 to the caller.
void gx2Export_GX2GetContextStateDisplayList(PPCInterpreter_t* hCPU)
{
	cemuLog_log(LogType::GX2, "GX2GetContextStateDisplayList(0x{:08x}, 0x{:08x}, 0x{:08x})", hCPU->gpr[3], hCPU->gpr[4], hCPU->gpr[5]);

	ppcDefineParamStructPtr(gx2ContextState, GX2ContextState_t, 0);
	ppcDefineParamU32BEPtr(displayListPtrOut, 1);
	ppcDefineParamU32BEPtr(displayListSizeOut, 2);

	// hand out the display list that is embedded in the context state itself
	*displayListPtrOut = memory_getVirtualOffsetFromPointer(gx2ContextState->loadDL_buffer);
	*displayListSizeOut = gx2ContextState->loadDL_size;

	osLib_returnFromFunction(hCPU, 0);
}
|