mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
iR5900: Faster FTOI
This commit is contained in:
parent
5bc2342d47
commit
c72e894fc7
@ -249,7 +249,6 @@ void CTC1() {
|
|||||||
|
|
||||||
void CVT_S() {
|
void CVT_S() {
|
||||||
_FdValf_ = (float)_FsValSl_;
|
_FdValf_ = (float)_FsValSl_;
|
||||||
_FdValf_ = fpuDouble( _FdValUl_ );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CVT_W() {
|
void CVT_W() {
|
||||||
|
|||||||
@ -26,8 +26,6 @@ namespace DOUBLE
|
|||||||
void recC_EQ_xmm(int info);
|
void recC_EQ_xmm(int info);
|
||||||
void recC_LE_xmm(int info);
|
void recC_LE_xmm(int info);
|
||||||
void recC_LT_xmm(int info);
|
void recC_LT_xmm(int info);
|
||||||
void recCVT_S_xmm(int info);
|
|
||||||
void recCVT_W();
|
|
||||||
void recDIV_S_xmm(int info);
|
void recDIV_S_xmm(int info);
|
||||||
void recMADD_S_xmm(int info);
|
void recMADD_S_xmm(int info);
|
||||||
void recMADDA_S_xmm(int info);
|
void recMADDA_S_xmm(int info);
|
||||||
@ -993,15 +991,16 @@ void recCVT_S_xmm(int info)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
|
void recCVT_S()
|
||||||
|
{
|
||||||
|
// Float version is fully accurate, no double version
|
||||||
|
eeFPURecompileCode(recCVT_S_xmm, R5900::Interpreter::OpcodeImpl::COP1::CVT_S, XMMINFO_WRITED | XMMINFO_READS);
|
||||||
|
}
|
||||||
|
|
||||||
void recCVT_W()
|
void recCVT_W()
|
||||||
{
|
{
|
||||||
if (CHECK_FPU_FULL)
|
// Float version is fully accurate, no double version
|
||||||
{
|
|
||||||
DOUBLE::recCVT_W();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// If we have the following EmitOP() on the top then it'll get calculated twice when CHECK_FPU_FULL is true
|
// If we have the following EmitOP() on the top then it'll get calculated twice when CHECK_FPU_FULL is true
|
||||||
// as we also have an EmitOP() at recCVT_W() on iFPUd.cpp. hence we have it below the possible return.
|
// as we also have an EmitOP() at recCVT_W() on iFPUd.cpp. hence we have it below the possible return.
|
||||||
EE::Profiler.EmitOp(eeOpcode::CVTW);
|
EE::Profiler.EmitOp(eeOpcode::CVTW);
|
||||||
@ -1010,26 +1009,23 @@ void recCVT_W()
|
|||||||
|
|
||||||
if (regs >= 0)
|
if (regs >= 0)
|
||||||
{
|
{
|
||||||
if (CHECK_FPU_EXTRA_OVERFLOW)
|
|
||||||
fpuFloat2(regs);
|
|
||||||
xCVTTSS2SI(eax, xRegisterSSE(regs));
|
xCVTTSS2SI(eax, xRegisterSSE(regs));
|
||||||
xMOVMSKPS(edx, xRegisterSSE(regs)); //extract the signs
|
xMOVD(edx, xRegisterSSE(regs));
|
||||||
xAND(edx, 1); // keep only LSB
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]);
|
xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||||
xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]);
|
xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]);
|
||||||
xSHR(edx, 31); // mov sign to lsb
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
|
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
|
||||||
_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
|
_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||||
|
|
||||||
xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative
|
// cvttss2si converts unrepresentable values to 0x80000000, so negative values are already handled.
|
||||||
|
// So we just need to handle positive values.
|
||||||
xCMP(eax, 0x80000000); // If the result is indefinitive
|
xCMP(edx, 0x4f000000); // If the input is greater than INT_MAX
|
||||||
xCMOVE(eax, edx); // Saturate it
|
xMOV(edx, 0x7fffffff);
|
||||||
|
xCMOVGE(eax, edx); // Saturate it
|
||||||
|
|
||||||
//Write the result
|
//Write the result
|
||||||
xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax);
|
xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax);
|
||||||
|
|||||||
@ -540,57 +540,10 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT);
|
|||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// CVT.x XMM
|
// CVT.x XMM
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
void recCVT_S_xmm(int info)
|
|
||||||
{
|
|
||||||
EE::Profiler.EmitOp(eeOpcode::CVTS_F);
|
|
||||||
|
|
||||||
if (info & PROCESS_EE_D)
|
// CVT.S: Identical to non-double variant, omitted
|
||||||
{
|
// CVT.W: Identical to non-double variant, omitted
|
||||||
if (info & PROCESS_EE_S)
|
|
||||||
xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
|
||||||
else
|
|
||||||
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const int temp = _allocTempXMMreg(XMMT_FPS);
|
|
||||||
xCVTSI2SS(xRegisterSSE(temp), ptr32[&fpuRegs.fpr[_Fs_]]);
|
|
||||||
xMOVSS(ptr32[&fpuRegs.fpr[_Fd_]], xRegisterSSE(temp));
|
|
||||||
_freeXMMreg(temp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
|
|
||||||
|
|
||||||
void recCVT_W() //called from iFPU.cpp's recCVT_W
|
|
||||||
{
|
|
||||||
EE::Profiler.EmitOp(eeOpcode::CVTW);
|
|
||||||
int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
|
|
||||||
|
|
||||||
if (regs >= 0)
|
|
||||||
{
|
|
||||||
xCVTTSS2SI(eax, xRegisterSSE(regs));
|
|
||||||
xMOVMSKPS(edx, xRegisterSSE(regs)); // extract the signs
|
|
||||||
xAND(edx, 1); // keep only LSB
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]);
|
|
||||||
xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]);
|
|
||||||
xSHR(edx, 31); //mov sign to lsb
|
|
||||||
}
|
|
||||||
|
|
||||||
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
|
|
||||||
_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
|
|
||||||
|
|
||||||
xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative
|
|
||||||
|
|
||||||
xCMP(eax, 0x80000000); // If the result is indefinitive
|
|
||||||
xCMOVE(eax, edx); // Saturate it
|
|
||||||
|
|
||||||
//Write the result
|
|
||||||
xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax);
|
|
||||||
}
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -42,6 +42,7 @@ struct mVU_Globals
|
|||||||
u32 E4 [4] = __four(0x3933e553);
|
u32 E4 [4] = __four(0x3933e553);
|
||||||
u32 E5 [4] = __four(0x36b63510);
|
u32 E5 [4] = __four(0x36b63510);
|
||||||
u32 E6 [4] = __four(0x353961ac);
|
u32 E6 [4] = __four(0x353961ac);
|
||||||
|
u32 I32MAXF [4] = __four(0x4effffff);
|
||||||
float FTOI_4 [4] = __four(16.0);
|
float FTOI_4 [4] = __four(16.0);
|
||||||
float FTOI_12 [4] = __four(4096.0);
|
float FTOI_12 [4] = __four(4096.0);
|
||||||
float FTOI_15 [4] = __four(32768.0);
|
float FTOI_15 [4] = __four(32768.0);
|
||||||
|
|||||||
@ -484,23 +484,19 @@ static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum)
|
|||||||
return;
|
return;
|
||||||
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
|
||||||
const xmm& t1 = mVU.regAlloc->allocReg();
|
const xmm& t1 = mVU.regAlloc->allocReg();
|
||||||
const xmm& t2 = mVU.regAlloc->allocReg();
|
|
||||||
|
|
||||||
// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
|
// cvttps2dq returns 0x80000000 for any unrepresentable values.
|
||||||
xMOVAPS(t1, Fs);
|
// We want it to return 0x80000000 for negative and 0x7fffffff for positive.
|
||||||
|
// So for unrepresentable positive values, xor with 0xffffffff to turn 0x80000000 into 0x7fffffff.
|
||||||
if (addr)
|
if (addr)
|
||||||
xMUL.PS(Fs, ptr128[addr]);
|
xMUL.PS(Fs, ptr128[addr]);
|
||||||
|
xMOVAPS(t1, Fs);
|
||||||
|
xPCMP.GTD(t1, ptr128[mVUglob.I32MAXF]);
|
||||||
xCVTTPS2DQ(Fs, Fs);
|
xCVTTPS2DQ(Fs, Fs);
|
||||||
xPXOR(t1, ptr128[mVUglob.signbit]);
|
xPXOR(Fs, t1);
|
||||||
xPSRA.D(t1, 31);
|
|
||||||
xMOVAPS(t2, Fs);
|
|
||||||
xPCMP.EQD(t2, ptr128[mVUglob.signbit]);
|
|
||||||
xAND.PS(t1, t2);
|
|
||||||
xPADD.D(Fs, t1);
|
|
||||||
|
|
||||||
mVU.regAlloc->clearNeeded(Fs);
|
mVU.regAlloc->clearNeeded(Fs);
|
||||||
mVU.regAlloc->clearNeeded(t1);
|
mVU.regAlloc->clearNeeded(t1);
|
||||||
mVU.regAlloc->clearNeeded(t2);
|
|
||||||
mVU.profiler.EmitOp(opEnum);
|
mVU.profiler.EmitOp(opEnum);
|
||||||
}
|
}
|
||||||
pass3
|
pass3
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user