diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index f8c5a74d44..e40e35a0c8 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -249,7 +249,6 @@ void CTC1() { void CVT_S() { _FdValf_ = (float)_FsValSl_; - _FdValf_ = fpuDouble( _FdValUl_ ); } void CVT_W() { diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index cd30674690..b9fad09fa8 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -26,8 +26,6 @@ namespace DOUBLE void recC_EQ_xmm(int info); void recC_LE_xmm(int info); void recC_LT_xmm(int info); - void recCVT_S_xmm(int info); - void recCVT_W(); void recDIV_S_xmm(int info); void recMADD_S_xmm(int info); void recMADDA_S_xmm(int info); @@ -993,15 +991,16 @@ void recCVT_S_xmm(int info) } } -FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS); +void recCVT_S() +{ + // Float version is fully accurate, no double version + eeFPURecompileCode(recCVT_S_xmm, R5900::Interpreter::OpcodeImpl::COP1::CVT_S, XMMINFO_WRITED | XMMINFO_READS); +} void recCVT_W() { - if (CHECK_FPU_FULL) - { - DOUBLE::recCVT_W(); - return; - } + // Float version is fully accurate, no double version + // If we have the following EmitOP() on the top then it'll get calculated twice when CHECK_FPU_FULL is true // as we also have an EmitOP() at recCVT_W() on iFPUd.cpp. hence we have it below the possible return. 
EE::Profiler.EmitOp(eeOpcode::CVTW); @@ -1010,26 +1009,23 @@ void recCVT_W() if (regs >= 0) { - if (CHECK_FPU_EXTRA_OVERFLOW) - fpuFloat2(regs); xCVTTSS2SI(eax, xRegisterSSE(regs)); - xMOVMSKPS(edx, xRegisterSSE(regs)); //extract the signs - xAND(edx, 1); // keep only LSB + xMOVD(edx, xRegisterSSE(regs)); } else { xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]); xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]); - xSHR(edx, 31); // mov sign to lsb } //kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_] _deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK); - xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative - - xCMP(eax, 0x80000000); // If the result is indefinitive - xCMOVE(eax, edx); // Saturate it + // cvttss2si converts unrepresentable values to 0x80000000, so negative values are already handled. + // So we just need to handle positive values. + xCMP(edx, 0x4f000000); // If the input is greater than INT_MAX + xMOV(edx, 0x7fffffff); + xCMOVGE(eax, edx); // Saturate it //Write the result xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax); diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index db1f722a19..074de8d8c1 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -540,57 +540,10 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT); //------------------------------------------------------------------ // CVT.x XMM //------------------------------------------------------------------ -void recCVT_S_xmm(int info) -{ - EE::Profiler.EmitOp(eeOpcode::CVTS_F); - if (info & PROCESS_EE_D) - { - if (info & PROCESS_EE_S) - xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else - xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]); - } - else - { - const int temp = _allocTempXMMreg(XMMT_FPS); - xCVTSI2SS(xRegisterSSE(temp), ptr32[&fpuRegs.fpr[_Fs_]]); - xMOVSS(ptr32[&fpuRegs.fpr[_Fd_]], xRegisterSSE(temp)); - _freeXMMreg(temp); - } -} +// CVT.S: Identical to non-double variant, omitted +// CVT.W: Identical to 
non-double variant, omitted -FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS); - -void recCVT_W() //called from iFPU.cpp's recCVT_W -{ - EE::Profiler.EmitOp(eeOpcode::CVTW); - int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - - if (regs >= 0) - { - xCVTTSS2SI(eax, xRegisterSSE(regs)); - xMOVMSKPS(edx, xRegisterSSE(regs)); // extract the signs - xAND(edx, 1); // keep only LSB - } - else - { - xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]); - xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]); - xSHR(edx, 31); //mov sign to lsb - } - - //kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_] - _deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK); - - xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative - - xCMP(eax, 0x80000000); // If the result is indefinitive - xCMOVE(eax, edx); // Saturate it - - //Write the result - xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax); -} //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 101feffd0b..e222be6922 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -42,6 +42,7 @@ struct mVU_Globals u32 E4 [4] = __four(0x3933e553); u32 E5 [4] = __four(0x36b63510); u32 E6 [4] = __four(0x353961ac); + u32 I32MAXF [4] = __four(0x4effffff); float FTOI_4 [4] = __four(16.0); float FTOI_12 [4] = __four(4096.0); float FTOI_15 [4] = __four(32768.0); diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 3c1f426313..42fbdc97d4 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -484,23 +484,19 @@ static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum) return; const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); const xmm& t1 = mVU.regAlloc->allocReg(); - const xmm& t2 = mVU.regAlloc->allocReg(); - // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate() - xMOVAPS(t1, Fs); + // 
cvttps2dq returns 0x80000000 for any unrepresentable values. + // We want it to return 0x80000000 for negative and 0x7fffffff for positive. + // So for unrepresentable positive values, xor with 0xffffffff to turn 0x80000000 into 0x7fffffff. if (addr) xMUL.PS(Fs, ptr128[addr]); + xMOVAPS(t1, Fs); + xPCMP.GTD(t1, ptr128[mVUglob.I32MAXF]); xCVTTPS2DQ(Fs, Fs); - xPXOR(t1, ptr128[mVUglob.signbit]); - xPSRA.D(t1, 31); - xMOVAPS(t2, Fs); - xPCMP.EQD(t2, ptr128[mVUglob.signbit]); - xAND.PS(t1, t2); - xPADD.D(Fs, t1); + xPXOR(Fs, t1); mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(t1); - mVU.regAlloc->clearNeeded(t2); mVU.profiler.EmitOp(opEnum); } pass3