iR5900: Faster FTOI

2025-12-16 04:08:48 +00:00 · 2025-05-23 02:02:58 -05:00 · 2025-05-23 02:02:58 -05:00 · c72e894fc7
commit c72e894fc7
parent 5bc2342d47
5 changed files with 22 additions and 77 deletions
--- a/pcsx2/FPU.cpp
+++ b/pcsx2/FPU.cpp
@ -249,7 +249,6 @@ void CTC1() {

 void CVT_S() {
 	_FdValf_ = (float)_FsValSl_;
-	_FdValf_ = fpuDouble( _FdValUl_ );
 }

 void CVT_W() {
--- a/pcsx2/x86/iFPU.cpp
+++ b/pcsx2/x86/iFPU.cpp
@ -26,8 +26,6 @@ namespace DOUBLE
 	void recC_EQ_xmm(int info);
 	void recC_LE_xmm(int info);
 	void recC_LT_xmm(int info);
-	void recCVT_S_xmm(int info);
-	void recCVT_W();
 	void recDIV_S_xmm(int info);
 	void recMADD_S_xmm(int info);
 	void recMADDA_S_xmm(int info);
@ -993,15 +991,16 @@ void recCVT_S_xmm(int info)
 	}
 }

-FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
+void recCVT_S()
+{
+	// Float version is fully accurate, no double version
+	eeFPURecompileCode(recCVT_S_xmm, R5900::Interpreter::OpcodeImpl::COP1::CVT_S, XMMINFO_WRITED | XMMINFO_READS);
+}

 void recCVT_W()
 {
-	if (CHECK_FPU_FULL)
-	{
-		DOUBLE::recCVT_W();
-		return;
-	}
+	// Float version is fully accurate, no double version
+
 	// If we have the following EmitOP() on the top then it'll get calculated twice when CHECK_FPU_FULL is true
 	// as we also have an EmitOP() at recCVT_W() on iFPUd.cpp.  hence we have it below the possible return.
 	EE::Profiler.EmitOp(eeOpcode::CVTW);
@ -1010,26 +1009,23 @@ void recCVT_W()

 	if (regs >= 0)
 	{
-		if (CHECK_FPU_EXTRA_OVERFLOW)
-			fpuFloat2(regs);
 		xCVTTSS2SI(eax, xRegisterSSE(regs));
-		xMOVMSKPS(edx, xRegisterSSE(regs)); //extract the signs
-		xAND(edx, 1); // keep only LSB
+		xMOVD(edx, xRegisterSSE(regs));
 	}
 	else
 	{
 		xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]);
 		xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]);
-		xSHR(edx, 31); // mov sign to lsb
 	}

 	//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
 	_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);

-	xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative
-
-	xCMP(eax, 0x80000000); // If the result is indefinitive
-	xCMOVE(eax, edx);      // Saturate it
+	// cvttss2si converts unrepresentable values to 0x80000000, so negative values are already handled.
+	// So we just need to handle positive values: signed-compare the raw float bits in edx against 0x4f000000 (2^31 as a float).
+	xCMP(edx, 0x4f000000); // If the input is greater than INT_MAX
+	xMOV(edx, 0x7fffffff);
+	xCMOVGE(eax, edx);     // Saturate it

 	//Write the result
 	xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax);
--- a/pcsx2/x86/iFPUd.cpp
+++ b/pcsx2/x86/iFPUd.cpp
@ -540,57 +540,10 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT);
 //------------------------------------------------------------------
 // CVT.x XMM
 //------------------------------------------------------------------
-void recCVT_S_xmm(int info)
-{
-	EE::Profiler.EmitOp(eeOpcode::CVTS_F);

-	if (info & PROCESS_EE_D)
-	{
-		if (info & PROCESS_EE_S)
-			xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
-		else
-			xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
-	}
-	else
-	{
-		const int temp = _allocTempXMMreg(XMMT_FPS);
-		xCVTSI2SS(xRegisterSSE(temp), ptr32[&fpuRegs.fpr[_Fs_]]);
-		xMOVSS(ptr32[&fpuRegs.fpr[_Fd_]], xRegisterSSE(temp));
-		_freeXMMreg(temp);
-	}
-}
+// CVT.S: Identical to non-double variant, omitted
+// CVT.W: Identical to non-double variant, omitted

-FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
-
-void recCVT_W() //called from iFPU.cpp's recCVT_W
-{
-	EE::Profiler.EmitOp(eeOpcode::CVTW);
-	int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
-
-	if (regs >= 0)
-	{
-		xCVTTSS2SI(eax, xRegisterSSE(regs));
-		xMOVMSKPS(edx, xRegisterSSE(regs)); // extract the signs
-		xAND(edx, 1);                       // keep only LSB
-	}
-	else
-	{
-		xCVTTSS2SI(eax, ptr32[&fpuRegs.fpr[_Fs_]]);
-		xMOV(edx, ptr[&fpuRegs.fpr[_Fs_]]);
-		xSHR(edx, 31); //mov sign to lsb
-	}
-
-	//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
-	_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
-
-	xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative
-
-	xCMP(eax, 0x80000000); // If the result is indefinitive
-	xCMOVE(eax, edx);      // Saturate it
-
-	//Write the result
-	xMOV(ptr[&fpuRegs.fpr[_Fd_]], eax);
-}
 //------------------------------------------------------------------


--- a/pcsx2/x86/microVU_Misc.h
+++ b/pcsx2/x86/microVU_Misc.h
@ -42,6 +42,7 @@ struct mVU_Globals
 	u32   E4      [4] = __four(0x3933e553);
 	u32   E5      [4] = __four(0x36b63510);
 	u32   E6      [4] = __four(0x353961ac);
+	u32   I32MAXF [4] = __four(0x4effffff);
 	float FTOI_4  [4] = __four(16.0);
 	float FTOI_12 [4] = __four(4096.0);
 	float FTOI_15 [4] = __four(32768.0);
--- a/pcsx2/x86/microVU_Upper.inl
+++ b/pcsx2/x86/microVU_Upper.inl
@ -484,23 +484,19 @@ static void mVU_FTOIx(mP, const float* addr, microOpcode opEnum)
 			return;
 		const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
 		const xmm& t1 = mVU.regAlloc->allocReg();
-		const xmm& t2 = mVU.regAlloc->allocReg();

-		// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
-		xMOVAPS(t1, Fs);
+		// cvttps2dq returns 0x80000000 for any unrepresentable values.
+		// We want it to return 0x80000000 for negative and 0x7fffffff for positive.
+		// So for unrepresentable positive values, xor with 0xffffffff to turn 0x80000000 into 0x7fffffff.
 		if (addr)
 			xMUL.PS(Fs, ptr128[addr]);
+		xMOVAPS(t1, Fs);
+		xPCMP.GTD(t1, ptr128[mVUglob.I32MAXF]);
 		xCVTTPS2DQ(Fs, Fs);
-		xPXOR(t1, ptr128[mVUglob.signbit]);
-		xPSRA.D(t1, 31);
-		xMOVAPS(t2, Fs);
-		xPCMP.EQD(t2, ptr128[mVUglob.signbit]);
-		xAND.PS(t1, t2);
-		xPADD.D(Fs, t1);
+		xPXOR(Fs, t1);

 		mVU.regAlloc->clearNeeded(Fs);
 		mVU.regAlloc->clearNeeded(t1);
-		mVU.regAlloc->clearNeeded(t2);
 		mVU.profiler.EmitOp(opEnum);
 	}
 	pass3