From 414aa9d6b34108ae2076870a694d8e04d582ad89 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Fri, 3 Apr 2026 16:56:27 +0200 Subject: [PATCH] GX2+Latte: Rework GX2CopySurface --- src/Cafe/CMakeLists.txt | 1 + src/Cafe/HW/Latte/Core/Latte.h | 4 - src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp | 27 ++ src/Cafe/HW/Latte/Core/LatteAsyncCommands.h | 6 +- .../HW/Latte/Core/LatteCommandProcessor.cpp | 59 +-- src/Cafe/HW/Latte/Core/LatteSurfaceCopy.cpp | 87 +++- src/Cafe/HW/Latte/Core/LatteSurfaceCopy.h | 25 ++ src/Cafe/HW/Latte/Core/LatteTexture.cpp | 1 + src/Cafe/OS/libs/gx2/GX2_Surface_Copy.cpp | 404 ++++++------------ 9 files changed, 285 insertions(+), 329 deletions(-) create mode 100644 src/Cafe/HW/Latte/Core/LatteSurfaceCopy.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index e5e41143..16202b8f 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -133,6 +133,7 @@ add_library(CemuCafe HW/Latte/Core/LatteSoftware.h HW/Latte/Core/LatteStreamoutGPU.cpp HW/Latte/Core/LatteSurfaceCopy.cpp + HW/Latte/Core/LatteSurfaceCopy.h HW/Latte/Core/LatteTextureCache.cpp HW/Latte/Core/LatteTexture.cpp HW/Latte/Core/LatteTexture.h diff --git a/src/Cafe/HW/Latte/Core/Latte.h b/src/Cafe/HW/Latte/Core/Latte.h index 99468801..548944ec 100644 --- a/src/Cafe/HW/Latte/Core/Latte.h +++ b/src/Cafe/HW/Latte/Core/Latte.h @@ -98,10 +98,6 @@ void LatteRenderTarget_itHLECopyColorBufferToScanBuffer(MPTR colorBufferPtr, uin void LatteRenderTarget_unloadAll(); -// surface copy - -void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 srcSwizzle, Latte::E_GX2SURFFMT srcSurfaceFormat, sint32 srcWidth, sint32 srcHeight, sint32 srcDepth, uint32 srcPitch, sint32 srcSlice, Latte::E_DIM srcDim, Latte::E_HWTILEMODE srcTilemode, sint32 srcAA, sint32 srcLevel, MPTR dstPhysAddr, MPTR dstMipAddr, uint32 dstSwizzle, Latte::E_GX2SURFFMT dstSurfaceFormat, sint32 dstWidth, sint32 dstHeight, sint32 dstDepth, uint32 dstPitch, 
sint32 dstSlice, Latte::E_DIM dstDim, Latte::E_HWTILEMODE dstTilemode, sint32 dstAA, sint32 dstLevel); - // texture cache void LatteTC_Init(); diff --git a/src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp b/src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp index 4b114ddf..12873b05 100644 --- a/src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp +++ b/src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp @@ -1,6 +1,7 @@ #include "Cafe/HW/Latte/Core/Latte.h" #include "Cafe/HW/Latte/Core/LatteAsyncCommands.h" #include "Cafe/HW/Latte/Core/LatteShader.h" +#include "Cafe/HW/Latte/Core/LatteSurfaceCopy.h" #include "Cafe/HW/Latte/Core/LatteTexture.h" void LatteThread_Exit(); @@ -35,11 +36,19 @@ typedef struct uint64 shaderAuxHash; LatteConst::ShaderType shaderType; }deleteShader; + + struct + { + LatteSurfaceCopyParam src; + LatteSurfaceCopyParam dst; + LatteSurfaceCopyRect rect; + }textureCopy; }; }LatteAsyncCommand_t; #define ASYNC_CMD_FORCE_TEXTURE_READBACK 1 #define ASYNC_CMD_DELETE_SHADER 2 +#define ASYNC_CMD_TEXTURE_COPY 3 std::queue LatteAsyncCommandQueue; @@ -82,6 +91,20 @@ void LatteAsyncCommands_queueDeleteShader(uint64 shaderBaseHash, uint64 shaderAu swl_gpuAsyncCommands.UnlockWrite(); } +void LatteAsyncCommand_queueTextureCopy(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect) +{ + LatteAsyncCommand_t asyncCommand = {}; + // setup command + asyncCommand.type = ASYNC_CMD_TEXTURE_COPY; + asyncCommand.textureCopy.src = src; + asyncCommand.textureCopy.dst = dst; + asyncCommand.textureCopy.rect = rect; + + swl_gpuAsyncCommands.LockWrite(); + LatteAsyncCommandQueue.push(asyncCommand); + swl_gpuAsyncCommands.UnlockWrite(); +} + void LatteAsyncCommands_waitUntilAllProcessed() { while (LatteAsyncCommandQueue.empty() == false) @@ -127,6 +150,10 @@ void LatteAsyncCommands_checkAndExecute() { LatteSHRC_RemoveFromCacheByHash(asyncCommand.deleteShader.shaderBaseHash, asyncCommand.deleteShader.shaderAuxHash, asyncCommand.deleteShader.shaderType); 
} + else if (asyncCommand.type == ASYNC_CMD_TEXTURE_COPY) + { + LatteSurfaceCopy_copySurfaceNew(asyncCommand.textureCopy.src, asyncCommand.textureCopy.dst, asyncCommand.textureCopy.rect); + } else { cemu_assert_unimplemented(); diff --git a/src/Cafe/HW/Latte/Core/LatteAsyncCommands.h b/src/Cafe/HW/Latte/Core/LatteAsyncCommands.h index f3ba80cf..b0859066 100644 --- a/src/Cafe/HW/Latte/Core/LatteAsyncCommands.h +++ b/src/Cafe/HW/Latte/Core/LatteAsyncCommands.h @@ -1,7 +1,11 @@ #pragma once +struct LatteSurfaceCopyParam; +struct LatteSurfaceCopyRect; + void LatteAsyncCommands_queueForceTextureReadback(MPTR physAddr, MPTR mipAddr, uint32 swizzle, sint32 format, sint32 width, sint32 height, sint32 depth, uint32 pitch, uint32 slice, sint32 dim, Latte::E_HWTILEMODE tilemode, sint32 aa, sint32 level); +void LatteAsyncCommand_queueTextureCopy(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect); void LatteAsyncCommands_queueDeleteShader(uint64 shaderBaseHash, uint64 shaderAuxHash, LatteConst::ShaderType shaderType); void LatteAsyncCommands_waitUntilAllProcessed(); -void LatteAsyncCommands_checkAndExecute(); \ No newline at end of file +void LatteAsyncCommands_checkAndExecute(); diff --git a/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp b/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp index 963c49f7..ceec8d24 100644 --- a/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp +++ b/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp @@ -11,6 +11,7 @@ #include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cafe/HW/Latte/Core/LatteBufferCache.h" #include "Cafe/HW/Latte/Core/LattePM4.h" +#include "Cafe/HW/Latte/Core/LatteSurfaceCopy.h" #include "Cafe/OS/libs/coreinit/coreinit_Time.h" #include "Cafe/OS/libs/TCL/TCL.h" // TCL currently handles the GPU command ringbuffer @@ -805,37 +806,37 @@ LatteCMDPtr LatteCP_itHLEBottomOfPipeCB(LatteCMDPtr cmd, uint32 nWords) // GPU-side handler for GX2CopySurface/GX2CopySurfaceEx and similar 
LatteCMDPtr LatteCP_itHLECopySurfaceNew(LatteCMDPtr cmd, uint32 nWords) { - cemu_assert_debug(nWords == 26); + cemu_assert_debug(nWords == 4+9*2); + // copy rect + LatteSurfaceCopyRect copyRect; + copyRect.x = LatteReadCMD(); + copyRect.y = LatteReadCMD(); + copyRect.width = LatteReadCMD(); + copyRect.height = LatteReadCMD(); // src - MPTR srcPhysAddr = LatteReadCMD(); - MPTR srcMipAddr = LatteReadCMD(); - uint32 srcSwizzle = LatteReadCMD(); - Latte::E_GX2SURFFMT srcSurfaceFormat = (Latte::E_GX2SURFFMT)LatteReadCMD(); - sint32 srcWidth = LatteReadCMD(); - sint32 srcHeight = LatteReadCMD(); - sint32 srcDepth = LatteReadCMD(); - uint32 srcPitch = LatteReadCMD(); - uint32 srcSlice = LatteReadCMD(); - Latte::E_DIM srcDim = (Latte::E_DIM)LatteReadCMD(); - Latte::E_HWTILEMODE srcTilemode = (Latte::E_HWTILEMODE)LatteReadCMD(); - sint32 srcAA = LatteReadCMD(); - sint32 srcLevel = LatteReadCMD(); + LatteSurfaceCopyParam src{}; + src.physDataAddr = LatteReadCMD(); + src.swizzle = LatteReadCMD(); + src.surfaceFormat = (Latte::E_GX2SURFFMT)LatteReadCMD(); + src.pitch = LatteReadCMD(); + src.heightInTexels = LatteReadCMD(); + src.sliceIndex = LatteReadCMD(); + src.dim = (Latte::E_DIM)LatteReadCMD(); + src.tilemode = (Latte::E_GX2TILEMODE)LatteReadCMD(); + src.aa = LatteReadCMD(); // dst - MPTR dstPhysAddr = LatteReadCMD(); - MPTR dstMipAddr = LatteReadCMD(); - uint32 dstSwizzle = LatteReadCMD(); - Latte::E_GX2SURFFMT dstSurfaceFormat = (Latte::E_GX2SURFFMT)LatteReadCMD(); - sint32 dstWidth = LatteReadCMD(); - sint32 dstHeight = LatteReadCMD(); - sint32 dstDepth = LatteReadCMD(); - uint32 dstPitch = LatteReadCMD(); - uint32 dstSlice = LatteReadCMD(); - Latte::E_DIM dstDim = (Latte::E_DIM)LatteReadCMD(); - Latte::E_HWTILEMODE dstTilemode = (Latte::E_HWTILEMODE)LatteReadCMD(); - sint32 dstAA = LatteReadCMD(); - sint32 dstLevel = LatteReadCMD(); + LatteSurfaceCopyParam dst{}; + dst.physDataAddr = LatteReadCMD(); + dst.swizzle = LatteReadCMD(); + dst.surfaceFormat = 
(Latte::E_GX2SURFFMT)LatteReadCMD(); + dst.pitch = LatteReadCMD(); + dst.heightInTexels = LatteReadCMD(); + dst.sliceIndex = LatteReadCMD(); + dst.dim = (Latte::E_DIM)LatteReadCMD(); + dst.tilemode = (Latte::E_GX2TILEMODE)LatteReadCMD(); + dst.aa = LatteReadCMD(); - LatteSurfaceCopy_copySurfaceNew(srcPhysAddr, srcMipAddr, srcSwizzle, srcSurfaceFormat, srcWidth, srcHeight, srcDepth, srcPitch, srcSlice, srcDim, srcTilemode, srcAA, srcLevel, dstPhysAddr, dstMipAddr, dstSwizzle, dstSurfaceFormat, dstWidth, dstHeight, dstDepth, dstPitch, dstSlice, dstDim, dstTilemode, dstAA, dstLevel); + LatteSurfaceCopy_copySurfaceNew(src, dst, copyRect); return cmd; } @@ -1921,4 +1922,4 @@ void LatteCP_DebugPrintCmdBuffer(uint32be* bufferPtr, uint32 size) } } } -#endif \ No newline at end of file +#endif diff --git a/src/Cafe/HW/Latte/Core/LatteSurfaceCopy.cpp b/src/Cafe/HW/Latte/Core/LatteSurfaceCopy.cpp index 45be6843..fe52547f 100644 --- a/src/Cafe/HW/Latte/Core/LatteSurfaceCopy.cpp +++ b/src/Cafe/HW/Latte/Core/LatteSurfaceCopy.cpp @@ -3,22 +3,65 @@ #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Core/LatteDefaultShaders.h" #include "Cafe/HW/Latte/Core/LatteTexture.h" +#include "Cafe/HW/Latte/Core/LatteSurfaceCopy.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" -void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 srcSwizzle, Latte::E_GX2SURFFMT srcSurfaceFormat, sint32 srcWidth, sint32 srcHeight, sint32 srcDepth, uint32 srcPitch, sint32 srcSlice, Latte::E_DIM srcDim, Latte::E_HWTILEMODE srcTilemode, sint32 srcAA, sint32 srcLevel, MPTR dstPhysAddr, MPTR dstMipAddr, uint32 dstSwizzle, Latte::E_GX2SURFFMT dstSurfaceFormat, sint32 dstWidth, sint32 dstHeight, sint32 dstDepth, uint32 dstPitch, sint32 dstSlice, Latte::E_DIM dstDim, Latte::E_HWTILEMODE dstTilemode, sint32 dstAA, sint32 dstLevel) +/* Surface copies are tricky to handle because we simulate unified memory on top of two separate memory systems: RAM and VRAM + * Cemu may only have 
texture data in either RAM or VRAM, or both. And doing transfers to or from CPU can quickly become prohibitively expensive + * So we need to make best guesses on where the data needs to come from and where it needs to go + * A complicating factor is that texture copies are more like memcpy, in the sense that they don't care about the actual underlying pixel format + * For example a R32F texture can be copied as RGBA8. Similarly, different tile modes can sometimes be identical under specific circumstances + */ + +void gx2SurfaceCopySoftware( + uint8* inputData, sint32 surfSrcHeight, sint32 srcPitch, sint32 srcDepth, uint32 srcSlice, uint32 srcSwizzle, uint32 srcHwTileMode, + uint8* outputData, sint32 surfDstHeight, sint32 dstPitch, sint32 dstDepth, uint32 dstSlice, uint32 dstSwizzle, uint32 dstHwTileMode, + uint32 copyWidth, uint32 copyHeight, uint32 copyBpp); + +void LatteSurfaceCopy_CopyInRAM(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect) { - // check if source is within valid mip range - if (srcDim == Latte::E_DIM::DIM_3D && (srcDepth >> srcLevel) == 0 && (srcWidth >> srcLevel) == 0 && (srcHeight >> srcLevel) == 0) - return; - else if ((srcWidth >> srcLevel) == 0 && (srcHeight >> srcLevel) == 0) + Latte::E_HWSURFFMT dstHwFormat = Latte::GetHWFormat(dst.surfaceFormat); + + sint32 copyWidth = rect.width; + sint32 copyHeight = rect.height; + if (Latte::IsCompressedFormat(dstHwFormat)) + { + copyWidth = (copyWidth + 3) / 4; + copyHeight = (copyHeight + 3) / 4; + } + + uint32 dstBpp = Latte::GetFormatBits(dstHwFormat); + + gx2SurfaceCopySoftware((uint8*)MEMPTR(src.physDataAddr).GetPtr(), src.heightInTexels, src.pitch, 1, src.sliceIndex, src.swizzle, (uint32)src.tilemode, + (uint8*)MEMPTR(dst.physDataAddr).GetPtr(), dst.heightInTexels, dst.pitch, 1, dst.sliceIndex, dst.swizzle, (uint32)dst.tilemode, + copyWidth, copyHeight, dstBpp); +} + +void LatteSurfaceCopy_copySurfaceNew(const LatteSurfaceCopyParam& src, const 
LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect) +{ + cemu_assert_debug(rect.x == 0 && rect.y == 0); // origin offset not yet supported + + cemu_assert_debug((rect.x + rect.width) <= dst.pitch * (Latte::IsCompressedFormat(dst.surfaceFormat)?4:1)); + cemu_assert_debug((rect.x + rect.width) <= src.pitch * (Latte::IsCompressedFormat(src.surfaceFormat)?4:1)); + + if (src.tilemode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL || dst.tilemode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL) + { + // todo - it's technically possible for a matching linear texture to be in the texture cache already + // there is also a case of tiled to linear_special where we should trigger a readback without an actual destination texture + LatteSurfaceCopy_CopyInRAM(src, dst, rect); return; + } + // look up source texture + // todo - for non-zero slices heightInTexels matters (used to calculate the slice size). We should take this into account during lookup LatteTexture* sourceTexture = nullptr; - LatteTextureView* sourceView = LatteTC_GetTextureSliceViewOrTryCreate(srcPhysAddr, srcMipAddr, srcSurfaceFormat, srcTilemode, srcWidth, srcHeight, srcDepth, srcPitch, srcSwizzle, srcSlice, srcLevel); + LatteTextureView* sourceView = LatteTC_GetTextureSliceViewOrTryCreate(src.physDataAddr, MPTR_NULL, src.surfaceFormat, Latte::MakeHWTileMode(src.tilemode), rect.x + rect.width, rect.y + rect.height, 1, src.pitch, src.swizzle, src.sliceIndex, 0); if (sourceView == nullptr) { - debug_printf("HLECopySurface(): Source texture is not in list of dynamic textures\n"); + // source texture doesn't exist (yet) in texture cache + // operate on RAM instead + LatteSurfaceCopy_CopyInRAM(src, dst, rect); return; } sourceTexture = sourceView->baseTexture; @@ -29,15 +72,14 @@ void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 s } // look up destination texture LatteTexture* destinationTexture = nullptr; - LatteTextureView* destinationView = 
LatteTextureViewLookupCache::lookupSlice(dstPhysAddr, dstWidth, dstHeight, dstPitch, dstLevel, dstSlice, dstSurfaceFormat); + LatteTextureView* destinationView = LatteTextureViewLookupCache::lookupSliceMinSize(dst.physDataAddr, rect.x + rect.width, rect.y + rect.height, dst.pitch, 0, dst.sliceIndex, dst.surfaceFormat); + // todo - Instead of lookupSliceMinSize lookup the base texture by data range instead and return mip/slice index if (destinationView) destinationTexture = destinationView->baseTexture; - // create destination texture if it doesnt exist if (!destinationTexture) { - LatteTexture* renderTargetConf = nullptr; - destinationView = LatteTexture_CreateMapping(dstPhysAddr, dstMipAddr, dstWidth, dstHeight, dstDepth, dstPitch, dstTilemode, dstSwizzle, dstLevel, 1, dstSlice, 1, dstSurfaceFormat, dstDim, Latte::IsMSAA(dstDim) ? Latte::E_DIM::DIM_2D_MSAA : Latte::E_DIM::DIM_2D, false); + destinationView = LatteTexture_CreateMapping(dst.physDataAddr, MPTR_NULL, rect.x + rect.width, rect.y + rect.height, 1, dst.pitch, Latte::MakeHWTileMode(dst.tilemode), dst.swizzle, 0, 1, dst.sliceIndex, 1, dst.surfaceFormat, dst.dim, Latte::IsMSAA(dst.dim) ? 
Latte::E_DIM::DIM_2D_MSAA : Latte::E_DIM::DIM_2D, false); destinationTexture = destinationView->baseTexture; } // copy texture @@ -46,15 +88,12 @@ void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 s // mark source and destination texture as still in use LatteTC_MarkTextureStillInUse(destinationTexture); LatteTC_MarkTextureStillInUse(sourceTexture); - sint32 realSrcSlice = srcSlice; + sint32 realSrcSlice = src.sliceIndex; if (LatteTexture_doesEffectiveRescaleRatioMatch(sourceTexture, sourceView->firstMip, destinationTexture, destinationView->firstMip)) { - // adjust copy size - sint32 copyWidth = std::max(srcWidth >> srcLevel, 1); - sint32 copyHeight = std::max(srcHeight >> srcLevel, 1); - // use the smaller width/height as copy size - copyWidth = std::min(copyWidth, std::max(dstWidth >> dstLevel, 1)); - copyHeight = std::min(copyHeight, std::max(dstHeight >> dstLevel, 1)); + cemu_assert_debug(rect.x == 0 && rect.y == 0); + sint32 copyWidth = rect.width; + sint32 copyHeight = rect.height; sint32 effectiveCopyWidth = copyWidth; sint32 effectiveCopyHeight = copyHeight; LatteTexture_scaleToEffectiveSize(sourceTexture, &effectiveCopyWidth, &effectiveCopyHeight, 0); @@ -77,11 +116,19 @@ void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 s } else debug_printf("Source or destination texture does not exist\n"); - - // download destination texture if it matches known accessed formats + // if the texture is updated from a tiled to a linear format it's a strong indicator for CPU reads + // in which case we should sync the texture back to CPU RAM + const bool sourceIsLinear = sourceTexture->tileMode == Latte::E_HWTILEMODE::TM_LINEAR_ALIGNED || sourceTexture->tileMode == Latte::E_HWTILEMODE::TM_LINEAR_GENERAL; + const bool destinationIsLinear = destinationTexture->tileMode == Latte::E_HWTILEMODE::TM_LINEAR_ALIGNED || destinationTexture->tileMode == Latte::E_HWTILEMODE::TM_LINEAR_GENERAL; + bool shouldReadback = 
!sourceIsLinear && destinationIsLinear; + // special case for Bayonetta 2 if (destinationTexture->width == 8 && destinationTexture->height == 8 && destinationTexture->tileMode == Latte::E_HWTILEMODE::TM_1D_TILED_THIN1) { cemuLog_logDebug(LogType::Force, "Texture readback after copy for Bayonetta 2 (phys: 0x{:08x})", destinationTexture->physAddress); + shouldReadback = true; + } + if (shouldReadback) + { LatteTextureReadback_Initate(destinationView); } } diff --git a/src/Cafe/HW/Latte/Core/LatteSurfaceCopy.h b/src/Cafe/HW/Latte/Core/LatteSurfaceCopy.h new file mode 100644 index 00000000..f817f75d --- /dev/null +++ b/src/Cafe/HW/Latte/Core/LatteSurfaceCopy.h @@ -0,0 +1,25 @@ +#pragma once + +struct LatteSurfaceCopyParam +{ + // effective parameters (with mip index baked into them) + MPTR physDataAddr; // points to actual mip + uint32 swizzle; + Latte::E_GX2SURFFMT surfaceFormat; + sint32 heightInTexels; + uint32 pitch; + Latte::E_DIM dim; + Latte::E_GX2TILEMODE tilemode; + sint32 aa; + sint32 sliceIndex; +}; + +struct LatteSurfaceCopyRect +{ + uint32 x; + uint32 y; + uint32 width; // in pixels + uint32 height; +}; + +void LatteSurfaceCopy_copySurfaceNew(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect); \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Core/LatteTexture.cpp b/src/Cafe/HW/Latte/Core/LatteTexture.cpp index 4445fb26..c2d4f05b 100644 --- a/src/Cafe/HW/Latte/Core/LatteTexture.cpp +++ b/src/Cafe/HW/Latte/Core/LatteTexture.cpp @@ -1138,6 +1138,7 @@ void LatteTC_LookupTexturesByPhysAddr(MPTR physAddr, std::vector& } } +// return or create a view, requires existing base texture. 
Returns nullptr if it doesn't exist yet LatteTextureView* LatteTC_GetTextureSliceViewOrTryCreate(MPTR srcImagePtr, MPTR srcMipPtr, Latte::E_GX2SURFFMT srcFormat, Latte::E_HWTILEMODE srcTileMode, uint32 srcWidth, uint32 srcHeight, uint32 srcDepth, uint32 srcPitch, uint32 srcSwizzle, uint32 srcSlice, uint32 srcMip, const bool requireExactResolution) { LatteTextureView* sourceView; diff --git a/src/Cafe/OS/libs/gx2/GX2_Surface_Copy.cpp b/src/Cafe/OS/libs/gx2/GX2_Surface_Copy.cpp index f52f38e7..e4ac4443 100644 --- a/src/Cafe/OS/libs/gx2/GX2_Surface_Copy.cpp +++ b/src/Cafe/OS/libs/gx2/GX2_Surface_Copy.cpp @@ -4,6 +4,7 @@ #include "Cafe/HW/Latte/Core/Latte.h" #include "Cafe/HW/Latte/Core/LatteDraw.h" #include "Cafe/HW/Latte/Core/LatteAsyncCommands.h" +#include "Cafe/HW/Latte/Core/LatteSurfaceCopy.h" #include "Cafe/HW/Latte/LatteAddrLib/LatteAddrLib.h" #include "util/highresolutiontimer/HighResolutionTimer.h" #include "GX2.h" @@ -127,6 +128,11 @@ void gx2SurfaceCopySoftware( uint8* outputData, sint32 surfDstHeight, sint32 dstPitch, sint32 dstDepth, uint32 dstSlice, uint32 dstSwizzle, uint32 dstHwTileMode, uint32 copyWidth, uint32 copyHeight, uint32 copyBpp) { + if (srcHwTileMode == 16) + srcHwTileMode = 0; + if (dstHwTileMode == 16) + dstHwTileMode = 0; + if (srcHwTileMode == 4 && dstHwTileMode == 4 && (copyWidth & 7) == 0 && (copyHeight & 7) == 0 && copyBpp <= 32) // todo - check sample == 1 { gx2SurfaceCopySoftware_fastPath_tm4Copy(inputData, surfSrcHeight, srcPitch, srcDepth, srcSlice, srcSwizzle, outputData, surfDstHeight, dstPitch, dstDepth, dstSlice, dstSwizzle, copyWidth, copyHeight, copyBpp); @@ -147,7 +153,19 @@ void gx2SurfaceCopySoftware( cemu_assert_debug(false); } -void gx2Surface_GX2CopySurface(GX2Surface* srcSurface, uint32 srcMip, uint32 srcSlice, GX2Surface* dstSurface, uint32 dstMip, uint32 dstSlice) +// Surface copy handling is complicated because of having to simulate unified memory +// GX2CopySurface supports two modes: +// - When source or 
destination surface have a tilemode of LINEAR_SPECIAL (16) -> Copy is done synchronously on the CPU +// - In all other cases -> Copy is done on the GPU via draw commands + +// But in Cemu things are more complicated, we generally can't do pure CPU copies because a surface's texture data +// may only exist in VRAM right now. So we always need to delegate copying to the renderer thread (which has access to the texture cache) + +// In Cemu we thus handle it like this: +// For GX2 CPU copies -> Submit as async command to the renderer (will be processed asap) and stall until completed +// For GX2 GPU copies -> Submit as HLE command to Latte's command queue to be executed in order + +void GX2CopySurfaceInternal(GX2Surface* srcSurface, uint32 srcMip, uint32 srcSlice, GX2Surface* dstSurface, uint32 dstMip, uint32 dstSlice) { sint32 dstWidth = dstSurface->width; sint32 dstHeight = dstSurface->height; @@ -167,8 +185,6 @@ void gx2Surface_GX2CopySurface(GX2Surface* srcSurface, uint32 srcMip, uint32 src // handle format Latte::E_GX2SURFFMT srcFormat = srcSurface->format; Latte::E_GX2SURFFMT dstFormat = dstSurface->format; - uint32 srcBPP = Latte::GetFormatBits(srcFormat); - uint32 dstBPP = Latte::GetFormatBits(dstFormat); auto srcHwFormat = Latte::GetHWFormat(srcFormat); auto dstHwFormat = Latte::GetHWFormat(dstFormat); // get texture info @@ -182,173 +198,130 @@ void gx2Surface_GX2CopySurface(GX2Surface* srcSurface, uint32 srcMip, uint32 src debug_printf("GX2CopySurface(): mip count is 0\n"); return; } - // get input pointer - uint8* inputData = NULL; - cemu_assert(srcMip < srcSurface->numLevels); - if( srcMip == 0 ) - inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->imagePtr); - else if( srcMip == 1 ) - inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr); - else - { - inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr + srcSurface->mipOffset[srcMip - 1]); - } - // get output pointer - uint8* outputData = NULL; - 
cemu_assert(dstMip < dstSurface->numLevels); - if( dstMip == 0 ) - outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->imagePtr); - else if( dstMip == 1 ) - outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr); - else - { - outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr + dstSurface->mipOffset[dstMip - 1]); - } - + // make sure formats are compatible if( srcHwFormat != dstHwFormat ) { // mismatching format - cemuLog_logDebug(LogType::Force, "GX2CopySurface(): Format mismatch"); + cemuLog_logDebug(LogType::Force, "GX2CopySurface(): Format mismatch (src=0x{:04x} dst=0x{:04x})", (sint32)srcFormat, (sint32)dstFormat); return; } - - // note: Do not trust values from the input GX2Surface* structs but rely on surfOutDst/surfOutSrc instead if possible. - // src + // get input pointer + cemu_assert(srcMip < srcSurface->numLevels); + uint8* srcDataPtr = nullptr; + if( srcMip == 0 ) + srcDataPtr = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->imagePtr); + else if( srcMip == 1 ) + srcDataPtr = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr); + else + srcDataPtr = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr + srcSurface->mipOffset[srcMip - 1]); + // get output pointer + cemu_assert(dstMip < dstSurface->numLevels); + uint8* dstDataPtr = nullptr; + if( dstMip == 0 ) + dstDataPtr = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->imagePtr); + else if( dstMip == 1 ) + dstDataPtr = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr); + else + dstDataPtr = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr + dstSurface->mipOffset[dstMip - 1]); + // note: pitch is taken from surfOutSrc/surfOutDst, which may differ from the pitch stored in the input surface structs uint32 srcPitch = surfOutSrc.pitch; - uint32 srcSwizzle = srcSurface->swizzle; - uint32 srcHwTileMode = (uint32)surfOutSrc.hwTileMode; - uint32 srcDepth = std::max(surfOutSrc.depth, 1); 
- if (srcHwTileMode == 0) // linear + Latte::E_GX2TILEMODE srcTilemode = (srcSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL) ? srcSurface->tileMode.value() : (Latte::E_GX2TILEMODE)surfOutSrc.hwTileMode; + Latte::E_GX2TILEMODE dstTilemode = (dstSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL) ? dstSurface->tileMode.value() : (Latte::E_GX2TILEMODE)surfOutDst.hwTileMode; + + if (srcTilemode == Latte::E_GX2TILEMODE::TM_LINEAR_GENERAL) { + // todo - why is this necessary? GX2CalculateSurfaceInfo should already handle this srcPitch = srcSurface->pitch >> srcMip; srcPitch = std::max(srcPitch, 1); } - // dst - uint32 dstPitch = surfOutDst.pitch; - uint32 dstSwizzle = dstSurface->swizzle; - uint32 dstHwTileMode = (uint32)surfOutDst.hwTileMode; - uint32 dstDepth = std::max(surfOutDst.depth, 1); - uint32 dstBpp = surfOutDst.bpp; + uint32 copyWidth = std::max(dstWidth>>dstMip, 1); + uint32 copyHeight = std::max(dstHeight>>dstMip, 1); - //debug_printf("Src Tex: %08X %dx%d Swizzle: %08x tm: %d fmt: %04x use: %02x\n", _swapEndianU32(srcSurface->imagePtr), _swapEndianU32(srcSurface->width), _swapEndianU32(srcSurface->height), _swapEndianU32(srcSurface->swizzle), _swapEndianU32(srcSurface->tileMode), _swapEndianU32(srcSurface->format), (uint32)srcSurface->resFlag); - //debug_printf("Dst Tex: %08X %dx%d Swizzle: %08x tm: %d fmt: %04x use: %02x\n", _swapEndianU32(dstSurface->imagePtr), _swapEndianU32(dstSurface->width), _swapEndianU32(dstSurface->height), _swapEndianU32(dstSurface->swizzle), _swapEndianU32(dstSurface->tileMode), _swapEndianU32(dstSurface->format), (uint32)dstSurface->resFlag); + cemu_assert(copyWidth <= std::max(srcWidth>>srcMip, 1)); + cemu_assert(copyHeight <= std::max(srcHeight>>srcMip, 1)); - bool requestGPURAMCopy = false; - bool debugTestForceCPUCopy = false; + cemuLog_log(LogType::GX2, "GX2CopySurface:"); + cemuLog_log(LogType::GX2,"srcSurface: imagePtr=0x{:08x} mipPtr=0x{:08x} swizzle=0x{:06x} width={} height={} depth={} 
pitch=0x{:x} tilemode=0x{:x} format=0x{:x} mip={} slice={}", + (uint32)srcSurface->imagePtr, (uint32)srcSurface->mipPtr, (uint32)srcSurface->swizzle, (uint32)srcSurface->width, (uint32)srcSurface->height, (uint32)srcSurface->depth, (uint32)srcSurface->pitch, + (uint32)srcSurface->tileMode.value(), (uint32)srcSurface->format.value(), srcMip, srcSlice); + cemuLog_log(LogType::GX2, "dstSurface: imagePtr=0x{:08x} mipPtr=0x{:08x} swizzle=0x{:06x} width={} height={} depth={} pitch=0x{:x} tilemode=0x{:x} format=0x{:x} mip={} slice={}", + (uint32)dstSurface->imagePtr, (uint32)dstSurface->mipPtr, (uint32)dstSurface->swizzle, (uint32)dstSurface->width, (uint32)dstSurface->height, (uint32)dstSurface->depth, (uint32)dstSurface->pitch, + (uint32)dstSurface->tileMode.value(), (uint32)dstSurface->format.value(), dstMip, dstSlice); - if (srcSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL && dstSurface->tileMode == Latte::E_GX2TILEMODE::TM_2D_TILED_THIN1) - debugTestForceCPUCopy = true; - - if (srcSurface->tileMode == Latte::E_GX2TILEMODE::TM_2D_TILED_THIN1 && dstSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL ) + bool isGX2CPUCopy = srcSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL || dstSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL; + if (isGX2CPUCopy) { - LatteAsyncCommands_queueForceTextureReadback( - srcSurface->imagePtr, - srcSurface->mipPtr, - srcSurface->swizzle, - (uint32)srcSurface->format.value(), - srcSurface->width, - srcSurface->height, - srcSurface->depth, - srcSurface->pitch, - srcSlice, - (uint32)srcSurface->dim.value(), - Latte::MakeHWTileMode(srcSurface->tileMode), - srcSurface->aa, - srcMip); + LatteSurfaceCopyParam srcCopy{}; + srcCopy.physDataAddr = MEMPTR(srcDataPtr).GetMPTR(); + srcCopy.swizzle = srcSurface->swizzle; + srcCopy.surfaceFormat = srcSurface->format; + srcCopy.heightInTexels = surfOutSrc.height; + srcCopy.pitch = surfOutSrc.pitch; + srcCopy.dim = srcSurface->dim; + srcCopy.tilemode = 
srcTilemode; + srcCopy.aa = srcSurface->aa; + srcCopy.sliceIndex = (sint32)srcSlice; + LatteSurfaceCopyParam dstCopy{}; + dstCopy.physDataAddr = MEMPTR(dstDataPtr).GetMPTR(); + dstCopy.swizzle = dstSurface->swizzle; + dstCopy.surfaceFormat = dstSurface->format; + dstCopy.heightInTexels = surfOutDst.height; + dstCopy.pitch = surfOutDst.pitch; + dstCopy.dim = dstSurface->dim; + dstCopy.tilemode = dstTilemode; + dstCopy.aa = dstSurface->aa; + dstCopy.sliceIndex = (sint32)dstSlice; + + LatteSurfaceCopyRect copyRect{}; + copyRect.x = 0; + copyRect.y = 0; + copyRect.width = copyWidth; + copyRect.height = copyHeight; + + LatteAsyncCommand_queueTextureCopy(srcCopy, dstCopy, copyRect); + // cpu copies have to be finished by the time GX2CopySurface returns + // since we delegate the copy to the Latte thread, we have to wait for it to finish here LatteAsyncCommands_waitUntilAllProcessed(); - - debugTestForceCPUCopy = true; - } - - // send copy command to GPU - if( srcHwTileMode > 0 && srcHwTileMode < 16 && dstHwTileMode > 0 && dstHwTileMode < 16 || requestGPURAMCopy ) - { - GX2::GX2ReserveCmdSpace(1+13*2); - - gx2WriteGather_submit(pm4HeaderType3(IT_HLE_COPY_SURFACE_NEW, 13*2), - // src - (uint32)srcSurface->imagePtr, - (uint32)srcSurface->mipPtr, - (uint32)srcSurface->swizzle, - (uint32)srcSurface->format.value(), - (uint32)srcSurface->width, - (uint32)srcSurface->height, - (uint32)srcSurface->depth, - (uint32)srcSurface->pitch, - srcSlice, - (uint32)srcSurface->dim.value(), - (uint32)srcSurface->tileMode.value(), - (uint32)srcSurface->aa, - srcMip, - // dst - (uint32)dstSurface->imagePtr, - (uint32)dstSurface->mipPtr, - (uint32)dstSurface->swizzle, - (uint32)dstSurface->format.value(), - (uint32)dstSurface->width, - (uint32)dstSurface->height, - (uint32)dstSurface->depth, - (uint32)dstSurface->pitch, - dstSlice, - (uint32)dstSurface->dim.value(), - (uint32)dstSurface->tileMode.value(), - (uint32)dstSurface->aa, - dstMip); - } - - if (requestGPURAMCopy) - return; // if RAM 
copy happens on the GPU side we skip it here - - // manually exclude expensive CPU texture copies for some known game framebuffer textures - // todo - find a better way to solve this - bool isDynamicTexCopy = false; - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width >= 800 && srcFormat == Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT); // SM3DW - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width >= 800 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM); // Trine 2 - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 0xA0 && srcFormat == Latte::E_GX2SURFFMT::R32_FLOAT); // Little Inferno - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 1280 && srcFormat == Latte::E_GX2SURFFMT::R32_FLOAT); // Donkey Kong Tropical Freeze - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 640 && srcSurface->height == 320 && srcFormat == Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT); // SM3DW Switch Scramble Circus - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM && srcSurface->tileMode != Latte::E_GX2TILEMODE::TM_LINEAR_ALIGNED ); // Affordable Space Adventures - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM); // Affordable Space Adventures - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 1152 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT && (srcSurface->resFlag&GX2_RESFLAG_USAGE_COLOR_BUFFER) != 0 ); // Star Fox Zero - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 680 && srcSurface->height == 480 && srcFormat == 
Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT && (srcSurface->resFlag&GX2_RESFLAG_USAGE_COLOR_BUFFER) != 0 ); // Star Fox Zero - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT ); // Qube - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 322 && srcSurface->height == 182 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM ); // Qube - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 640 && srcSurface->height == 360 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT ); // Qube - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1920 && srcSurface->height == 1080 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM && dstSurface->resFlag == 0x80000003); // Cosmophony - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM && dstSurface->resFlag == 0x3); // Cosmophony - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && dstSurface->resFlag == 0x3); // The Fall - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && dstSurface->resFlag == 0x3); // The Fall - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && dstSurface->resFlag == 0x80000003); // The Fall - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && srcSurface->resFlag == 0x80000003); // Nano Assault Neo - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 1280 && srcSurface->height == 720 && 
srcFormat == Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM); // Mario Party 10 - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM); // Mario Party 10 - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1920 && srcSurface->height == 1080 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM && dstSurface->resFlag == 0x3); // Hello Kitty Kruisers - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1024 && srcSurface->height == 1024 && srcFormat == Latte::E_GX2SURFFMT::R32_FLOAT && dstSurface->resFlag == 0x5); // Art Academy - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 260 && srcSurface->height == 148 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT && dstSurface->resFlag == 0x3); // Transformers: Rise of the Dark Spark - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1040 && srcSurface->height == 592 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT && dstSurface->resFlag == 0x3); // Transformers: Rise of the Dark Spark - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && srcSurface->resFlag == 0x3); // Nano Assault Neo - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1024 && srcSurface->height == 576 && srcFormat == Latte::E_GX2SURFFMT::D24_S8_UNORM && srcSurface->resFlag == 0x1); // Skylanders SuperChargers - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1152 && srcSurface->height == 648 && (srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM || srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT) && srcSurface->resFlag == 0x1); // Watch Dogs - isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 576 && srcSurface->height == 324 && (srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM || srcFormat == 
Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT) && srcSurface->resFlag == 0x1); // Watch Dogs - - if( isDynamicTexCopy && debugTestForceCPUCopy == false) - { - debug_printf("Software tex copy blocked\n"); return; } - sint32 copyWidth = dstMipWidth; - sint32 copyHeight = dstMipHeight; - if (Latte::IsCompressedFormat(dstHwFormat)) - { - copyWidth = (copyWidth + 3) / 4; - copyHeight = (copyHeight + 3) / 4; - } + // copy via GPU commands + // for simplicity and performance Cemu uses a HLE command to handle surface copies, + // a more accurate implementation would setup actual drawcalls to copy the texture data + GX2::GX2ReserveCmdSpace(23); + gx2WriteGather_submit(pm4HeaderType3(IT_HLE_COPY_SURFACE_NEW, 4+9*2), + // copy rect + (uint32)0, // x + (uint32)0, // y + (uint32)copyWidth, + (uint32)copyHeight, + // src + (uint32)MEMPTR(srcDataPtr).GetMPTR(), + (uint32)srcSurface->swizzle, + (uint32)srcSurface->format.value(), + (uint32)surfOutSrc.pitch, + (uint32)surfOutSrc.height, + srcSlice, + (uint32)srcSurface->dim.value(), + (uint32)srcTilemode, + (uint32)srcSurface->aa, + // dst + (uint32)MEMPTR(dstDataPtr).GetMPTR(), + (uint32)dstSurface->swizzle, + (uint32)dstSurface->format.value(), + (uint32)surfOutDst.pitch, + (uint32)surfOutDst.height, + dstSlice, + (uint32)dstSurface->dim.value(), + (uint32)dstTilemode, + (uint32)dstSurface->aa + ); +} - gx2SurfaceCopySoftware(inputData, surfOutSrc.height, srcPitch, srcDepth, srcSlice, srcSwizzle, srcHwTileMode, - outputData, surfOutDst.height, dstPitch, dstDepth, dstSlice, dstSwizzle, dstHwTileMode, - copyWidth, copyHeight, dstBpp); +void gx2Surface_GX2CopySurface(GX2Surface* srcSurface, uint32 srcMip, uint32 srcSlice, GX2Surface* dstSurface, uint32 dstMip, uint32 dstSlice) +{ + GX2CopySurfaceInternal(srcSurface, srcMip, srcSlice, dstSurface, dstMip, dstSlice); } void gx2Export_GX2CopySurface(PPCInterpreter_t* hCPU) @@ -363,7 +336,7 @@ void gx2Export_GX2CopySurface(PPCInterpreter_t* hCPU) osLib_returnFromFunction(hCPU, 0); } 
-typedef struct +typedef struct { sint32 left; sint32 top; @@ -471,37 +444,12 @@ void gx2Export_GX2ResolveAAColorBuffer(PPCInterpreter_t* hCPU) // handle format Latte::E_GX2SURFFMT srcFormat = srcSurface->format; Latte::E_GX2SURFFMT dstFormat = dstSurface->format; - uint32 srcBPP = Latte::GetFormatBits(srcFormat); - uint32 dstBPP = Latte::GetFormatBits(dstFormat); sint32 srcStepX = 1; sint32 srcStepY = 1; sint32 dstStepX = 1; sint32 dstStepY = 1; auto srcHwFormat = Latte::GetHWFormat(srcFormat); auto dstHwFormat = Latte::GetHWFormat(dstFormat); - // get texture info - LatteAddrLib::AddrSurfaceInfo_OUT surfOutSrc = {0}; - GX2::GX2CalculateSurfaceInfo(srcSurface, srcMip, &surfOutSrc); - LatteAddrLib::AddrSurfaceInfo_OUT surfOutDst = {0}; - GX2::GX2CalculateSurfaceInfo(dstSurface, dstMip, &surfOutDst); - // get input pointer - uint8* inputData = NULL; - cemu_assert(srcMip < srcSurface->numLevels); - if( srcMip == 0 ) - inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->imagePtr); - else if( srcMip == 1 ) - inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr); - else - inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr+srcSurface->mipOffset[srcMip-1]); - // get output pointer - uint8* outputData = NULL; - cemu_assert(dstMip < dstSurface->numLevels); - if( dstMip == 0 ) - outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->imagePtr); - else if( dstMip == 1 ) - outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr); - else - outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr+dstSurface->mipOffset[dstMip-1]); // calculate step size for compressed textures if( Latte::IsCompressedFormat(srcHwFormat) ) { @@ -513,64 +461,17 @@ void gx2Export_GX2ResolveAAColorBuffer(PPCInterpreter_t* hCPU) dstStepX = 4; dstStepY = 4; } - if( srcStepX != dstStepX || srcStepY != dstStepY ) - assert_dbg(); + cemu_assert_debug(srcStepX == dstStepX && srcStepY == dstStepY); if( 
srcHwFormat != dstHwFormat ) { // mismatching format - debug_printf("GX2CopySurface(): Format mismatch\n"); + debug_printf("GX2ResolveAAColorBuffer(): Format mismatch\n"); + cemu_assert_unimplemented(); osLib_returnFromFunction(hCPU, 0); return; } - - // src - uint32 srcPitch = surfOutSrc.pitch; - uint32 srcSwizzle = srcSurface->swizzle; - uint32 srcPipeSwizzle = (srcSwizzle>>8)&1; - uint32 srcBankSwizzle = ((srcSwizzle>>9)&3); - uint32 srcTileMode = (uint32)surfOutSrc.hwTileMode; - uint32 srcDepth = std::max(surfOutSrc.depth, 1); - // dst - uint32 dstPitch = surfOutDst.pitch; - uint32 dstSwizzle = dstSurface->swizzle; - uint32 dstPipeSwizzle = (dstSwizzle>>8)&1; - uint32 dstBankSwizzle = ((dstSwizzle>>9)&3); - uint32 dstTileMode = (uint32)surfOutDst.hwTileMode; - uint32 dstDepth = std::max(surfOutDst.depth, 1); - - // send copy command to GPU - GX2::GX2ReserveCmdSpace(1 + 13 * 2); - gx2WriteGather_submit(pm4HeaderType3(IT_HLE_COPY_SURFACE_NEW, 13 * 2), - // src - (uint32)srcSurface->imagePtr, - (uint32)srcSurface->mipPtr, - (uint32)srcSurface->swizzle, - (uint32)srcSurface->format.value(), - (uint32)srcSurface->width, - (uint32)srcSurface->height, - (uint32)srcSurface->depth, - (uint32)srcSurface->pitch, - srcSlice, - (uint32)srcSurface->dim.value(), - (uint32)srcSurface->tileMode.value(), - (uint32)srcSurface->aa, - srcMip, - // dst - (uint32)dstSurface->imagePtr, - (uint32)dstSurface->mipPtr, - (uint32)dstSurface->swizzle, - (uint32)dstSurface->format.value(), - (uint32)dstSurface->width, - (uint32)dstSurface->height, - (uint32)dstSurface->depth, - (uint32)dstSurface->pitch, - dstSlice, - (uint32)dstSurface->dim.value(), - (uint32)dstSurface->tileMode.value(), - (uint32)dstSurface->aa, - dstMip); - + GX2CopySurfaceInternal(srcSurface, srcMip, srcSlice, dstSurface, dstMip, dstSlice); osLib_returnFromFunction(hCPU, 0); } @@ -600,58 +501,11 @@ void gx2Export_GX2ConvertDepthBufferToTextureSurface(PPCInterpreter_t* hCPU) return; } - // note: Do not trust values from 
the input GX2Surface* structs but rely on surfOutDst/surfOutSrc instead if possible. - // src - uint32 srcPitch = surfOutSrc.pitch; - uint32 srcSwizzle = depthBuffer->surface.swizzle; - uint32 srcPipeSwizzle = (srcSwizzle >> 8) & 1; - uint32 srcBankSwizzle = ((srcSwizzle >> 9) & 3); - uint32 srcTileMode = (uint32)surfOutSrc.hwTileMode; - uint32 srcDepth = std::max(surfOutSrc.depth, 1); - // dst - uint32 dstPitch = surfOutDst.pitch; - uint32 dstSwizzle = dstSurface->swizzle; - uint32 dstPipeSwizzle = (dstSwizzle >> 8) & 1; - uint32 dstBankSwizzle = ((dstSwizzle >> 9) & 3); - uint32 dstTileMode = (uint32)surfOutDst.hwTileMode; - uint32 dstDepth = srcDepth; - - sint32 srcMip = 0; - uint32 numSlices = std::max(depthBuffer->viewNumSlices, 1); - GX2::GX2ReserveCmdSpace((1 + 13 * 2) * numSlices); for (uint32 subSliceIndex = 0; subSliceIndex < numSlices; subSliceIndex++) { // send copy command to GPU - gx2WriteGather_submit(pm4HeaderType3(IT_HLE_COPY_SURFACE_NEW, 13 * 2), - // src - (uint32)(depthBuffer->surface.imagePtr), - (uint32)(depthBuffer->surface.mipPtr), - (uint32)(depthBuffer->surface.swizzle), - (uint32)(depthBuffer->surface.format.value()), - (uint32)(depthBuffer->surface.width), - (uint32)(depthBuffer->surface.height), - (uint32)(depthBuffer->surface.depth), - (uint32)(depthBuffer->surface.pitch), - (uint32)(depthBuffer->viewFirstSlice) + subSliceIndex, - (uint32)(depthBuffer->surface.dim.value()), - (uint32)(depthBuffer->surface.tileMode.value()), - (uint32)(depthBuffer->surface.aa), - srcMip, - // dst - (uint32)(dstSurface->imagePtr), - (uint32)(dstSurface->mipPtr), - (uint32)(dstSurface->swizzle), - (uint32)(dstSurface->format.value()), - (uint32)(dstSurface->width), - (uint32)(dstSurface->height), - (uint32)(dstSurface->depth), - (uint32)(dstSurface->pitch), - dstSlice + subSliceIndex, - (uint32)(dstSurface->dim.value()), - (uint32)(dstSurface->tileMode.value()), - (uint32)(dstSurface->aa), - dstMip); + GX2CopySurfaceInternal(&depthBuffer->surface, 
depthBuffer->viewMip.value(), depthBuffer->viewFirstSlice.value() + subSliceIndex, dstSurface, dstMip, dstSlice + subSliceIndex); } osLib_returnFromFunction(hCPU, 0);