mirror of
https://github.com/cemu-project/Cemu.git
synced 2026-05-12 15:59:38 -06:00
GX2+Latte: Rework GX2CopySurface
This commit is contained in:
parent
e2a69becf1
commit
414aa9d6b3
@ -133,6 +133,7 @@ add_library(CemuCafe
|
||||
HW/Latte/Core/LatteSoftware.h
|
||||
HW/Latte/Core/LatteStreamoutGPU.cpp
|
||||
HW/Latte/Core/LatteSurfaceCopy.cpp
|
||||
HW/Latte/Core/LatteSurfaceCopy.h
|
||||
HW/Latte/Core/LatteTextureCache.cpp
|
||||
HW/Latte/Core/LatteTexture.cpp
|
||||
HW/Latte/Core/LatteTexture.h
|
||||
|
||||
@ -98,10 +98,6 @@ void LatteRenderTarget_itHLECopyColorBufferToScanBuffer(MPTR colorBufferPtr, uin
|
||||
|
||||
void LatteRenderTarget_unloadAll();
|
||||
|
||||
// surface copy
|
||||
|
||||
void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 srcSwizzle, Latte::E_GX2SURFFMT srcSurfaceFormat, sint32 srcWidth, sint32 srcHeight, sint32 srcDepth, uint32 srcPitch, sint32 srcSlice, Latte::E_DIM srcDim, Latte::E_HWTILEMODE srcTilemode, sint32 srcAA, sint32 srcLevel, MPTR dstPhysAddr, MPTR dstMipAddr, uint32 dstSwizzle, Latte::E_GX2SURFFMT dstSurfaceFormat, sint32 dstWidth, sint32 dstHeight, sint32 dstDepth, uint32 dstPitch, sint32 dstSlice, Latte::E_DIM dstDim, Latte::E_HWTILEMODE dstTilemode, sint32 dstAA, sint32 dstLevel);
|
||||
|
||||
// texture cache
|
||||
|
||||
void LatteTC_Init();
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#include "Cafe/HW/Latte/Core/Latte.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteAsyncCommands.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteSurfaceCopy.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteTexture.h"
|
||||
|
||||
void LatteThread_Exit();
|
||||
@ -35,11 +36,19 @@ typedef struct
|
||||
uint64 shaderAuxHash;
|
||||
LatteConst::ShaderType shaderType;
|
||||
}deleteShader;
|
||||
|
||||
struct
|
||||
{
|
||||
LatteSurfaceCopyParam src;
|
||||
LatteSurfaceCopyParam dst;
|
||||
LatteSurfaceCopyRect rect;
|
||||
}textureCopy;
|
||||
};
|
||||
}LatteAsyncCommand_t;
|
||||
|
||||
#define ASYNC_CMD_FORCE_TEXTURE_READBACK 1
|
||||
#define ASYNC_CMD_DELETE_SHADER 2
|
||||
#define ASYNC_CMD_TEXTURE_COPY 3
|
||||
|
||||
std::queue<LatteAsyncCommand_t> LatteAsyncCommandQueue;
|
||||
|
||||
@ -82,6 +91,20 @@ void LatteAsyncCommands_queueDeleteShader(uint64 shaderBaseHash, uint64 shaderAu
|
||||
swl_gpuAsyncCommands.UnlockWrite();
|
||||
}
|
||||
|
||||
// Queues a surface copy as an async command (ASYNC_CMD_TEXTURE_COPY).
// src/dst describe the source and destination surface, rect is the region to copy.
// All three are copied by value into the command, so the caller's objects do not need to outlive this call.
// The command is later picked up by LatteAsyncCommands_checkAndExecute(), which forwards it to LatteSurfaceCopy_copySurfaceNew().
// This function only enqueues; a caller that needs the copy to be finished has to wait separately (e.g. via LatteAsyncCommands_waitUntilAllProcessed()).
void LatteAsyncCommand_queueTextureCopy(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect)
|
||||
{
|
||||
LatteAsyncCommand_t asyncCommand = {};
|
||||
// setup command: tag it as a texture copy and store copies of the parameters
|
||||
asyncCommand.type = ASYNC_CMD_TEXTURE_COPY;
|
||||
asyncCommand.textureCopy.src = src;
|
||||
asyncCommand.textureCopy.dst = dst;
|
||||
asyncCommand.textureCopy.rect = rect;
|
||||
|
||||
// the queue is only modified while holding the write lock of swl_gpuAsyncCommands
swl_gpuAsyncCommands.LockWrite();
|
||||
LatteAsyncCommandQueue.push(asyncCommand);
|
||||
swl_gpuAsyncCommands.UnlockWrite();
|
||||
}
|
||||
|
||||
void LatteAsyncCommands_waitUntilAllProcessed()
|
||||
{
|
||||
while (LatteAsyncCommandQueue.empty() == false)
|
||||
@ -127,6 +150,10 @@ void LatteAsyncCommands_checkAndExecute()
|
||||
{
|
||||
LatteSHRC_RemoveFromCacheByHash(asyncCommand.deleteShader.shaderBaseHash, asyncCommand.deleteShader.shaderAuxHash, asyncCommand.deleteShader.shaderType);
|
||||
}
|
||||
else if (asyncCommand.type == ASYNC_CMD_TEXTURE_COPY)
|
||||
{
|
||||
LatteSurfaceCopy_copySurfaceNew(asyncCommand.textureCopy.src, asyncCommand.textureCopy.dst, asyncCommand.textureCopy.rect);
|
||||
}
|
||||
else
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
|
||||
@ -1,7 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
struct LatteSurfaceCopyParam;
|
||||
struct LatteSurfaceCopyRect;
|
||||
|
||||
void LatteAsyncCommands_queueForceTextureReadback(MPTR physAddr, MPTR mipAddr, uint32 swizzle, sint32 format, sint32 width, sint32 height, sint32 depth, uint32 pitch, uint32 slice, sint32 dim, Latte::E_HWTILEMODE tilemode, sint32 aa, sint32 level);
|
||||
void LatteAsyncCommand_queueTextureCopy(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect);
|
||||
void LatteAsyncCommands_queueDeleteShader(uint64 shaderBaseHash, uint64 shaderAuxHash, LatteConst::ShaderType shaderType);
|
||||
void LatteAsyncCommands_waitUntilAllProcessed();
|
||||
|
||||
void LatteAsyncCommands_checkAndExecute();
|
||||
void LatteAsyncCommands_checkAndExecute();
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
#include "Cafe/HW/Latte/Core/LatteIndices.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
|
||||
#include "Cafe/HW/Latte/Core/LattePM4.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteSurfaceCopy.h"
|
||||
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_Time.h"
|
||||
#include "Cafe/OS/libs/TCL/TCL.h" // TCL currently handles the GPU command ringbuffer
|
||||
@ -805,37 +806,37 @@ LatteCMDPtr LatteCP_itHLEBottomOfPipeCB(LatteCMDPtr cmd, uint32 nWords)
|
||||
// GPU-side handler for GX2CopySurface/GX2CopySurfaceEx and similar
|
||||
LatteCMDPtr LatteCP_itHLECopySurfaceNew(LatteCMDPtr cmd, uint32 nWords)
|
||||
{
|
||||
cemu_assert_debug(nWords == 26);
|
||||
cemu_assert_debug(nWords == 4+9*2);
|
||||
// copy rect
|
||||
LatteSurfaceCopyRect copyRect;
|
||||
copyRect.x = LatteReadCMD();
|
||||
copyRect.y = LatteReadCMD();
|
||||
copyRect.width = LatteReadCMD();
|
||||
copyRect.height = LatteReadCMD();
|
||||
// src
|
||||
MPTR srcPhysAddr = LatteReadCMD();
|
||||
MPTR srcMipAddr = LatteReadCMD();
|
||||
uint32 srcSwizzle = LatteReadCMD();
|
||||
Latte::E_GX2SURFFMT srcSurfaceFormat = (Latte::E_GX2SURFFMT)LatteReadCMD();
|
||||
sint32 srcWidth = LatteReadCMD();
|
||||
sint32 srcHeight = LatteReadCMD();
|
||||
sint32 srcDepth = LatteReadCMD();
|
||||
uint32 srcPitch = LatteReadCMD();
|
||||
uint32 srcSlice = LatteReadCMD();
|
||||
Latte::E_DIM srcDim = (Latte::E_DIM)LatteReadCMD();
|
||||
Latte::E_HWTILEMODE srcTilemode = (Latte::E_HWTILEMODE)LatteReadCMD();
|
||||
sint32 srcAA = LatteReadCMD();
|
||||
sint32 srcLevel = LatteReadCMD();
|
||||
LatteSurfaceCopyParam src{};
|
||||
src.physDataAddr = LatteReadCMD();
|
||||
src.swizzle = LatteReadCMD();
|
||||
src.surfaceFormat = (Latte::E_GX2SURFFMT)LatteReadCMD();
|
||||
src.pitch = LatteReadCMD();
|
||||
src.heightInTexels = LatteReadCMD();
|
||||
src.sliceIndex = LatteReadCMD();
|
||||
src.dim = (Latte::E_DIM)LatteReadCMD();
|
||||
src.tilemode = (Latte::E_GX2TILEMODE)LatteReadCMD();
|
||||
src.aa = LatteReadCMD();
|
||||
// dst
|
||||
MPTR dstPhysAddr = LatteReadCMD();
|
||||
MPTR dstMipAddr = LatteReadCMD();
|
||||
uint32 dstSwizzle = LatteReadCMD();
|
||||
Latte::E_GX2SURFFMT dstSurfaceFormat = (Latte::E_GX2SURFFMT)LatteReadCMD();
|
||||
sint32 dstWidth = LatteReadCMD();
|
||||
sint32 dstHeight = LatteReadCMD();
|
||||
sint32 dstDepth = LatteReadCMD();
|
||||
uint32 dstPitch = LatteReadCMD();
|
||||
uint32 dstSlice = LatteReadCMD();
|
||||
Latte::E_DIM dstDim = (Latte::E_DIM)LatteReadCMD();
|
||||
Latte::E_HWTILEMODE dstTilemode = (Latte::E_HWTILEMODE)LatteReadCMD();
|
||||
sint32 dstAA = LatteReadCMD();
|
||||
sint32 dstLevel = LatteReadCMD();
|
||||
LatteSurfaceCopyParam dst{};
|
||||
dst.physDataAddr = LatteReadCMD();
|
||||
dst.swizzle = LatteReadCMD();
|
||||
dst.surfaceFormat = (Latte::E_GX2SURFFMT)LatteReadCMD();
|
||||
dst.pitch = LatteReadCMD();
|
||||
dst.heightInTexels = LatteReadCMD();
|
||||
dst.sliceIndex = LatteReadCMD();
|
||||
dst.dim = (Latte::E_DIM)LatteReadCMD();
|
||||
dst.tilemode = (Latte::E_GX2TILEMODE)LatteReadCMD();
|
||||
dst.aa = LatteReadCMD();
|
||||
|
||||
LatteSurfaceCopy_copySurfaceNew(srcPhysAddr, srcMipAddr, srcSwizzle, srcSurfaceFormat, srcWidth, srcHeight, srcDepth, srcPitch, srcSlice, srcDim, srcTilemode, srcAA, srcLevel, dstPhysAddr, dstMipAddr, dstSwizzle, dstSurfaceFormat, dstWidth, dstHeight, dstDepth, dstPitch, dstSlice, dstDim, dstTilemode, dstAA, dstLevel);
|
||||
LatteSurfaceCopy_copySurfaceNew(src, dst, copyRect);
|
||||
return cmd;
|
||||
}
|
||||
|
||||
@ -1921,4 +1922,4 @@ void LatteCP_DebugPrintCmdBuffer(uint32be* bufferPtr, uint32 size)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -3,22 +3,65 @@
|
||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteDefaultShaders.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteTexture.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteSurfaceCopy.h"
|
||||
|
||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||
|
||||
void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 srcSwizzle, Latte::E_GX2SURFFMT srcSurfaceFormat, sint32 srcWidth, sint32 srcHeight, sint32 srcDepth, uint32 srcPitch, sint32 srcSlice, Latte::E_DIM srcDim, Latte::E_HWTILEMODE srcTilemode, sint32 srcAA, sint32 srcLevel, MPTR dstPhysAddr, MPTR dstMipAddr, uint32 dstSwizzle, Latte::E_GX2SURFFMT dstSurfaceFormat, sint32 dstWidth, sint32 dstHeight, sint32 dstDepth, uint32 dstPitch, sint32 dstSlice, Latte::E_DIM dstDim, Latte::E_HWTILEMODE dstTilemode, sint32 dstAA, sint32 dstLevel)
|
||||
/* Surface copies are tricky to handle because we simulate unified memory on top of two separate memory systems: RAM and VRAM
|
||||
* Cemu may only have texture data in either RAM or VRAM, or both. And doing transfers to or from CPU can quickly become prohibitively expensive
|
||||
* So we need to make best guesses on where the data needs to come from and where it needs to go
|
||||
* A complicating factor is that texture copies are more like memcpy, in the sense that they don't care about the actual underlying pixel format
|
||||
* For example a R32F texture can be copied as RGBA8. Similarly, different tile modes can sometimes be identical under specific circumstances
|
||||
*/
|
||||
|
||||
void gx2SurfaceCopySoftware(
|
||||
uint8* inputData, sint32 surfSrcHeight, sint32 srcPitch, sint32 srcDepth, uint32 srcSlice, uint32 srcSwizzle, uint32 srcHwTileMode,
|
||||
uint8* outputData, sint32 surfDstHeight, sint32 dstPitch, sint32 dstDepth, uint32 dstSlice, uint32 dstSwizzle, uint32 dstHwTileMode,
|
||||
uint32 copyWidth, uint32 copyHeight, uint32 copyBpp);
|
||||
|
||||
void LatteSurfaceCopy_CopyInRAM(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect)
|
||||
{
|
||||
// check if source is within valid mip range
|
||||
if (srcDim == Latte::E_DIM::DIM_3D && (srcDepth >> srcLevel) == 0 && (srcWidth >> srcLevel) == 0 && (srcHeight >> srcLevel) == 0)
|
||||
return;
|
||||
else if ((srcWidth >> srcLevel) == 0 && (srcHeight >> srcLevel) == 0)
|
||||
Latte::E_HWSURFFMT dstHwFormat = Latte::GetHWFormat(dst.surfaceFormat);
|
||||
|
||||
sint32 copyWidth = rect.width;
|
||||
sint32 copyHeight = rect.height;
|
||||
if (Latte::IsCompressedFormat(dstHwFormat))
|
||||
{
|
||||
copyWidth = (copyWidth + 3) / 4;
|
||||
copyHeight = (copyHeight + 3) / 4;
|
||||
}
|
||||
|
||||
uint32 dstBpp = Latte::GetFormatBits(dstHwFormat);
|
||||
|
||||
gx2SurfaceCopySoftware((uint8*)MEMPTR<void>(src.physDataAddr).GetPtr(), src.heightInTexels, src.pitch, 1, src.sliceIndex, src.swizzle, (uint32)src.tilemode,
|
||||
(uint8*)MEMPTR<void>(dst.physDataAddr).GetPtr(), dst.heightInTexels, dst.pitch, 1, dst.sliceIndex, dst.swizzle, (uint32)dst.tilemode,
|
||||
copyWidth, copyHeight, dstBpp);
|
||||
}
|
||||
|
||||
void LatteSurfaceCopy_copySurfaceNew(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect)
|
||||
{
|
||||
cemu_assert_debug(rect.x == 0 && rect.y == 0); // origin offset not yet supported
|
||||
|
||||
cemu_assert_debug((rect.x + rect.width) <= dst.pitch * (Latte::IsCompressedFormat(dst.surfaceFormat)?4:1));
|
||||
cemu_assert_debug((rect.x + rect.width) <= src.pitch * (Latte::IsCompressedFormat(src.surfaceFormat)?4:1));
|
||||
|
||||
if (src.tilemode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL || dst.tilemode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL)
|
||||
{
|
||||
// todo - it's technically possible for a matching linear texture to be in the texture cache already
|
||||
// there is also a case of tiled to linear_special where we should trigger a readback without an actual destination texture
|
||||
LatteSurfaceCopy_CopyInRAM(src, dst, rect);
|
||||
return;
|
||||
}
|
||||
|
||||
// look up source texture
|
||||
// todo - for non-zero slices heightInTexels matters (used to calculate the slice size). We should take this into account during lookup
|
||||
LatteTexture* sourceTexture = nullptr;
|
||||
LatteTextureView* sourceView = LatteTC_GetTextureSliceViewOrTryCreate(srcPhysAddr, srcMipAddr, srcSurfaceFormat, srcTilemode, srcWidth, srcHeight, srcDepth, srcPitch, srcSwizzle, srcSlice, srcLevel);
|
||||
LatteTextureView* sourceView = LatteTC_GetTextureSliceViewOrTryCreate(src.physDataAddr, MPTR_NULL, src.surfaceFormat, Latte::MakeHWTileMode(src.tilemode), rect.x + rect.width, rect.y + rect.height, 1, src.pitch, src.swizzle, src.sliceIndex, 0);
|
||||
if (sourceView == nullptr)
|
||||
{
|
||||
debug_printf("HLECopySurface(): Source texture is not in list of dynamic textures\n");
|
||||
// source texture doesn't exist (yet) in texture cache
|
||||
// operate on RAM instead
|
||||
LatteSurfaceCopy_CopyInRAM(src, dst, rect);
|
||||
return;
|
||||
}
|
||||
sourceTexture = sourceView->baseTexture;
|
||||
@ -29,15 +72,14 @@ void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 s
|
||||
}
|
||||
// look up destination texture
|
||||
LatteTexture* destinationTexture = nullptr;
|
||||
LatteTextureView* destinationView = LatteTextureViewLookupCache::lookupSlice(dstPhysAddr, dstWidth, dstHeight, dstPitch, dstLevel, dstSlice, dstSurfaceFormat);
|
||||
LatteTextureView* destinationView = LatteTextureViewLookupCache::lookupSliceMinSize(dst.physDataAddr, rect.x + rect.width, rect.y + rect.height, dst.pitch, 0, dst.sliceIndex, dst.surfaceFormat);
|
||||
// todo - Instead of lookupSliceMinSize, look up the base texture by data range and return mip/slice index
|
||||
if (destinationView)
|
||||
destinationTexture = destinationView->baseTexture;
|
||||
|
||||
// create destination texture if it doesn't exist
|
||||
if (!destinationTexture)
|
||||
{
|
||||
LatteTexture* renderTargetConf = nullptr;
|
||||
destinationView = LatteTexture_CreateMapping(dstPhysAddr, dstMipAddr, dstWidth, dstHeight, dstDepth, dstPitch, dstTilemode, dstSwizzle, dstLevel, 1, dstSlice, 1, dstSurfaceFormat, dstDim, Latte::IsMSAA(dstDim) ? Latte::E_DIM::DIM_2D_MSAA : Latte::E_DIM::DIM_2D, false);
|
||||
destinationView = LatteTexture_CreateMapping(dst.physDataAddr, MPTR_NULL, rect.x + rect.width, rect.y + rect.height, 1, dst.pitch, Latte::MakeHWTileMode(dst.tilemode), dst.swizzle, 0, 1, dst.sliceIndex, 1, dst.surfaceFormat, dst.dim, Latte::IsMSAA(dst.dim) ? Latte::E_DIM::DIM_2D_MSAA : Latte::E_DIM::DIM_2D, false);
|
||||
destinationTexture = destinationView->baseTexture;
|
||||
}
|
||||
// copy texture
|
||||
@ -46,15 +88,12 @@ void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 s
|
||||
// mark source and destination texture as still in use
|
||||
LatteTC_MarkTextureStillInUse(destinationTexture);
|
||||
LatteTC_MarkTextureStillInUse(sourceTexture);
|
||||
sint32 realSrcSlice = srcSlice;
|
||||
sint32 realSrcSlice = src.sliceIndex;
|
||||
if (LatteTexture_doesEffectiveRescaleRatioMatch(sourceTexture, sourceView->firstMip, destinationTexture, destinationView->firstMip))
|
||||
{
|
||||
// adjust copy size
|
||||
sint32 copyWidth = std::max(srcWidth >> srcLevel, 1);
|
||||
sint32 copyHeight = std::max(srcHeight >> srcLevel, 1);
|
||||
// use the smaller width/height as copy size
|
||||
copyWidth = std::min(copyWidth, std::max(dstWidth >> dstLevel, 1));
|
||||
copyHeight = std::min(copyHeight, std::max(dstHeight >> dstLevel, 1));
|
||||
cemu_assert_debug(rect.x == 0 && rect.y == 0);
|
||||
sint32 copyWidth = rect.width;
|
||||
sint32 copyHeight = rect.height;
|
||||
sint32 effectiveCopyWidth = copyWidth;
|
||||
sint32 effectiveCopyHeight = copyHeight;
|
||||
LatteTexture_scaleToEffectiveSize(sourceTexture, &effectiveCopyWidth, &effectiveCopyHeight, 0);
|
||||
@ -77,11 +116,19 @@ void LatteSurfaceCopy_copySurfaceNew(MPTR srcPhysAddr, MPTR srcMipAddr, uint32 s
|
||||
}
|
||||
else
|
||||
debug_printf("Source or destination texture does not exist\n");
|
||||
|
||||
// download destination texture if it matches known accessed formats
|
||||
// if the texture is updated from a tiled to a linear format it's a strong indicator for CPU reads
|
||||
// in which case we should sync the texture back to CPU RAM
|
||||
const bool sourceIsLinear = sourceTexture->tileMode == Latte::E_HWTILEMODE::TM_LINEAR_ALIGNED || sourceTexture->tileMode == Latte::E_HWTILEMODE::TM_LINEAR_GENERAL;
|
||||
const bool destinationIsLinear = destinationTexture->tileMode == Latte::E_HWTILEMODE::TM_LINEAR_ALIGNED || destinationTexture->tileMode == Latte::E_HWTILEMODE::TM_LINEAR_GENERAL;
|
||||
bool shouldReadback = !sourceIsLinear && destinationIsLinear;
|
||||
// special case for Bayonetta 2
|
||||
if (destinationTexture->width == 8 && destinationTexture->height == 8 && destinationTexture->tileMode == Latte::E_HWTILEMODE::TM_1D_TILED_THIN1)
|
||||
{
|
||||
cemuLog_logDebug(LogType::Force, "Texture readback after copy for Bayonetta 2 (phys: 0x{:08x})", destinationTexture->physAddress);
|
||||
shouldReadback = true;
|
||||
}
|
||||
if (shouldReadback)
|
||||
{
|
||||
LatteTextureReadback_Initate(destinationView);
|
||||
}
|
||||
}
|
||||
|
||||
25
src/Cafe/HW/Latte/Core/LatteSurfaceCopy.h
Normal file
25
src/Cafe/HW/Latte/Core/LatteSurfaceCopy.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
// Describes one side (source or destination) of a surface copy.
// Passed in pairs (src, dst) together with a LatteSurfaceCopyRect to LatteSurfaceCopy_copySurfaceNew().
struct LatteSurfaceCopyParam
|
||||
{
|
||||
// effective parameters (with mip index baked into them)
|
||||
// i.e. there is no separate mip level field; the address below already refers to the mip being copied
MPTR physDataAddr; // points to actual mip
|
||||
uint32 swizzle;
|
||||
Latte::E_GX2SURFFMT surfaceFormat;
|
||||
// height of the surface; forwarded as the surface height to the software (RAM) copy path
sint32 heightInTexels;
|
||||
// NOTE(review): the asserts in LatteSurfaceCopy_copySurfaceNew scale pitch by 4 for compressed formats,
// which suggests pitch is stored in blocks for compressed formats - confirm
uint32 pitch;
|
||||
Latte::E_DIM dim;
|
||||
// GX2 tile mode. TM_LINEAR_SPECIAL on either side makes the copy operate on RAM
Latte::E_GX2TILEMODE tilemode;
|
||||
sint32 aa;
|
||||
// index of the slice that is read from (src) or written to (dst)
sint32 sliceIndex;
|
||||
};
|
||||
|
||||
// Region of a surface copy, shared by source and destination.
// Note that LatteSurfaceCopy_copySurfaceNew currently asserts x == 0 && y == 0 (origin offsets are not supported yet).
struct LatteSurfaceCopyRect
|
||||
{
|
||||
uint32 x;
|
||||
uint32 y;
|
||||
// for compressed destination formats the RAM copy path rounds width and height up to whole 4x4 blocks
uint32 width; // in pixels
|
||||
uint32 height;
|
||||
};
|
||||
|
||||
void LatteSurfaceCopy_copySurfaceNew(const LatteSurfaceCopyParam& src, const LatteSurfaceCopyParam& dst, const LatteSurfaceCopyRect& rect);
|
||||
@ -1138,6 +1138,7 @@ void LatteTC_LookupTexturesByPhysAddr(MPTR physAddr, std::vector<LatteTexture*>&
|
||||
}
|
||||
}
|
||||
|
||||
// return or create a view, requires existing base texture. Returns nullptr if it doesn't exist yet
|
||||
LatteTextureView* LatteTC_GetTextureSliceViewOrTryCreate(MPTR srcImagePtr, MPTR srcMipPtr, Latte::E_GX2SURFFMT srcFormat, Latte::E_HWTILEMODE srcTileMode, uint32 srcWidth, uint32 srcHeight, uint32 srcDepth, uint32 srcPitch, uint32 srcSwizzle, uint32 srcSlice, uint32 srcMip, const bool requireExactResolution)
|
||||
{
|
||||
LatteTextureView* sourceView;
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
#include "Cafe/HW/Latte/Core/Latte.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteDraw.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteAsyncCommands.h"
|
||||
#include "Cafe/HW/Latte/Core/LatteSurfaceCopy.h"
|
||||
#include "Cafe/HW/Latte/LatteAddrLib/LatteAddrLib.h"
|
||||
#include "util/highresolutiontimer/HighResolutionTimer.h"
|
||||
#include "GX2.h"
|
||||
@ -127,6 +128,11 @@ void gx2SurfaceCopySoftware(
|
||||
uint8* outputData, sint32 surfDstHeight, sint32 dstPitch, sint32 dstDepth, uint32 dstSlice, uint32 dstSwizzle, uint32 dstHwTileMode,
|
||||
uint32 copyWidth, uint32 copyHeight, uint32 copyBpp)
|
||||
{
|
||||
if (srcHwTileMode == 16)
|
||||
srcHwTileMode = 0;
|
||||
if (dstHwTileMode == 16)
|
||||
dstHwTileMode = 0;
|
||||
|
||||
if (srcHwTileMode == 4 && dstHwTileMode == 4 && (copyWidth & 7) == 0 && (copyHeight & 7) == 0 && copyBpp <= 32) // todo - check sample == 1
|
||||
{
|
||||
gx2SurfaceCopySoftware_fastPath_tm4Copy(inputData, surfSrcHeight, srcPitch, srcDepth, srcSlice, srcSwizzle, outputData, surfDstHeight, dstPitch, dstDepth, dstSlice, dstSwizzle, copyWidth, copyHeight, copyBpp);
|
||||
@ -147,7 +153,19 @@ void gx2SurfaceCopySoftware(
|
||||
cemu_assert_debug(false);
|
||||
}
|
||||
|
||||
void gx2Surface_GX2CopySurface(GX2Surface* srcSurface, uint32 srcMip, uint32 srcSlice, GX2Surface* dstSurface, uint32 dstMip, uint32 dstSlice)
|
||||
// Surface copy handling is complicated because of having to simulate unified memory
|
||||
// GX2CopySurface supports two modes:
|
||||
// - When source or destination surface have a tilemode of LINEAR_SPECIAL (16) -> Copy is done synchronously on the CPU
|
||||
// - In all other cases -> Copy is done on the GPU via draw commands
|
||||
|
||||
// But in Cemu things are more complicated, we generally can't do pure CPU copies because a surface's texture data
|
||||
// may only exist in VRAM right now. So we always need to delegate copying to the renderer thread (which has access to the texture cache)
|
||||
|
||||
// In Cemu we thus handle it like this:
|
||||
// For GX2 CPU copies -> Submit as async command to the renderer (will be processed asap) and stall until completed
|
||||
// For GX2 GPU copies -> Submit as HLE command to Latte's command queue to be executed in order
|
||||
|
||||
void GX2CopySurfaceInternal(GX2Surface* srcSurface, uint32 srcMip, uint32 srcSlice, GX2Surface* dstSurface, uint32 dstMip, uint32 dstSlice)
|
||||
{
|
||||
sint32 dstWidth = dstSurface->width;
|
||||
sint32 dstHeight = dstSurface->height;
|
||||
@ -167,8 +185,6 @@ void gx2Surface_GX2CopySurface(GX2Surface* srcSurface, uint32 srcMip, uint32 src
|
||||
// handle format
|
||||
Latte::E_GX2SURFFMT srcFormat = srcSurface->format;
|
||||
Latte::E_GX2SURFFMT dstFormat = dstSurface->format;
|
||||
uint32 srcBPP = Latte::GetFormatBits(srcFormat);
|
||||
uint32 dstBPP = Latte::GetFormatBits(dstFormat);
|
||||
auto srcHwFormat = Latte::GetHWFormat(srcFormat);
|
||||
auto dstHwFormat = Latte::GetHWFormat(dstFormat);
|
||||
// get texture info
|
||||
@ -182,173 +198,130 @@ void gx2Surface_GX2CopySurface(GX2Surface* srcSurface, uint32 srcMip, uint32 src
|
||||
debug_printf("GX2CopySurface(): mip count is 0\n");
|
||||
return;
|
||||
}
|
||||
// get input pointer
|
||||
uint8* inputData = NULL;
|
||||
cemu_assert(srcMip < srcSurface->numLevels);
|
||||
if( srcMip == 0 )
|
||||
inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->imagePtr);
|
||||
else if( srcMip == 1 )
|
||||
inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr);
|
||||
else
|
||||
{
|
||||
inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr + srcSurface->mipOffset[srcMip - 1]);
|
||||
}
|
||||
// get output pointer
|
||||
uint8* outputData = NULL;
|
||||
cemu_assert(dstMip < dstSurface->numLevels);
|
||||
if( dstMip == 0 )
|
||||
outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->imagePtr);
|
||||
else if( dstMip == 1 )
|
||||
outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr);
|
||||
else
|
||||
{
|
||||
outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr + dstSurface->mipOffset[dstMip - 1]);
|
||||
}
|
||||
|
||||
// make sure formats are compatible
|
||||
if( srcHwFormat != dstHwFormat )
|
||||
{
|
||||
// mismatching format
|
||||
cemuLog_logDebug(LogType::Force, "GX2CopySurface(): Format mismatch");
|
||||
cemuLog_logDebug(LogType::Force, "GX2CopySurface(): Format mismatch (src=0x{:04x} dst=0x{:04x})", (sint32)srcFormat, (sint32)dstFormat);
|
||||
return;
|
||||
}
|
||||
|
||||
// note: Do not trust values from the input GX2Surface* structs but rely on surfOutDst/surfOutSrc instead if possible.
|
||||
// src
|
||||
// get input pointer
|
||||
cemu_assert(srcMip < srcSurface->numLevels);
|
||||
uint8* srcDataPtr = nullptr;
|
||||
if( srcMip == 0 )
|
||||
srcDataPtr = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->imagePtr);
|
||||
else if( srcMip == 1 )
|
||||
srcDataPtr = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr);
|
||||
else
|
||||
srcDataPtr = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr + srcSurface->mipOffset[srcMip - 1]);
|
||||
// get output pointer
|
||||
cemu_assert(dstMip < dstSurface->numLevels);
|
||||
uint8* dstDataPtr = nullptr;
|
||||
if( dstMip == 0 )
|
||||
dstDataPtr = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->imagePtr);
|
||||
else if( dstMip == 1 )
|
||||
dstDataPtr = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr);
|
||||
else
|
||||
dstDataPtr = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr + dstSurface->mipOffset[dstMip - 1]);
|
||||
// note: pitch is taken from surfOutSrc/surfOutDst, which may differ from the pitch stored in the input surface structs
|
||||
uint32 srcPitch = surfOutSrc.pitch;
|
||||
uint32 srcSwizzle = srcSurface->swizzle;
|
||||
uint32 srcHwTileMode = (uint32)surfOutSrc.hwTileMode;
|
||||
uint32 srcDepth = std::max<uint32>(surfOutSrc.depth, 1);
|
||||
if (srcHwTileMode == 0) // linear
|
||||
Latte::E_GX2TILEMODE srcTilemode = (srcSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL) ? srcSurface->tileMode.value() : (Latte::E_GX2TILEMODE)surfOutSrc.hwTileMode;
|
||||
Latte::E_GX2TILEMODE dstTilemode = (dstSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL) ? dstSurface->tileMode.value() : (Latte::E_GX2TILEMODE)surfOutDst.hwTileMode;
|
||||
|
||||
if (srcTilemode == Latte::E_GX2TILEMODE::TM_LINEAR_GENERAL)
|
||||
{
|
||||
// todo - why is this necessary? GX2CalculateSurfaceInfo should already handle this
|
||||
srcPitch = srcSurface->pitch >> srcMip;
|
||||
srcPitch = std::max<uint32>(srcPitch, 1);
|
||||
}
|
||||
// dst
|
||||
uint32 dstPitch = surfOutDst.pitch;
|
||||
uint32 dstSwizzle = dstSurface->swizzle;
|
||||
uint32 dstHwTileMode = (uint32)surfOutDst.hwTileMode;
|
||||
uint32 dstDepth = std::max<uint32>(surfOutDst.depth, 1);
|
||||
uint32 dstBpp = surfOutDst.bpp;
|
||||
uint32 copyWidth = std::max<uint32>(dstWidth>>dstMip, 1);
|
||||
uint32 copyHeight = std::max<uint32>(dstHeight>>dstMip, 1);
|
||||
|
||||
//debug_printf("Src Tex: %08X %dx%d Swizzle: %08x tm: %d fmt: %04x use: %02x\n", _swapEndianU32(srcSurface->imagePtr), _swapEndianU32(srcSurface->width), _swapEndianU32(srcSurface->height), _swapEndianU32(srcSurface->swizzle), _swapEndianU32(srcSurface->tileMode), _swapEndianU32(srcSurface->format), (uint32)srcSurface->resFlag);
|
||||
//debug_printf("Dst Tex: %08X %dx%d Swizzle: %08x tm: %d fmt: %04x use: %02x\n", _swapEndianU32(dstSurface->imagePtr), _swapEndianU32(dstSurface->width), _swapEndianU32(dstSurface->height), _swapEndianU32(dstSurface->swizzle), _swapEndianU32(dstSurface->tileMode), _swapEndianU32(dstSurface->format), (uint32)dstSurface->resFlag);
|
||||
cemu_assert(copyWidth <= std::max<uint32>(srcWidth>>srcMip, 1));
|
||||
cemu_assert(copyHeight <= std::max<uint32>(srcHeight>>srcMip, 1));
|
||||
|
||||
bool requestGPURAMCopy = false;
|
||||
bool debugTestForceCPUCopy = false;
|
||||
cemuLog_log(LogType::GX2, "GX2CopySurface:");
|
||||
cemuLog_log(LogType::GX2,"srcSurface: imagePtr=0x{:08x} mipPtr=0x{:08x} swizzle=0x{:06x} width={} height={} depth={} pitch=0x{:x} tilemode=0x{:x} format=0x{:x} mip={} slice={}",
|
||||
(uint32)srcSurface->imagePtr, (uint32)srcSurface->mipPtr, (uint32)srcSurface->swizzle, (uint32)srcSurface->width, (uint32)srcSurface->height, (uint32)srcSurface->depth, (uint32)srcSurface->pitch,
|
||||
(uint32)srcSurface->tileMode.value(), (uint32)srcSurface->format.value(), srcMip, srcSlice);
|
||||
cemuLog_log(LogType::GX2, "dstSurface: imagePtr=0x{:08x} mipPtr=0x{:08x} swizzle=0x{:06x} width={} height={} depth={} pitch=0x{:x} tilemode=0x{:x} format=0x{:x} mip={} slice={}",
|
||||
(uint32)dstSurface->imagePtr, (uint32)dstSurface->mipPtr, (uint32)dstSurface->swizzle, (uint32)dstSurface->width, (uint32)dstSurface->height, (uint32)dstSurface->depth, (uint32)dstSurface->pitch,
|
||||
(uint32)dstSurface->tileMode.value(), (uint32)dstSurface->format.value(), dstMip, dstSlice);
|
||||
|
||||
if (srcSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL && dstSurface->tileMode == Latte::E_GX2TILEMODE::TM_2D_TILED_THIN1)
|
||||
debugTestForceCPUCopy = true;
|
||||
|
||||
if (srcSurface->tileMode == Latte::E_GX2TILEMODE::TM_2D_TILED_THIN1 && dstSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL )
|
||||
bool isGX2CPUCopy = srcSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL || dstSurface->tileMode == Latte::E_GX2TILEMODE::TM_LINEAR_SPECIAL;
|
||||
if (isGX2CPUCopy)
|
||||
{
|
||||
LatteAsyncCommands_queueForceTextureReadback(
|
||||
srcSurface->imagePtr,
|
||||
srcSurface->mipPtr,
|
||||
srcSurface->swizzle,
|
||||
(uint32)srcSurface->format.value(),
|
||||
srcSurface->width,
|
||||
srcSurface->height,
|
||||
srcSurface->depth,
|
||||
srcSurface->pitch,
|
||||
srcSlice,
|
||||
(uint32)srcSurface->dim.value(),
|
||||
Latte::MakeHWTileMode(srcSurface->tileMode),
|
||||
srcSurface->aa,
|
||||
srcMip);
|
||||
LatteSurfaceCopyParam srcCopy{};
|
||||
srcCopy.physDataAddr = MEMPTR<void>(srcDataPtr).GetMPTR();
|
||||
srcCopy.swizzle = srcSurface->swizzle;
|
||||
srcCopy.surfaceFormat = srcSurface->format;
|
||||
srcCopy.heightInTexels = surfOutSrc.height;
|
||||
srcCopy.pitch = surfOutSrc.pitch;
|
||||
srcCopy.dim = srcSurface->dim;
|
||||
srcCopy.tilemode = srcTilemode;
|
||||
srcCopy.aa = srcSurface->aa;
|
||||
srcCopy.sliceIndex = (sint32)srcSlice;
|
||||
|
||||
LatteSurfaceCopyParam dstCopy{};
|
||||
dstCopy.physDataAddr = MEMPTR<void>(dstDataPtr).GetMPTR();
|
||||
dstCopy.swizzle = dstSurface->swizzle;
|
||||
dstCopy.surfaceFormat = dstSurface->format;
|
||||
dstCopy.heightInTexels = surfOutDst.height;
|
||||
dstCopy.pitch = surfOutDst.pitch;
|
||||
dstCopy.dim = dstSurface->dim;
|
||||
dstCopy.tilemode = dstTilemode;
|
||||
dstCopy.aa = dstSurface->aa;
|
||||
dstCopy.sliceIndex = (sint32)dstSlice;
|
||||
|
||||
LatteSurfaceCopyRect copyRect{};
|
||||
copyRect.x = 0;
|
||||
copyRect.y = 0;
|
||||
copyRect.width = copyWidth;
|
||||
copyRect.height = copyHeight;
|
||||
|
||||
LatteAsyncCommand_queueTextureCopy(srcCopy, dstCopy, copyRect);
|
||||
// cpu copies have to be finished by the time GX2CopySurface returns
|
||||
// since we delegate the copy to the Latte thread, we have to wait for it to finish here
|
||||
LatteAsyncCommands_waitUntilAllProcessed();
|
||||
|
||||
debugTestForceCPUCopy = true;
|
||||
}
|
||||
|
||||
// send copy command to GPU
|
||||
if( srcHwTileMode > 0 && srcHwTileMode < 16 && dstHwTileMode > 0 && dstHwTileMode < 16 || requestGPURAMCopy )
|
||||
{
|
||||
GX2::GX2ReserveCmdSpace(1+13*2);
|
||||
|
||||
gx2WriteGather_submit(pm4HeaderType3(IT_HLE_COPY_SURFACE_NEW, 13*2),
|
||||
// src
|
||||
(uint32)srcSurface->imagePtr,
|
||||
(uint32)srcSurface->mipPtr,
|
||||
(uint32)srcSurface->swizzle,
|
||||
(uint32)srcSurface->format.value(),
|
||||
(uint32)srcSurface->width,
|
||||
(uint32)srcSurface->height,
|
||||
(uint32)srcSurface->depth,
|
||||
(uint32)srcSurface->pitch,
|
||||
srcSlice,
|
||||
(uint32)srcSurface->dim.value(),
|
||||
(uint32)srcSurface->tileMode.value(),
|
||||
(uint32)srcSurface->aa,
|
||||
srcMip,
|
||||
// dst
|
||||
(uint32)dstSurface->imagePtr,
|
||||
(uint32)dstSurface->mipPtr,
|
||||
(uint32)dstSurface->swizzle,
|
||||
(uint32)dstSurface->format.value(),
|
||||
(uint32)dstSurface->width,
|
||||
(uint32)dstSurface->height,
|
||||
(uint32)dstSurface->depth,
|
||||
(uint32)dstSurface->pitch,
|
||||
dstSlice,
|
||||
(uint32)dstSurface->dim.value(),
|
||||
(uint32)dstSurface->tileMode.value(),
|
||||
(uint32)dstSurface->aa,
|
||||
dstMip);
|
||||
}
|
||||
|
||||
if (requestGPURAMCopy)
|
||||
return; // if RAM copy happens on the GPU side we skip it here
|
||||
|
||||
// manually exclude expensive CPU texture copies for some known game framebuffer textures
|
||||
// todo - find a better way to solve this
|
||||
bool isDynamicTexCopy = false;
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width >= 800 && srcFormat == Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT); // SM3DW
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width >= 800 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM); // Trine 2
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 0xA0 && srcFormat == Latte::E_GX2SURFFMT::R32_FLOAT); // Little Inferno
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 1280 && srcFormat == Latte::E_GX2SURFFMT::R32_FLOAT); // Donkey Kong Tropical Freeze
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 640 && srcSurface->height == 320 && srcFormat == Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT); // SM3DW Switch Scramble Circus
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM && srcSurface->tileMode != Latte::E_GX2TILEMODE::TM_LINEAR_ALIGNED ); // Affordable Space Adventures
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM); // Affordable Space Adventures
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 1152 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT && (srcSurface->resFlag&GX2_RESFLAG_USAGE_COLOR_BUFFER) != 0 ); // Star Fox Zero
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 680 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R11_G11_B10_FLOAT && (srcSurface->resFlag&GX2_RESFLAG_USAGE_COLOR_BUFFER) != 0 ); // Star Fox Zero
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT ); // Qube
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 322 && srcSurface->height == 182 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_UNORM ); // Qube
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 640 && srcSurface->height == 360 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT ); // Qube
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1920 && srcSurface->height == 1080 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM && dstSurface->resFlag == 0x80000003); // Cosmophony
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM && dstSurface->resFlag == 0x3); // Cosmophony
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && dstSurface->resFlag == 0x3); // The Fall
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && dstSurface->resFlag == 0x3); // The Fall
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && dstSurface->resFlag == 0x80000003); // The Fall
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && srcSurface->resFlag == 0x80000003); // Nano Assault Neo
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 1280 && srcSurface->height == 720 && srcFormat == Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM); // Mario Party 10
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->imagePtr >= 0xF4000000 && srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R10_G10_B10_A2_UNORM); // Mario Party 10
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1920 && srcSurface->height == 1080 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM && dstSurface->resFlag == 0x3); // Hello Kitty Kruisers
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1024 && srcSurface->height == 1024 && srcFormat == Latte::E_GX2SURFFMT::R32_FLOAT && dstSurface->resFlag == 0x5); // Art Academy
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 260 && srcSurface->height == 148 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT && dstSurface->resFlag == 0x3); // Transformers: Rise of the Dark Spark
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1040 && srcSurface->height == 592 && srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT && dstSurface->resFlag == 0x3); // Transformers: Rise of the Dark Spark
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 854 && srcSurface->height == 480 && srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_SRGB && srcSurface->resFlag == 0x3); // Nano Assault Neo
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1024 && srcSurface->height == 576 && srcFormat == Latte::E_GX2SURFFMT::D24_S8_UNORM && srcSurface->resFlag == 0x1); // Skylanders SuperChargers
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 1152 && srcSurface->height == 648 && (srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM || srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT) && srcSurface->resFlag == 0x1); // Watch Dogs
|
||||
isDynamicTexCopy = isDynamicTexCopy || (srcSurface->width == 576 && srcSurface->height == 324 && (srcFormat == Latte::E_GX2SURFFMT::R8_G8_B8_A8_UNORM || srcFormat == Latte::E_GX2SURFFMT::R16_G16_B16_A16_FLOAT) && srcSurface->resFlag == 0x1); // Watch Dogs
|
||||
|
||||
if( isDynamicTexCopy && debugTestForceCPUCopy == false)
|
||||
{
|
||||
debug_printf("Software tex copy blocked\n");
|
||||
return;
|
||||
}
|
||||
|
||||
sint32 copyWidth = dstMipWidth;
|
||||
sint32 copyHeight = dstMipHeight;
|
||||
if (Latte::IsCompressedFormat(dstHwFormat))
|
||||
{
|
||||
copyWidth = (copyWidth + 3) / 4;
|
||||
copyHeight = (copyHeight + 3) / 4;
|
||||
}
|
||||
// copy via GPU commands
|
||||
// for simplicity and performance Cemu uses a HLE command to handle surface copies,
|
||||
// a more accurate implementation would set up actual drawcalls to copy the texture data
|
||||
GX2::GX2ReserveCmdSpace(23);
|
||||
gx2WriteGather_submit(pm4HeaderType3(IT_HLE_COPY_SURFACE_NEW, 4+9*2),
|
||||
// copy rect
|
||||
(uint32)0, // x
|
||||
(uint32)0, // y
|
||||
(uint32)copyWidth,
|
||||
(uint32)copyHeight,
|
||||
// src
|
||||
(uint32)MEMPTR<void>(srcDataPtr).GetMPTR(),
|
||||
(uint32)srcSurface->swizzle,
|
||||
(uint32)srcSurface->format.value(),
|
||||
(uint32)surfOutSrc.pitch,
|
||||
(uint32)surfOutSrc.height,
|
||||
srcSlice,
|
||||
(uint32)srcSurface->dim.value(),
|
||||
(uint32)srcTilemode,
|
||||
(uint32)srcSurface->aa,
|
||||
// dst
|
||||
(uint32)MEMPTR<void>(dstDataPtr).GetMPTR(),
|
||||
(uint32)dstSurface->swizzle,
|
||||
(uint32)dstSurface->format.value(),
|
||||
(uint32)surfOutDst.pitch,
|
||||
(uint32)surfOutDst.height,
|
||||
dstSlice,
|
||||
(uint32)dstSurface->dim.value(),
|
||||
(uint32)dstTilemode,
|
||||
(uint32)dstSurface->aa
|
||||
);
|
||||
}
|
||||
|
||||
gx2SurfaceCopySoftware(inputData, surfOutSrc.height, srcPitch, srcDepth, srcSlice, srcSwizzle, srcHwTileMode,
|
||||
outputData, surfOutDst.height, dstPitch, dstDepth, dstSlice, dstSwizzle, dstHwTileMode,
|
||||
copyWidth, copyHeight, dstBpp);
|
||||
// Thin wrapper around GX2CopySurfaceInternal: forwards the source surface/mip/slice and the
// destination surface/mip/slice unchanged and returns nothing.
void gx2Surface_GX2CopySurface(GX2Surface* srcSurface, uint32 srcMip, uint32 srcSlice, GX2Surface* dstSurface, uint32 dstMip, uint32 dstSlice)
|
||||
{
|
||||
GX2CopySurfaceInternal(srcSurface, srcMip, srcSlice, dstSurface, dstMip, dstSlice);
|
||||
}
|
||||
|
||||
void gx2Export_GX2CopySurface(PPCInterpreter_t* hCPU)
|
||||
@ -363,7 +336,7 @@ void gx2Export_GX2CopySurface(PPCInterpreter_t* hCPU)
|
||||
osLib_returnFromFunction(hCPU, 0);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
typedef struct
|
||||
{
|
||||
sint32 left;
|
||||
sint32 top;
|
||||
@ -471,37 +444,12 @@ void gx2Export_GX2ResolveAAColorBuffer(PPCInterpreter_t* hCPU)
|
||||
// handle format
|
||||
Latte::E_GX2SURFFMT srcFormat = srcSurface->format;
|
||||
Latte::E_GX2SURFFMT dstFormat = dstSurface->format;
|
||||
uint32 srcBPP = Latte::GetFormatBits(srcFormat);
|
||||
uint32 dstBPP = Latte::GetFormatBits(dstFormat);
|
||||
sint32 srcStepX = 1;
|
||||
sint32 srcStepY = 1;
|
||||
sint32 dstStepX = 1;
|
||||
sint32 dstStepY = 1;
|
||||
auto srcHwFormat = Latte::GetHWFormat(srcFormat);
|
||||
auto dstHwFormat = Latte::GetHWFormat(dstFormat);
|
||||
// get texture info
|
||||
LatteAddrLib::AddrSurfaceInfo_OUT surfOutSrc = {0};
|
||||
GX2::GX2CalculateSurfaceInfo(srcSurface, srcMip, &surfOutSrc);
|
||||
LatteAddrLib::AddrSurfaceInfo_OUT surfOutDst = {0};
|
||||
GX2::GX2CalculateSurfaceInfo(dstSurface, dstMip, &surfOutDst);
|
||||
// get input pointer
|
||||
uint8* inputData = NULL;
|
||||
cemu_assert(srcMip < srcSurface->numLevels);
|
||||
if( srcMip == 0 )
|
||||
inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->imagePtr);
|
||||
else if( srcMip == 1 )
|
||||
inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr);
|
||||
else
|
||||
inputData = (uint8*)memory_getPointerFromVirtualOffset(srcSurface->mipPtr+srcSurface->mipOffset[srcMip-1]);
|
||||
// get output pointer
|
||||
uint8* outputData = NULL;
|
||||
cemu_assert(dstMip < dstSurface->numLevels);
|
||||
if( dstMip == 0 )
|
||||
outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->imagePtr);
|
||||
else if( dstMip == 1 )
|
||||
outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr);
|
||||
else
|
||||
outputData = (uint8*)memory_getPointerFromVirtualOffset(dstSurface->mipPtr+dstSurface->mipOffset[dstMip-1]);
|
||||
// calculate step size for compressed textures
|
||||
if( Latte::IsCompressedFormat(srcHwFormat) )
|
||||
{
|
||||
@ -513,64 +461,17 @@ void gx2Export_GX2ResolveAAColorBuffer(PPCInterpreter_t* hCPU)
|
||||
dstStepX = 4;
|
||||
dstStepY = 4;
|
||||
}
|
||||
if( srcStepX != dstStepX || srcStepY != dstStepY )
|
||||
assert_dbg();
|
||||
cemu_assert_debug(srcStepX == dstStepX && srcStepY == dstStepY);
|
||||
|
||||
if( srcHwFormat != dstHwFormat )
|
||||
{
|
||||
// mismatching format
|
||||
debug_printf("GX2CopySurface(): Format mismatch\n");
|
||||
debug_printf("GX2ResolveAAColorBuffer(): Format mismatch\n");
|
||||
cemu_assert_unimplemented();
|
||||
osLib_returnFromFunction(hCPU, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// src
|
||||
uint32 srcPitch = surfOutSrc.pitch;
|
||||
uint32 srcSwizzle = srcSurface->swizzle;
|
||||
uint32 srcPipeSwizzle = (srcSwizzle>>8)&1;
|
||||
uint32 srcBankSwizzle = ((srcSwizzle>>9)&3);
|
||||
uint32 srcTileMode = (uint32)surfOutSrc.hwTileMode;
|
||||
uint32 srcDepth = std::max<uint32>(surfOutSrc.depth, 1);
|
||||
// dst
|
||||
uint32 dstPitch = surfOutDst.pitch;
|
||||
uint32 dstSwizzle = dstSurface->swizzle;
|
||||
uint32 dstPipeSwizzle = (dstSwizzle>>8)&1;
|
||||
uint32 dstBankSwizzle = ((dstSwizzle>>9)&3);
|
||||
uint32 dstTileMode = (uint32)surfOutDst.hwTileMode;
|
||||
uint32 dstDepth = std::max<uint32>(surfOutDst.depth, 1);
|
||||
|
||||
// send copy command to GPU
|
||||
GX2::GX2ReserveCmdSpace(1 + 13 * 2);
|
||||
gx2WriteGather_submit(pm4HeaderType3(IT_HLE_COPY_SURFACE_NEW, 13 * 2),
|
||||
// src
|
||||
(uint32)srcSurface->imagePtr,
|
||||
(uint32)srcSurface->mipPtr,
|
||||
(uint32)srcSurface->swizzle,
|
||||
(uint32)srcSurface->format.value(),
|
||||
(uint32)srcSurface->width,
|
||||
(uint32)srcSurface->height,
|
||||
(uint32)srcSurface->depth,
|
||||
(uint32)srcSurface->pitch,
|
||||
srcSlice,
|
||||
(uint32)srcSurface->dim.value(),
|
||||
(uint32)srcSurface->tileMode.value(),
|
||||
(uint32)srcSurface->aa,
|
||||
srcMip,
|
||||
// dst
|
||||
(uint32)dstSurface->imagePtr,
|
||||
(uint32)dstSurface->mipPtr,
|
||||
(uint32)dstSurface->swizzle,
|
||||
(uint32)dstSurface->format.value(),
|
||||
(uint32)dstSurface->width,
|
||||
(uint32)dstSurface->height,
|
||||
(uint32)dstSurface->depth,
|
||||
(uint32)dstSurface->pitch,
|
||||
dstSlice,
|
||||
(uint32)dstSurface->dim.value(),
|
||||
(uint32)dstSurface->tileMode.value(),
|
||||
(uint32)dstSurface->aa,
|
||||
dstMip);
|
||||
|
||||
GX2CopySurfaceInternal(srcSurface, srcMip, srcSlice, dstSurface, dstMip, dstSlice);
|
||||
osLib_returnFromFunction(hCPU, 0);
|
||||
}
|
||||
|
||||
@ -600,58 +501,11 @@ void gx2Export_GX2ConvertDepthBufferToTextureSurface(PPCInterpreter_t* hCPU)
|
||||
return;
|
||||
}
|
||||
|
||||
// note: Do not trust values from the input GX2Surface* structs but rely on surfOutDst/surfOutSrc instead if possible.
|
||||
// src
|
||||
uint32 srcPitch = surfOutSrc.pitch;
|
||||
uint32 srcSwizzle = depthBuffer->surface.swizzle;
|
||||
uint32 srcPipeSwizzle = (srcSwizzle >> 8) & 1;
|
||||
uint32 srcBankSwizzle = ((srcSwizzle >> 9) & 3);
|
||||
uint32 srcTileMode = (uint32)surfOutSrc.hwTileMode;
|
||||
uint32 srcDepth = std::max<uint32>(surfOutSrc.depth, 1);
|
||||
// dst
|
||||
uint32 dstPitch = surfOutDst.pitch;
|
||||
uint32 dstSwizzle = dstSurface->swizzle;
|
||||
uint32 dstPipeSwizzle = (dstSwizzle >> 8) & 1;
|
||||
uint32 dstBankSwizzle = ((dstSwizzle >> 9) & 3);
|
||||
uint32 dstTileMode = (uint32)surfOutDst.hwTileMode;
|
||||
uint32 dstDepth = srcDepth;
|
||||
|
||||
sint32 srcMip = 0;
|
||||
|
||||
uint32 numSlices = std::max<uint32>(depthBuffer->viewNumSlices, 1);
|
||||
GX2::GX2ReserveCmdSpace((1 + 13 * 2) * numSlices);
|
||||
for (uint32 subSliceIndex = 0; subSliceIndex < numSlices; subSliceIndex++)
|
||||
{
|
||||
// send copy command to GPU
|
||||
gx2WriteGather_submit(pm4HeaderType3(IT_HLE_COPY_SURFACE_NEW, 13 * 2),
|
||||
// src
|
||||
(uint32)(depthBuffer->surface.imagePtr),
|
||||
(uint32)(depthBuffer->surface.mipPtr),
|
||||
(uint32)(depthBuffer->surface.swizzle),
|
||||
(uint32)(depthBuffer->surface.format.value()),
|
||||
(uint32)(depthBuffer->surface.width),
|
||||
(uint32)(depthBuffer->surface.height),
|
||||
(uint32)(depthBuffer->surface.depth),
|
||||
(uint32)(depthBuffer->surface.pitch),
|
||||
(uint32)(depthBuffer->viewFirstSlice) + subSliceIndex,
|
||||
(uint32)(depthBuffer->surface.dim.value()),
|
||||
(uint32)(depthBuffer->surface.tileMode.value()),
|
||||
(uint32)(depthBuffer->surface.aa),
|
||||
srcMip,
|
||||
// dst
|
||||
(uint32)(dstSurface->imagePtr),
|
||||
(uint32)(dstSurface->mipPtr),
|
||||
(uint32)(dstSurface->swizzle),
|
||||
(uint32)(dstSurface->format.value()),
|
||||
(uint32)(dstSurface->width),
|
||||
(uint32)(dstSurface->height),
|
||||
(uint32)(dstSurface->depth),
|
||||
(uint32)(dstSurface->pitch),
|
||||
dstSlice + subSliceIndex,
|
||||
(uint32)(dstSurface->dim.value()),
|
||||
(uint32)(dstSurface->tileMode.value()),
|
||||
(uint32)(dstSurface->aa),
|
||||
dstMip);
|
||||
GX2CopySurfaceInternal(&depthBuffer->surface, depthBuffer->viewMip.value(), depthBuffer->viewFirstSlice.value() + subSliceIndex, dstSurface, dstMip, dstSlice + subSliceIndex);
|
||||
}
|
||||
|
||||
osLib_returnFromFunction(hCPU, 0);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user