From ed5b73938ca7ffdeade3d2ca80d87088c48e9296 Mon Sep 17 00:00:00 2001 From: emiyl Date: Tue, 21 Apr 2026 00:34:30 +0100 Subject: [PATCH] Create CommonRendererCore to move LatteDraw_cleanupAfterFrame away from opengl stuffs --- src/Cafe/CMakeLists.txt | 2 + src/Cafe/HW/Latte/Core/LatteDraw.h | 2 +- src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp | 3 +- .../Renderer/Common/CommonRendererCore.cpp | 155 ++++++++++ .../Renderer/Common/CommonRendererCore.h | 98 ++++++ .../Renderer/OpenGL/OpenGLRendererCore.cpp | 284 +++++------------- 6 files changed, 332 insertions(+), 212 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Common/CommonRendererCore.cpp create mode 100644 src/Cafe/HW/Latte/Renderer/Common/CommonRendererCore.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 2b9d8e28..f6b427e9 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -168,6 +168,8 @@ add_library(CemuCafe HW/Latte/Renderer/RendererOuputShader.h HW/Latte/Renderer/RendererShader.cpp HW/Latte/Renderer/RendererShader.h + HW/Latte/Renderer/Common/CommonRendererCore.cpp + HW/Latte/Renderer/Common/CommonRendererCore.h HW/Latte/ShaderInfo/ShaderDescription.cpp HW/Latte/ShaderInfo/ShaderInfo.h HW/Latte/ShaderInfo/ShaderInstanceInfo.cpp diff --git a/src/Cafe/HW/Latte/Core/LatteDraw.h b/src/Cafe/HW/Latte/Core/LatteDraw.h index 89c34dc0..37390ae1 100644 --- a/src/Cafe/HW/Latte/Core/LatteDraw.h +++ b/src/Cafe/HW/Latte/Core/LatteDraw.h @@ -1,4 +1,4 @@ #pragma once #include "Common/GLInclude/GLInclude.h" -void LatteDraw_cleanupAfterFrame(); \ No newline at end of file +void CommonRenderer_cleanupAfterFrame(); \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp index 1bdbc4bb..a466b652 100644 --- a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp +++ b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp @@ -10,6 +10,7 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" #include "Cafe/GraphicPack/GraphicPack2.h" +#include "HW/Latte/Renderer/Common/CommonRendererCore.h" #include "config/ActiveSettings.h" #include "WindowSystem.h" #include "Cafe/OS/libs/erreula/erreula.h" @@ -694,7 +695,7 @@ void LatteRenderTarget_itHLESwapScanBuffer() performanceMonitor.gpuTime_frameTime.beginMeasuring(); LatteTC_CleanupUnusedTextures(); - LatteDraw_cleanupAfterFrame(); + CommonRenderer_cleanupAfterFrame(); LatteQuery_CancelActiveGPU7Queries(); LatteBufferCache_notifySwapTVScanBuffer(); LattePerformanceMonitor_frameBegin(); diff --git a/src/Cafe/HW/Latte/Renderer/Common/CommonRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Common/CommonRendererCore.cpp new file mode 100644 index 00000000..c8dab968 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Common/CommonRendererCore.cpp @@ -0,0 +1,155 @@ +#include "CommonRendererCore.h" + +indexState_t indexState = { 0 }; +AttributePointerCacheEntry_t activeAttributePointer[LATTE_VS_ATTRIBUTE_LIMIT] = { 0 }; + +indexDataCacheEntry2_t* indexDataCacheBucket[INDEX_DATA_CACHE_BUCKETS] = { 0 }; +indexDataCacheEntry2_t* indexDataCacheFirst = nullptr; // points to least recently used item +indexDataCacheEntry2_t* indexDataCacheLast = nullptr; // points to most recently used item +sint32 indexDataCacheEntryCount = 0; + +uint8* CommonRenderer_getAttributePointerCacheVboOutput(uint32 attributeShaderLoc) +{ + return activeAttributePointer[attributeShaderLoc].vboOutput; +} + +void CommonRenderer_setAttributePointerCacheVboOutput(uint32 attributeShaderLoc, uint8* vboOutput) +{ + activeAttributePointer[attributeShaderLoc].vboOutput = vboOutput; +} + +uint32 CommonRenderer_getAttributePointerCacheVboStride(uint32 attributeShaderLoc) +{ + return activeAttributePointer[attributeShaderLoc].vboStride; +} + +void CommonRenderer_setAttributePointerCacheVboStride(uint32 attributeShaderLoc, uint32 vboStride) +{ + activeAttributePointer[attributeShaderLoc].vboStride = vboStride; +} + +uint8 CommonRenderer_getAttributePointerCacheDataFormat(uint32 attributeShaderLoc) +{ + return activeAttributePointer[attributeShaderLoc].dataFormat; +} + +void CommonRenderer_setAttributePointerCacheDataFormat(uint32 attributeShaderLoc, uint8 dataFormat) +{ + activeAttributePointer[attributeShaderLoc].dataFormat = dataFormat; +} + +uint8 CommonRenderer_getAttributePointerCacheNfa(uint32 attributeShaderLoc) +{ + return activeAttributePointer[attributeShaderLoc].nfa; +} + +void CommonRenderer_setAttributePointerCacheNfa(uint32 attributeShaderLoc, uint8 nfa) +{ + activeAttributePointer[attributeShaderLoc].nfa = nfa; +} + +bool CommonRenderer_getAttributePointerCacheIsSigned(uint32 attributeShaderLoc) +{ + return activeAttributePointer[attributeShaderLoc].isSigned; +} + +void CommonRenderer_setAttributePointerCacheIsSigned(uint32 attributeShaderLoc, bool isSigned) +{ + activeAttributePointer[attributeShaderLoc].isSigned = isSigned; +} + +indexState_t* CommonRenderer_getIndexState() +{ + return &indexState; +} + +indexDataCacheEntry2_t** CommonRenderer_getIndexDataCacheFirst() +{ + return &indexDataCacheFirst; +} + +indexDataCacheEntry2_t** CommonRenderer_getIndexDataCacheBucket(uint32 bucketIdx) +{ + return &indexDataCacheBucket[bucketIdx]; +} + +sint32* CommonRenderer_getIndexDataCacheEntryCount() +{ + return &indexDataCacheEntryCount; +} + +void CommonRenderer_appendToUsageLinkedList(indexDataCacheEntry2_t* entry) +{ + if (indexDataCacheLast == nullptr) + { + indexDataCacheLast = entry; + indexDataCacheFirst = entry; + entry->nextInMostRecentUsage = nullptr; + entry->prevInMostRecentUsage = nullptr; + } + else + { + indexDataCacheLast->nextInMostRecentUsage = entry; + entry->prevInMostRecentUsage = indexDataCacheLast; + entry->nextInMostRecentUsage = nullptr; + indexDataCacheLast = entry; + } +} + +void CommonRenderer_removeFromUsageLinkedList(indexDataCacheEntry2_t* entry) +{ + if (entry->prevInMostRecentUsage) + { + entry->prevInMostRecentUsage->nextInMostRecentUsage = entry->nextInMostRecentUsage; + } + else + indexDataCacheFirst = entry->nextInMostRecentUsage; + if (entry->nextInMostRecentUsage) + { + entry->nextInMostRecentUsage->prevInMostRecentUsage = entry->prevInMostRecentUsage; + } + else + indexDataCacheLast = entry->prevInMostRecentUsage; + entry->prevInMostRecentUsage = nullptr; + entry->nextInMostRecentUsage = nullptr; +} + +void CommonRenderer_removeFromBucket(indexDataCacheEntry2_t* entry) +{ + uint32 indexDataBucketIdx = (uint32)((entry->key.physAddr + entry->key.count) ^ (entry->key.physAddr >> 16)) % INDEX_DATA_CACHE_BUCKETS; + if (indexDataCacheBucket[indexDataBucketIdx] == entry) + { + indexDataCacheBucket[indexDataBucketIdx] = entry->nextInBucket; + entry->nextInBucket = nullptr; + return; + } + indexDataCacheEntry2_t* cacheEntryItr = indexDataCacheBucket[indexDataBucketIdx]; + while (cacheEntryItr) + { + if (cacheEntryItr->nextInBucket == entry) + { + cacheEntryItr->nextInBucket = entry->nextInBucket; + entry->nextInBucket = nullptr; + return; + } + // next + cacheEntryItr = cacheEntryItr->nextInBucket; + } +} + +void CommonRenderer_cleanupAfterFrame() +{ + // drop everything from cache that is older than 30 frames + uint32 frameCounter = LatteGPUState.frameCounter; + while (indexDataCacheFirst) + { + indexDataCacheEntry2_t* entry = indexDataCacheFirst; + if ((frameCounter - entry->lastAccessFrameCount) < 30) + break; + // remove entry + virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry); + CommonRenderer_removeFromUsageLinkedList(entry); + CommonRenderer_removeFromBucket(entry); + free(entry); + } +} \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Renderer/Common/CommonRendererCore.h b/src/Cafe/HW/Latte/Renderer/Common/CommonRendererCore.h new file mode 100644 index 00000000..be7fe519 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Common/CommonRendererCore.h @@ -0,0 +1,98 @@ +#pragma once + +#include "Cafe/HW/Latte/Core/LatteRingBuffer.h" +#include "Cafe/HW/Latte/Core/Latte.h" + +using _INDEX_TYPE = Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE; + +#define INDEX_CACHE_ENTRIES (8) + +typedef struct +{ + MPTR prevIndexDataMPTR; + sint32 prevIndexType; + sint32 prevCount; + // index data + uint8* indexData; + uint8* indexData2; + uint32 indexBufferOffset; + sint32 indexDataSize; // current size + sint32 indexDataLimit; // maximum size + // info + uint32 maxIndex; + uint32 minIndex; +}indexDataCacheEntry_t; + +typedef struct +{ + indexDataCacheEntry_t indexCacheEntry[INDEX_CACHE_ENTRIES]; + sint32 nextCacheEntryIndex; + // info about currently used index data + uint32 maxIndex; + uint32 minIndex; + uint8* indexData; + // buffer + unsigned int glIndexCacheBuffer; + VirtualBufferHeap_t* indexBufferVirtualHeap; + uint8* mappedIndexBuffer; + LatteRingBuffer_t* indexRingBuffer; + uint8* tempIndexStorage; + // misc + bool initialized; + unsigned int glActiveElementArrayBuffer; +}indexState_t; + +typedef struct +{ + uint8* vboOutput; + uint32 vboStride; + uint8 dataFormat; + uint8 nfa; + bool isSigned; +} AttributePointerCacheEntry_t; + +#define INDEX_DATA_CACHE_BUCKETS (1783) + +typedef struct +{ + MPTR physAddr; + sint32 count; + uint32 primitiveRestartIndex; + uint32 primitiveMode; +}indexDataCacheKey_t; + +typedef struct _indexDataCacheEntry_t +{ + indexDataCacheKey_t key; + _indexDataCacheEntry_t* nextInBucket; // points to next element in same bucket + uint32 physSize; + uint32 hash; + _INDEX_TYPE indexType; + //sint32 indexType; + uint32 minIndex; + uint32 maxIndex; + uint32 lastAccessFrameCount; + VirtualBufferHeapEntry_t* heapEntry; + _indexDataCacheEntry_t* nextInMostRecentUsage; // points to element which was used more recently + _indexDataCacheEntry_t* prevInMostRecentUsage; // points to element which was used less recently +}indexDataCacheEntry2_t; + +uint8* CommonRenderer_getAttributePointerCacheVboOutput(uint32 attributeShaderLoc); +void CommonRenderer_setAttributePointerCacheVboOutput(uint32 attributeShaderLoc, uint8* vboOutput); +uint32 CommonRenderer_getAttributePointerCacheVboStride(uint32 attributeShaderLoc); +void CommonRenderer_setAttributePointerCacheVboStride(uint32 attributeShaderLoc, uint32 vboStride); +uint8 CommonRenderer_getAttributePointerCacheDataFormat(uint32 attributeShaderLoc); +void CommonRenderer_setAttributePointerCacheDataFormat(uint32 attributeShaderLoc, uint8 dataFormat); +uint8 CommonRenderer_getAttributePointerCacheNfa(uint32 attributeShaderLoc); +void CommonRenderer_setAttributePointerCacheNfa(uint32 attributeShaderLoc, uint8 nfa); +bool CommonRenderer_getAttributePointerCacheIsSigned(uint32 attributeShaderLoc); +void CommonRenderer_setAttributePointerCacheIsSigned(uint32 attributeShaderLoc, bool isSigned); +indexState_t* CommonRenderer_getIndexState(); +indexDataCacheEntry2_t** CommonRenderer_getIndexDataCacheFirst(); +indexDataCacheEntry2_t** CommonRenderer_getIndexDataCacheBucket(uint32 bucketIdx); +sint32* CommonRenderer_getIndexDataCacheEntryCount(); + +void CommonRenderer_appendToUsageLinkedList(indexDataCacheEntry2_t* entry); +void CommonRenderer_removeFromUsageLinkedList(indexDataCacheEntry2_t* entry); +void CommonRenderer_removeFromBucket(indexDataCacheEntry2_t* entry); +void CommonRenderer_cleanupAfterFrame(); \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp index 571961f4..19c4825d 100644 --- a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp @@ -13,71 +13,24 @@ #include "Cafe/HW/Latte/Renderer/OpenGL/CachedFBOGL.h" #include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h" +#include "Cafe/HW/Latte/Renderer/Common/CommonRendererCore.h" + #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/OS/libs/gx2/GX2.h" #include "Cafe/GameProfile/GameProfile.h" #include "config/ActiveSettings.h" - -using _INDEX_TYPE = Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE; - GLenum sGLActiveDrawMode = 0; extern bool hasValidFramebufferAttached; -#define INDEX_CACHE_ENTRIES (8) - -typedef struct -{ - MPTR prevIndexDataMPTR; - sint32 prevIndexType; - sint32 prevCount; - // index data - uint8* indexData; - uint8* indexData2; - uint32 indexBufferOffset; - sint32 indexDataSize; // current size - sint32 indexDataLimit; // maximum size - // info - uint32 maxIndex; - uint32 minIndex; -}indexDataCacheEntry_t; - -struct -{ - indexDataCacheEntry_t indexCacheEntry[INDEX_CACHE_ENTRIES]; - sint32 nextCacheEntryIndex; - // info about currently used index data - uint32 maxIndex; - uint32 minIndex; - uint8* indexData; - // buffer - GLuint glIndexCacheBuffer; - VirtualBufferHeap_t* indexBufferVirtualHeap; - uint8* mappedIndexBuffer; - LatteRingBuffer_t* indexRingBuffer; - uint8* tempIndexStorage; - // misc - bool initialized; - GLuint glActiveElementArrayBuffer; -}indexState = { 0 }; - -struct -{ - uint8* vboOutput; - uint32 vboStride; - uint8 dataFormat; - uint8 nfa; - bool isSigned; -}activeAttributePointer[LATTE_VS_ATTRIBUTE_LIMIT] = { 0 }; - void LatteDraw_resetAttributePointerCache() { for (sint32 i = 0; i < LATTE_VS_ATTRIBUTE_LIMIT; i++) { - activeAttributePointer[i].vboOutput = (uint8*)-1; - activeAttributePointer[i].vboStride = (uint32)-1; + CommonRenderer_setAttributePointerCacheVboOutput(i, (uint8*)-1); + CommonRenderer_setAttributePointerCacheVboStride(i, (uint32)-1); } } @@ -87,15 +40,21 @@ void _setAttributeBufferPointerRaw(uint32 attributeShaderLoc, uint8* buffer, uin bool isSigned = attrib->isSigned != 0; uint8 nfa = attrib->nfa; // don't call glVertexAttribIPointer if parameters have not changed - if (activeAttributePointer[attributeShaderLoc].vboOutput == vboOutput && activeAttributePointer[attributeShaderLoc].vboStride == vboStride && activeAttributePointer[attributeShaderLoc].dataFormat == dataFormat && activeAttributePointer[attributeShaderLoc].nfa == nfa && activeAttributePointer[attributeShaderLoc].isSigned == isSigned) + if ( + CommonRenderer_getAttributePointerCacheVboOutput(attributeShaderLoc) == vboOutput && + CommonRenderer_getAttributePointerCacheVboStride(attributeShaderLoc) == vboStride && + CommonRenderer_getAttributePointerCacheDataFormat(attributeShaderLoc) == dataFormat && + CommonRenderer_getAttributePointerCacheNfa(attributeShaderLoc) == nfa && + CommonRenderer_getAttributePointerCacheIsSigned(attributeShaderLoc) == isSigned + ) { return; } - activeAttributePointer[attributeShaderLoc].vboOutput = vboOutput; - activeAttributePointer[attributeShaderLoc].vboStride = vboStride; - activeAttributePointer[attributeShaderLoc].dataFormat = dataFormat; - activeAttributePointer[attributeShaderLoc].nfa = nfa; - activeAttributePointer[attributeShaderLoc].isSigned = isSigned; + CommonRenderer_setAttributePointerCacheVboOutput(attributeShaderLoc, vboOutput); + CommonRenderer_setAttributePointerCacheVboStride(attributeShaderLoc, vboStride); + CommonRenderer_setAttributePointerCacheDataFormat(attributeShaderLoc, dataFormat); + CommonRenderer_setAttributePointerCacheNfa(attributeShaderLoc, nfa); + CommonRenderer_setAttributePointerCacheIsSigned(attributeShaderLoc, isSigned); // setup attribute pointer if (dataFormat == FMT_32_32_32_32_FLOAT || dataFormat == FMT_32_32_32_32) { @@ -194,111 +153,23 @@ void OpenGLRenderer::SetAttributeArrayState(uint32 index, bool isEnabled, sint32 // Sets the currently active element array buffer and binds it void OpenGLRenderer::SetArrayElementBuffer(GLuint arrayElementBuffer) { - if (arrayElementBuffer == indexState.glActiveElementArrayBuffer) + indexState_t* indexState = CommonRenderer_getIndexState(); + if (arrayElementBuffer == indexState->glActiveElementArrayBuffer) return; glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, arrayElementBuffer); - indexState.glActiveElementArrayBuffer = arrayElementBuffer; -} - -typedef struct -{ - MPTR physAddr; - sint32 count; - uint32 primitiveRestartIndex; - uint32 primitiveMode; -}indexDataCacheKey_t; - -typedef struct _indexDataCacheEntry_t -{ - indexDataCacheKey_t key; - _indexDataCacheEntry_t* nextInBucket; // points to next element in same bucket - uint32 physSize; - uint32 hash; - _INDEX_TYPE indexType; - //sint32 indexType; - uint32 minIndex; - uint32 maxIndex; - uint32 lastAccessFrameCount; - VirtualBufferHeapEntry_t* heapEntry; - _indexDataCacheEntry_t* nextInMostRecentUsage; // points to element which was used more recently - _indexDataCacheEntry_t* prevInMostRecentUsage; // points to element which was used less recently -}indexDataCacheEntry2_t; - -#define INDEX_DATA_CACHE_BUCKETS (1783) - -indexDataCacheEntry2_t* indexDataCacheBucket[INDEX_DATA_CACHE_BUCKETS] = { 0 }; -indexDataCacheEntry2_t* indexDataCacheFirst = nullptr; // points to least recently used item -indexDataCacheEntry2_t* indexDataCacheLast = nullptr; // points to most recently used item -sint32 indexDataCacheEntryCount = 0; - -void _appendToUsageLinkedList(indexDataCacheEntry2_t* entry) -{ - if (indexDataCacheLast == nullptr) - { - indexDataCacheLast = entry; - indexDataCacheFirst = entry; - entry->nextInMostRecentUsage = nullptr; - entry->prevInMostRecentUsage = nullptr; - } - else - { - indexDataCacheLast->nextInMostRecentUsage = entry; - entry->prevInMostRecentUsage = indexDataCacheLast; - entry->nextInMostRecentUsage = nullptr; - indexDataCacheLast = entry; - } -} - -void _removeFromUsageLinkedList(indexDataCacheEntry2_t* entry) -{ - if (entry->prevInMostRecentUsage) - { - entry->prevInMostRecentUsage->nextInMostRecentUsage = entry->nextInMostRecentUsage; - } - else - indexDataCacheFirst = entry->nextInMostRecentUsage; - if (entry->nextInMostRecentUsage) - { - entry->nextInMostRecentUsage->prevInMostRecentUsage = entry->prevInMostRecentUsage; - } - else - indexDataCacheLast = entry->prevInMostRecentUsage; - entry->prevInMostRecentUsage = nullptr; - entry->nextInMostRecentUsage = nullptr; -} - -void _removeFromBucket(indexDataCacheEntry2_t* entry) -{ - uint32 indexDataBucketIdx = (uint32)((entry->key.physAddr + entry->key.count) ^ (entry->key.physAddr >> 16)) % INDEX_DATA_CACHE_BUCKETS; - if (indexDataCacheBucket[indexDataBucketIdx] == entry) - { - indexDataCacheBucket[indexDataBucketIdx] = entry->nextInBucket; - entry->nextInBucket = nullptr; - return; - } - indexDataCacheEntry2_t* cacheEntryItr = indexDataCacheBucket[indexDataBucketIdx]; - while (cacheEntryItr) - { - if (cacheEntryItr->nextInBucket == entry) - { - cacheEntryItr->nextInBucket = entry->nextInBucket; - entry->nextInBucket = nullptr; - return; - } - // next - cacheEntryItr = cacheEntryItr->nextInBucket; - } + indexState->glActiveElementArrayBuffer = arrayElementBuffer; } void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry) { uint32 count = cacheEntry->key.count; uint32 primitiveRestartIndex = cacheEntry->key.primitiveRestartIndex; + indexState_t *indexState = CommonRenderer_getIndexState(); if (cacheEntry->indexType == _INDEX_TYPE::U16_BE) { // 16bit indices uint16* indexInputU16 = (uint16*)memory_getPointerFromPhysicalOffset(cacheEntry->key.physAddr); - uint16* indexOutputU16 = (uint16*)indexState.tempIndexStorage; + uint16* indexOutputU16 = (uint16*)indexState->tempIndexStorage; cemu_assert_debug(count != 0); uint16 indexMinU16 = 0xFFFF; uint16 indexMaxU16 = 0; @@ -334,14 +205,14 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry) } cacheEntry->minIndex = indexMinU16; cacheEntry->maxIndex = indexMaxU16; - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint16), indexState.tempIndexStorage); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint16), indexState->tempIndexStorage); performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += (count * sizeof(uint16)); } else if(cacheEntry->indexType == _INDEX_TYPE::U32_BE) { // 32bit indices uint32* indexInputU32 = (uint32*)memory_getPointerFromPhysicalOffset(cacheEntry->key.physAddr); - uint32* indexOutputU32 = (uint32*)indexState.tempIndexStorage; + uint32* indexOutputU32 = (uint32*)indexState->tempIndexStorage; cemu_assert_debug(count != 0); uint32 indexMinU32 = _swapEndianU32(*indexInputU32); uint32 indexMaxU32 = _swapEndianU32(*indexInputU32); @@ -359,7 +230,7 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry) } cacheEntry->minIndex = indexMinU32; cacheEntry->maxIndex = indexMaxU32; - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint32), indexState.tempIndexStorage); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint32), indexState->tempIndexStorage); performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += (count * sizeof(uint32)); } else @@ -368,33 +239,18 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry) } } - -void LatteDraw_cleanupAfterFrame() -{ - // drop everything from cache that is older than 30 frames - uint32 frameCounter = LatteGPUState.frameCounter; - while (indexDataCacheFirst) - { - indexDataCacheEntry2_t* entry = indexDataCacheFirst; - if ((frameCounter - entry->lastAccessFrameCount) < 30) - break; - // remove entry - virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry); - _removeFromUsageLinkedList(entry); - _removeFromBucket(entry); - free(entry); - } -} - void LatteDrawGL_removeLeastRecentlyUsedIndexCacheEntries(sint32 count) { - while (indexDataCacheFirst && count > 0) + indexState_t *indexState = CommonRenderer_getIndexState(); + indexDataCacheEntry2_t** indexDataCacheFirst = CommonRenderer_getIndexDataCacheFirst(); + + while (*indexDataCacheFirst && count > 0) { - indexDataCacheEntry2_t* entry = indexDataCacheFirst; + indexDataCacheEntry2_t* entry = *indexDataCacheFirst; // remove entry - virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry); - _removeFromUsageLinkedList(entry); - _removeFromBucket(entry); + virtualBufferHeap_free(indexState->indexBufferVirtualHeap, entry->heapEntry); + CommonRenderer_removeFromUsageLinkedList(entry); + CommonRenderer_removeFromBucket(entry); free(entry); count--; } @@ -432,19 +288,22 @@ uint32 LatteDrawGL_calculateIndexDataHash(uint8* data, uint32 size) // todo - Outdated cache implementation. Update OpenGL renderer to use the generic implementation that is also used by the Vulkan renderer void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE indexType, sint32 count, sint32 primitiveMode) { + indexState_t* indexState = CommonRenderer_getIndexState(); + indexDataCacheEntry2_t** indexDataCacheFirst = CommonRenderer_getIndexDataCacheFirst(); + if (indexType == _INDEX_TYPE::AUTO) { - indexState.minIndex = 0; - indexState.maxIndex = count - 1; + indexState->minIndex = 0; + indexState->maxIndex = count - 1; // since no indices are used we don't need to unbind the element array buffer return; // automatic indices } - OpenGLRenderer::SetArrayElementBuffer(indexState.glIndexCacheBuffer); + OpenGLRenderer::SetArrayElementBuffer(indexState->glIndexCacheBuffer); uint32 indexDataBucketIdx = (uint32)((indexDataMPTR + count) ^ (indexDataMPTR >> 16)) % INDEX_DATA_CACHE_BUCKETS; // find matching entry uint32 primitiveRestartIndex = LatteGPUState.contextNew.VGT_MULTI_PRIM_IB_RESET_INDX.get_RESTART_INDEX(); - indexDataCacheEntry2_t* cacheEntryItr = indexDataCacheBucket[indexDataBucketIdx]; + indexDataCacheEntry2_t* cacheEntryItr = *CommonRenderer_getIndexDataCacheBucket(indexDataBucketIdx); indexDataCacheKey_t compareKey; compareKey.physAddr = indexDataMPTR; compareKey.count = count; @@ -459,9 +318,9 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde continue; } // entry found - indexState.minIndex = cacheEntryItr->minIndex; - indexState.maxIndex = cacheEntryItr->maxIndex; - indexState.indexData = (uint8*)(size_t)cacheEntryItr->heapEntry->startOffset; + indexState->minIndex = cacheEntryItr->minIndex; + indexState->maxIndex = cacheEntryItr->maxIndex; + indexState->indexData = (uint8*)(size_t)cacheEntryItr->heapEntry->startOffset; cacheEntryItr->lastAccessFrameCount = LatteGPUState.frameCounter; // check if the data changed uint32 h = LatteDrawGL_calculateIndexDataHash(memory_getPointerFromPhysicalOffset(indexDataMPTR), cacheEntryItr->physSize); @@ -472,8 +331,8 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde cacheEntryItr->hash = h; } // move entry to the front - _removeFromUsageLinkedList(cacheEntryItr); - _appendToUsageLinkedList(cacheEntryItr); + CommonRenderer_removeFromUsageLinkedList(cacheEntryItr); + CommonRenderer_appendToUsageLinkedList(cacheEntryItr); return; } // calculate size of index data in cache @@ -483,16 +342,16 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde else cacheIndexDataSize = count * sizeof(uint32); // no matching entry, create new one - VirtualBufferHeapEntry_t* heapEntry = virtualBufferHeap_allocate(indexState.indexBufferVirtualHeap, cacheIndexDataSize); + VirtualBufferHeapEntry_t* heapEntry = virtualBufferHeap_allocate(indexState->indexBufferVirtualHeap, cacheIndexDataSize); if (heapEntry == nullptr) { while (true) { LatteDrawGL_removeLeastRecentlyUsedIndexCacheEntries(10); - heapEntry = virtualBufferHeap_allocate(indexState.indexBufferVirtualHeap, cacheIndexDataSize); + heapEntry = virtualBufferHeap_allocate(indexState->indexBufferVirtualHeap, cacheIndexDataSize); if (heapEntry != nullptr) break; - if (indexDataCacheFirst == nullptr) + if (*indexDataCacheFirst == nullptr) { cemuLog_log(LogType::Force, "Unable to allocate entry in index cache"); assert_dbg(); @@ -511,17 +370,19 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde cacheEntry->heapEntry = heapEntry; cacheEntry->lastAccessFrameCount = LatteGPUState.frameCounter; // append entry in bucket list - cacheEntry->nextInBucket = indexDataCacheBucket[indexDataBucketIdx]; - indexDataCacheBucket[indexDataBucketIdx] = cacheEntry; + indexDataCacheEntry2_t** bucket = CommonRenderer_getIndexDataCacheBucket(indexDataBucketIdx); + cacheEntry->nextInBucket = *bucket; + *bucket = cacheEntry; // append as most recently used entry - _appendToUsageLinkedList(cacheEntry); + CommonRenderer_appendToUsageLinkedList(cacheEntry); // decode and upload the data _decodeAndUploadIndexData(cacheEntry); - indexDataCacheEntryCount++; - indexState.minIndex = cacheEntry->minIndex; - indexState.maxIndex = cacheEntry->maxIndex; - indexState.indexData = (uint8*)(size_t)cacheEntry->heapEntry->startOffset; + sint32* indexDataCacheEntryCount = CommonRenderer_getIndexDataCacheEntryCount(); + (*indexDataCacheEntryCount)++; + indexState->minIndex = cacheEntry->minIndex; + indexState->maxIndex = cacheEntry->maxIndex; + indexState->indexData = (uint8*)(size_t)cacheEntry->heapEntry->startOffset; } void LatteDraw_handleSpecialState8_clearAsDepth() @@ -590,19 +451,20 @@ void LatteDraw_handleSpecialState8_clearAsDepth() void LatteDrawGL_doDraw(_INDEX_TYPE indexType, uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count) { + indexState_t* indexState = CommonRenderer_getIndexState(); if (indexType == _INDEX_TYPE::U16_BE) { // 16bit index, big endian if (instanceCount > 1 || baseInstance != 0) { - glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_SHORT, indexState.indexData, instanceCount, baseVertex, baseInstance); + glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_SHORT, indexState->indexData, instanceCount, baseVertex, baseInstance); } else { if (baseVertex != 0) - glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_SHORT, indexState.indexData, baseVertex); + glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_SHORT, indexState->indexData, baseVertex); else - glDrawRangeElements(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_SHORT, indexState.indexData); + glDrawRangeElements(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_SHORT, indexState->indexData); } } else if (indexType == _INDEX_TYPE::U32_BE) @@ -611,11 +473,11 @@ void LatteDrawGL_doDraw(_INDEX_TYPE indexType, uint32 baseVertex, uint32 baseIns if (instanceCount > 1 || baseInstance != 0) { //debug_printf("Render instanced\n"); - glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_INT, indexState.indexData, instanceCount, baseVertex, baseInstance); + glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_INT, indexState->indexData, instanceCount, baseVertex, baseInstance); } else { - glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_INT, indexState.indexData, baseVertex); + glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_INT, indexState->indexData, baseVertex); } } else if (indexType == _INDEX_TYPE::AUTO) @@ -1040,7 +902,8 @@ void OpenGLRenderer::draw_genericDrawHandler(uint32 baseVertex, uint32 baseInsta endPerfMonProfiling(performanceMonitor.gpuTime_dcStageIndexMgr); // synchronize vertex and uniform buffers - LatteBufferCache_Sync(indexState.minIndex + baseVertex, indexState.maxIndex + baseVertex, baseInstance, instanceCount); + indexState_t *indexState = CommonRenderer_getIndexState(); + LatteBufferCache_Sync(indexState->minIndex + baseVertex, indexState->maxIndex + baseVertex, baseInstance, instanceCount); _setupVertexAttributes(); @@ -1231,23 +1094,24 @@ void OpenGLRenderer::draw_endSequence() void OpenGLRenderer::draw_init() { - if (indexState.initialized) + indexState_t* indexState = CommonRenderer_getIndexState(); + if (indexState->initialized) return; - indexState.initialized = true; + indexState->initialized = true; // create index buffer - glGenBuffers(1, &indexState.glIndexCacheBuffer); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexState.glIndexCacheBuffer); + glGenBuffers(1, &indexState->glIndexCacheBuffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexState->glIndexCacheBuffer); glBufferData(GL_ELEMENT_ARRAY_BUFFER, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, NULL, GL_DYNAMIC_DRAW); #if BOOST_OS_WINDOWS - indexState.mappedIndexBuffer = (uint8*)_aligned_malloc(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, 256); + indexState->mappedIndexBuffer = (uint8*)_aligned_malloc(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, 256); #else - indexState.mappedIndexBuffer = (uint8*)aligned_alloc(256, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); + indexState->mappedIndexBuffer = (uint8*)aligned_alloc(256, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); #endif glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - indexState.indexRingBuffer = LatteRingBuffer_create(indexState.mappedIndexBuffer, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); - indexState.tempIndexStorage = (uint8*)malloc(1024 * 1024 * 8); + indexState->indexRingBuffer = LatteRingBuffer_create(indexState->mappedIndexBuffer, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); + indexState->tempIndexStorage = (uint8*)malloc(1024 * 1024 * 8); // create virtual heap for index buffer - indexState.indexBufferVirtualHeap = virtualBufferHeap_create(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); + indexState->indexBufferVirtualHeap = virtualBufferHeap_create(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); } void OpenGLRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset)