From a9f8e6430d2f0d6b1efb9bc5e5c8f7cbf6c0a4de Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Tue, 21 Apr 2026 13:50:38 +0200 Subject: [PATCH] fix linux build without vulkan --- src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp | 2 + .../Renderer/OpenGL/OpenGLRendererCore.cpp | 281 ++++++++++++++---- src/Cafe/HW/Latte/Renderer/RendererCore.cpp | 191 ++++-------- src/Cafe/HW/Latte/Renderer/RendererCore.h | 86 ------ .../Vulkan/VulkanPipelineCompiler.cpp | 50 ---- src/main.cpp | 2 +- 6 files changed, 274 insertions(+), 338 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp index 96b7316b..2edcf4ea 100644 --- a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp +++ b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp @@ -695,7 +695,9 @@ void LatteRenderTarget_itHLESwapScanBuffer() performanceMonitor.gpuTime_frameTime.beginMeasuring(); LatteTC_CleanupUnusedTextures(); +#ifdef ENABLE_OPENGL LatteDraw_cleanupAfterFrame(); +#endif LatteQuery_CancelActiveGPU7Queries(); LatteBufferCache_notifySwapTVScanBuffer(); LattePerformanceMonitor_frameBegin(); diff --git a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp index 8e190743..2338201d 100644 --- a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp @@ -13,21 +13,73 @@ #include "Cafe/HW/Latte/Renderer/OpenGL/CachedFBOGL.h" #include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h" -#include "Cafe/HW/Latte/Renderer/RendererCore.h" - #include "Cafe/HW/Latte/ISA/RegDefines.h" #include "Cafe/OS/libs/gx2/GX2.h" #include "Cafe/GameProfile/GameProfile.h" +#include "HW/Latte/Renderer/RendererCore.h" #include "config/ActiveSettings.h" + +using _INDEX_TYPE = Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE; + GLenum sGLActiveDrawMode = 0; extern bool hasValidFramebufferAttached; +#define INDEX_CACHE_ENTRIES (8) + +typedef struct +{ + MPTR prevIndexDataMPTR; + sint32 prevIndexType; + sint32 prevCount; + // index data + uint8* indexData; + uint8* indexData2; + uint32 indexBufferOffset; + sint32 indexDataSize; // current size + sint32 indexDataLimit; // maximum size + // info + uint32 maxIndex; + uint32 minIndex; +}indexDataCacheEntry_t; + +struct +{ + indexDataCacheEntry_t indexCacheEntry[INDEX_CACHE_ENTRIES]; + sint32 nextCacheEntryIndex; + // info about currently used index data + uint32 maxIndex; + uint32 minIndex; + uint8* indexData; + // buffer + GLuint glIndexCacheBuffer; + VirtualBufferHeap_t* indexBufferVirtualHeap; + uint8* mappedIndexBuffer; + LatteRingBuffer_t* indexRingBuffer; + uint8* tempIndexStorage; + // misc + bool initialized; + GLuint glActiveElementArrayBuffer; +}indexState = { 0 }; + +struct +{ + uint8* vboOutput; + uint32 vboStride; + uint8 dataFormat; + uint8 nfa; + bool isSigned; +}activeAttributePointer[LATTE_VS_ATTRIBUTE_LIMIT] = { 0 }; + void LatteDraw_resetAttributePointerCache() { - RendererCore_resetAttributePointerCache(); + for (sint32 i = 0; i < LATTE_VS_ATTRIBUTE_LIMIT; i++) + { + activeAttributePointer[i].vboOutput = (uint8*)-1; + activeAttributePointer[i].vboStride = (uint32)-1; + } } void _setAttributeBufferPointerRaw(uint32 attributeShaderLoc, uint8* buffer, uint32 bufferSize, uint32 stride, LatteParsedFetchShaderAttribute_t* attrib, uint8* vboOutput, uint32 vboStride) @@ -36,11 +88,15 @@ void _setAttributeBufferPointerRaw(uint32 attributeShaderLoc, uint8* buffer, uin bool isSigned = attrib->isSigned != 0; uint8 nfa = attrib->nfa; // don't call glVertexAttribIPointer if parameters have not changed - bool attributePointerChanged = RendererCore_checkIfAttributePointerCacheChanged(attributeShaderLoc, vboOutput, vboStride, dataFormat, nfa, isSigned); - if (!attributePointerChanged) { + if (activeAttributePointer[attributeShaderLoc].vboOutput == vboOutput && activeAttributePointer[attributeShaderLoc].vboStride == vboStride && activeAttributePointer[attributeShaderLoc].dataFormat == dataFormat && activeAttributePointer[attributeShaderLoc].nfa == nfa && activeAttributePointer[attributeShaderLoc].isSigned == isSigned) + { return; } - + activeAttributePointer[attributeShaderLoc].vboOutput = vboOutput; + activeAttributePointer[attributeShaderLoc].vboStride = vboStride; + activeAttributePointer[attributeShaderLoc].dataFormat = dataFormat; + activeAttributePointer[attributeShaderLoc].nfa = nfa; + activeAttributePointer[attributeShaderLoc].isSigned = isSigned; // setup attribute pointer if (dataFormat == FMT_32_32_32_32_FLOAT || dataFormat == FMT_32_32_32_32) { @@ -139,23 +195,111 @@ void OpenGLRenderer::SetAttributeArrayState(uint32 index, bool isEnabled, sint32 // Sets the currently active element array buffer and binds it void OpenGLRenderer::SetArrayElementBuffer(GLuint arrayElementBuffer) { - indexState_t* indexState = RendererCore_getIndexState(); - if (arrayElementBuffer == indexState->glActiveElementArrayBuffer) + if (arrayElementBuffer == indexState.glActiveElementArrayBuffer) return; glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, arrayElementBuffer); - indexState->glActiveElementArrayBuffer = arrayElementBuffer; + indexState.glActiveElementArrayBuffer = arrayElementBuffer; +} + +typedef struct +{ + MPTR physAddr; + sint32 count; + uint32 primitiveRestartIndex; + uint32 primitiveMode; +}indexDataCacheKey_t; + +typedef struct _indexDataCacheEntry_t +{ + indexDataCacheKey_t key; + _indexDataCacheEntry_t* nextInBucket; // points to next element in same bucket + uint32 physSize; + uint32 hash; + _INDEX_TYPE indexType; + //sint32 indexType; + uint32 minIndex; + uint32 maxIndex; + uint32 lastAccessFrameCount; + VirtualBufferHeapEntry_t* heapEntry; + _indexDataCacheEntry_t* nextInMostRecentUsage; // points to element which was used more recently + _indexDataCacheEntry_t* prevInMostRecentUsage; // points to element which was used less recently +}indexDataCacheEntry2_t; + +#define INDEX_DATA_CACHE_BUCKETS (1783) + +indexDataCacheEntry2_t* indexDataCacheBucket[INDEX_DATA_CACHE_BUCKETS] = { 0 }; +indexDataCacheEntry2_t* indexDataCacheFirst = nullptr; // points to least recently used item +indexDataCacheEntry2_t* indexDataCacheLast = nullptr; // points to most recently used item +sint32 indexDataCacheEntryCount = 0; + +void _appendToUsageLinkedList(indexDataCacheEntry2_t* entry) +{ + if (indexDataCacheLast == nullptr) + { + indexDataCacheLast = entry; + indexDataCacheFirst = entry; + entry->nextInMostRecentUsage = nullptr; + entry->prevInMostRecentUsage = nullptr; + } + else + { + indexDataCacheLast->nextInMostRecentUsage = entry; + entry->prevInMostRecentUsage = indexDataCacheLast; + entry->nextInMostRecentUsage = nullptr; + indexDataCacheLast = entry; + } +} + +void _removeFromUsageLinkedList(indexDataCacheEntry2_t* entry) +{ + if (entry->prevInMostRecentUsage) + { + entry->prevInMostRecentUsage->nextInMostRecentUsage = entry->nextInMostRecentUsage; + } + else + indexDataCacheFirst = entry->nextInMostRecentUsage; + if (entry->nextInMostRecentUsage) + { + entry->nextInMostRecentUsage->prevInMostRecentUsage = entry->prevInMostRecentUsage; + } + else + indexDataCacheLast = entry->prevInMostRecentUsage; + entry->prevInMostRecentUsage = nullptr; + entry->nextInMostRecentUsage = nullptr; +} + +void _removeFromBucket(indexDataCacheEntry2_t* entry) +{ + uint32 indexDataBucketIdx = (uint32)((entry->key.physAddr + entry->key.count) ^ (entry->key.physAddr >> 16)) % INDEX_DATA_CACHE_BUCKETS; + if (indexDataCacheBucket[indexDataBucketIdx] == entry) + { + indexDataCacheBucket[indexDataBucketIdx] = entry->nextInBucket; + entry->nextInBucket = nullptr; + return; + } + indexDataCacheEntry2_t* cacheEntryItr = indexDataCacheBucket[indexDataBucketIdx]; + while (cacheEntryItr) + { + if (cacheEntryItr->nextInBucket == entry) + { + cacheEntryItr->nextInBucket = entry->nextInBucket; + entry->nextInBucket = nullptr; + return; + } + // next + cacheEntryItr = cacheEntryItr->nextInBucket; + } } void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry) { uint32 count = cacheEntry->key.count; uint32 primitiveRestartIndex = cacheEntry->key.primitiveRestartIndex; - indexState_t *indexState = RendererCore_getIndexState(); if (cacheEntry->indexType == _INDEX_TYPE::U16_BE) { // 16bit indices uint16* indexInputU16 = (uint16*)memory_getPointerFromPhysicalOffset(cacheEntry->key.physAddr); - uint16* indexOutputU16 = (uint16*)indexState->tempIndexStorage; + uint16* indexOutputU16 = (uint16*)indexState.tempIndexStorage; cemu_assert_debug(count != 0); uint16 indexMinU16 = 0xFFFF; uint16 indexMaxU16 = 0; @@ -191,14 +335,14 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry) } cacheEntry->minIndex = indexMinU16; cacheEntry->maxIndex = indexMaxU16; - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint16), indexState->tempIndexStorage); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint16), indexState.tempIndexStorage); performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += (count * sizeof(uint16)); } else if(cacheEntry->indexType == _INDEX_TYPE::U32_BE) { // 32bit indices uint32* indexInputU32 = (uint32*)memory_getPointerFromPhysicalOffset(cacheEntry->key.physAddr); - uint32* indexOutputU32 = (uint32*)indexState->tempIndexStorage; + uint32* indexOutputU32 = (uint32*)indexState.tempIndexStorage; cemu_assert_debug(count != 0); uint32 indexMinU32 = _swapEndianU32(*indexInputU32); uint32 indexMaxU32 = _swapEndianU32(*indexInputU32); @@ -216,7 +360,7 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry) } cacheEntry->minIndex = indexMinU32; cacheEntry->maxIndex = indexMaxU32; - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint32), indexState->tempIndexStorage); + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint32), indexState.tempIndexStorage); performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += (count * sizeof(uint32)); } else @@ -225,18 +369,35 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry) } } + +void LatteDraw_cleanupAfterFrame() +{ + if (g_renderer->GetType() != RendererAPI::OpenGL) + return; + // drop everything from cache that is older than 30 frames + uint32 frameCounter = LatteGPUState.frameCounter; + while (indexDataCacheFirst) + { + indexDataCacheEntry2_t* entry = indexDataCacheFirst; + if ((frameCounter - entry->lastAccessFrameCount) < 30) + break; + // remove entry + virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry); + _removeFromUsageLinkedList(entry); + _removeFromBucket(entry); + free(entry); + } +} + void LatteDrawGL_removeLeastRecentlyUsedIndexCacheEntries(sint32 count) { - indexState_t *indexState = RendererCore_getIndexState(); - indexDataCacheEntry2_t** indexDataCacheFirst = RendererCore_getIndexDataCacheFirst(); - - while (*indexDataCacheFirst && count > 0) + while (indexDataCacheFirst && count > 0) { - indexDataCacheEntry2_t* entry = *indexDataCacheFirst; + indexDataCacheEntry2_t* entry = indexDataCacheFirst; // remove entry - virtualBufferHeap_free(indexState->indexBufferVirtualHeap, entry->heapEntry); - RendererCore_removeFromUsageLinkedList(entry); - RendererCore_removeFromBucket(entry); + virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry); + _removeFromUsageLinkedList(entry); + _removeFromBucket(entry); free(entry); count--; } @@ -274,22 +435,19 @@ uint32 LatteDrawGL_calculateIndexDataHash(uint8* data, uint32 size) // todo - Outdated cache implementation. Update OpenGL renderer to use the generic implementation that is also used by the Vulkan renderer void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE indexType, sint32 count, sint32 primitiveMode) { - indexState_t* indexState = RendererCore_getIndexState(); - indexDataCacheEntry2_t** indexDataCacheFirst = RendererCore_getIndexDataCacheFirst(); - if (indexType == _INDEX_TYPE::AUTO) { - indexState->minIndex = 0; - indexState->maxIndex = count - 1; + indexState.minIndex = 0; + indexState.maxIndex = count - 1; // since no indices are used we don't need to unbind the element array buffer return; // automatic indices } - OpenGLRenderer::SetArrayElementBuffer(indexState->glIndexCacheBuffer); + OpenGLRenderer::SetArrayElementBuffer(indexState.glIndexCacheBuffer); uint32 indexDataBucketIdx = (uint32)((indexDataMPTR + count) ^ (indexDataMPTR >> 16)) % INDEX_DATA_CACHE_BUCKETS; // find matching entry uint32 primitiveRestartIndex = LatteGPUState.contextNew.VGT_MULTI_PRIM_IB_RESET_INDX.get_RESTART_INDEX(); - indexDataCacheEntry2_t* cacheEntryItr = *RendererCore_getIndexDataCacheBucket(indexDataBucketIdx); + indexDataCacheEntry2_t* cacheEntryItr = indexDataCacheBucket[indexDataBucketIdx]; indexDataCacheKey_t compareKey; compareKey.physAddr = indexDataMPTR; compareKey.count = count; @@ -304,9 +462,9 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde continue; } // entry found - indexState->minIndex = cacheEntryItr->minIndex; - indexState->maxIndex = cacheEntryItr->maxIndex; - indexState->indexData = (uint8*)(size_t)cacheEntryItr->heapEntry->startOffset; + indexState.minIndex = cacheEntryItr->minIndex; + indexState.maxIndex = cacheEntryItr->maxIndex; + indexState.indexData = (uint8*)(size_t)cacheEntryItr->heapEntry->startOffset; cacheEntryItr->lastAccessFrameCount = LatteGPUState.frameCounter; // check if the data changed uint32 h = LatteDrawGL_calculateIndexDataHash(memory_getPointerFromPhysicalOffset(indexDataMPTR), cacheEntryItr->physSize); @@ -317,8 +475,8 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde cacheEntryItr->hash = h; } // move entry to the front - RendererCore_removeFromUsageLinkedList(cacheEntryItr); - RendererCore_appendToUsageLinkedList(cacheEntryItr); + _removeFromUsageLinkedList(cacheEntryItr); + _appendToUsageLinkedList(cacheEntryItr); return; } // calculate size of index data in cache @@ -328,16 +486,16 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde else cacheIndexDataSize = count * sizeof(uint32); // no matching entry, create new one - VirtualBufferHeapEntry_t* heapEntry = virtualBufferHeap_allocate(indexState->indexBufferVirtualHeap, cacheIndexDataSize); + VirtualBufferHeapEntry_t* heapEntry = virtualBufferHeap_allocate(indexState.indexBufferVirtualHeap, cacheIndexDataSize); if (heapEntry == nullptr) { while (true) { LatteDrawGL_removeLeastRecentlyUsedIndexCacheEntries(10); - heapEntry = virtualBufferHeap_allocate(indexState->indexBufferVirtualHeap, cacheIndexDataSize); + heapEntry = virtualBufferHeap_allocate(indexState.indexBufferVirtualHeap, cacheIndexDataSize); if (heapEntry != nullptr) break; - if (*indexDataCacheFirst == nullptr) + if (indexDataCacheFirst == nullptr) { cemuLog_log(LogType::Force, "Unable to allocate entry in index cache"); assert_dbg(); @@ -356,37 +514,34 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde cacheEntry->heapEntry = heapEntry; cacheEntry->lastAccessFrameCount = LatteGPUState.frameCounter; // append entry in bucket list - indexDataCacheEntry2_t** bucket = RendererCore_getIndexDataCacheBucket(indexDataBucketIdx); - cacheEntry->nextInBucket = *bucket; - *bucket = cacheEntry; + cacheEntry->nextInBucket = indexDataCacheBucket[indexDataBucketIdx]; + indexDataCacheBucket[indexDataBucketIdx] = cacheEntry; // append as most recently used entry - RendererCore_appendToUsageLinkedList(cacheEntry); + _appendToUsageLinkedList(cacheEntry); // decode and upload the data _decodeAndUploadIndexData(cacheEntry); - sint32* indexDataCacheEntryCount = RendererCore_getIndexDataCacheEntryCount(); - (*indexDataCacheEntryCount)++; - indexState->minIndex = cacheEntry->minIndex; - indexState->maxIndex = cacheEntry->maxIndex; - indexState->indexData = (uint8*)(size_t)cacheEntry->heapEntry->startOffset; + indexDataCacheEntryCount++; + indexState.minIndex = cacheEntry->minIndex; + indexState.maxIndex = cacheEntry->maxIndex; + indexState.indexData = (uint8*)(size_t)cacheEntry->heapEntry->startOffset; } void LatteDrawGL_doDraw(_INDEX_TYPE indexType, uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count) { - indexState_t* indexState = RendererCore_getIndexState(); if (indexType == _INDEX_TYPE::U16_BE) { // 16bit index, big endian if (instanceCount > 1 || baseInstance != 0) { - glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_SHORT, indexState->indexData, instanceCount, baseVertex, baseInstance); + glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_SHORT, indexState.indexData, instanceCount, baseVertex, baseInstance); } else { if (baseVertex != 0) - glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_SHORT, indexState->indexData, baseVertex); + glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_SHORT, indexState.indexData, baseVertex); else - glDrawRangeElements(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_SHORT, indexState->indexData); + glDrawRangeElements(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_SHORT, indexState.indexData); } } else if (indexType == _INDEX_TYPE::U32_BE) @@ -395,11 +550,11 @@ void LatteDrawGL_doDraw(_INDEX_TYPE indexType, uint32 baseVertex, uint32 baseIns if (instanceCount > 1 || baseInstance != 0) { //debug_printf("Render instanced\n"); - glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_INT, indexState->indexData, instanceCount, baseVertex, baseInstance); + glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_INT, indexState.indexData, instanceCount, baseVertex, baseInstance); } else { - glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_INT, indexState->indexData, baseVertex); + glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_INT, indexState.indexData, baseVertex); } } else if (indexType == _INDEX_TYPE::AUTO) @@ -824,8 +979,7 @@ void OpenGLRenderer::draw_genericDrawHandler(uint32 baseVertex, uint32 baseInsta endPerfMonProfiling(performanceMonitor.gpuTime_dcStageIndexMgr); // synchronize vertex and uniform buffers - indexState_t *indexState = RendererCore_getIndexState(); - LatteBufferCache_Sync(indexState->minIndex + baseVertex, indexState->maxIndex + baseVertex, baseInstance, instanceCount); + LatteBufferCache_Sync(indexState.minIndex + baseVertex, indexState.maxIndex + baseVertex, baseInstance, instanceCount); _setupVertexAttributes(); @@ -1016,24 +1170,23 @@ void OpenGLRenderer::draw_endSequence() void OpenGLRenderer::draw_init() { - indexState_t* indexState = RendererCore_getIndexState(); - if (indexState->initialized) + if (indexState.initialized) return; - indexState->initialized = true; + indexState.initialized = true; // create index buffer - glGenBuffers(1, &indexState->glIndexCacheBuffer); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexState->glIndexCacheBuffer); + glGenBuffers(1, &indexState.glIndexCacheBuffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexState.glIndexCacheBuffer); glBufferData(GL_ELEMENT_ARRAY_BUFFER, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, NULL, GL_DYNAMIC_DRAW); #if BOOST_OS_WINDOWS - indexState->mappedIndexBuffer = (uint8*)_aligned_malloc(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, 256); + indexState.mappedIndexBuffer = (uint8*)_aligned_malloc(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, 256); #else - indexState->mappedIndexBuffer = (uint8*)aligned_alloc(256, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); + indexState.mappedIndexBuffer = (uint8*)aligned_alloc(256, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); #endif glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - indexState->indexRingBuffer = LatteRingBuffer_create(indexState->mappedIndexBuffer, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); - indexState->tempIndexStorage = (uint8*)malloc(1024 * 1024 * 8); + indexState.indexRingBuffer = LatteRingBuffer_create(indexState.mappedIndexBuffer, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); + indexState.tempIndexStorage = (uint8*)malloc(1024 * 1024 * 8); // create virtual heap for index buffer - indexState->indexBufferVirtualHeap = virtualBufferHeap_create(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); + indexState.indexBufferVirtualHeap = virtualBufferHeap_create(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR); } void OpenGLRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset) diff --git a/src/Cafe/HW/Latte/Renderer/RendererCore.cpp b/src/Cafe/HW/Latte/Renderer/RendererCore.cpp index 4a6a82d5..96d7b7c2 100644 --- a/src/Cafe/HW/Latte/Renderer/RendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/RendererCore.cpp @@ -1,6 +1,7 @@ #include "RendererCore.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" +#include "HW/Latte/Core/LatteShader.h" #include "config/CemuConfig.h" #ifdef ENABLE_OPENGL @@ -8,142 +9,6 @@ #include "Cafe/HW/Latte/Renderer/OpenGL/LatteTextureViewGL.h" #endif -indexState_t indexState = { 0 }; -AttributePointerCacheEntry_t activeAttributePointer[LATTE_VS_ATTRIBUTE_LIMIT] = { 0 }; - -indexDataCacheEntry2_t* indexDataCacheBucket[INDEX_DATA_CACHE_BUCKETS] = { 0 }; -indexDataCacheEntry2_t* indexDataCacheFirst = nullptr; // points to least recently used item -indexDataCacheEntry2_t* indexDataCacheLast = nullptr; // points to most recently used item -sint32 indexDataCacheEntryCount = 0; - -void RendererCore_resetAttributePointerCache() -{ - for (sint32 i = 0; i < LATTE_VS_ATTRIBUTE_LIMIT; i++) - { - activeAttributePointer[i].vboOutput = (uint8*)-1; - activeAttributePointer[i].vboStride = (uint32)-1; - } -} - -bool RendererCore_checkIfAttributePointerCacheChanged(uint32 attributeShaderLoc, uint8* vboOutput, uint32 vboStride, uint8 dataFormat, uint8 nfa, bool isSigned) -{ - // don't call glVertexAttribPointer if parameters have not changed - if ( - activeAttributePointer[attributeShaderLoc].vboOutput == vboOutput && - activeAttributePointer[attributeShaderLoc].vboStride == vboStride && - activeAttributePointer[attributeShaderLoc].dataFormat == dataFormat && - activeAttributePointer[attributeShaderLoc].nfa == nfa && - activeAttributePointer[attributeShaderLoc].isSigned == isSigned - ) - { - return false; - } - activeAttributePointer[attributeShaderLoc].vboOutput = vboOutput; - activeAttributePointer[attributeShaderLoc].vboStride = vboStride; - activeAttributePointer[attributeShaderLoc].dataFormat = dataFormat; - activeAttributePointer[attributeShaderLoc].nfa = nfa; - activeAttributePointer[attributeShaderLoc].isSigned = isSigned; - - return true; -} - -indexState_t* RendererCore_getIndexState() -{ - return &indexState; -} - -indexDataCacheEntry2_t** RendererCore_getIndexDataCacheFirst() -{ - return &indexDataCacheFirst; -} - -indexDataCacheEntry2_t** RendererCore_getIndexDataCacheBucket(uint32 bucketIdx) -{ - return &indexDataCacheBucket[bucketIdx]; -} - -sint32* RendererCore_getIndexDataCacheEntryCount() -{ - return &indexDataCacheEntryCount; -} - -void RendererCore_appendToUsageLinkedList(indexDataCacheEntry2_t* entry) -{ - if (indexDataCacheLast == nullptr) - { - indexDataCacheLast = entry; - indexDataCacheFirst = entry; - entry->nextInMostRecentUsage = nullptr; - entry->prevInMostRecentUsage = nullptr; - } - else - { - indexDataCacheLast->nextInMostRecentUsage = entry; - entry->prevInMostRecentUsage = indexDataCacheLast; - entry->nextInMostRecentUsage = nullptr; - indexDataCacheLast = entry; - } -} - -void RendererCore_removeFromUsageLinkedList(indexDataCacheEntry2_t* entry) -{ - if (entry->prevInMostRecentUsage) - { - entry->prevInMostRecentUsage->nextInMostRecentUsage = entry->nextInMostRecentUsage; - } - else - indexDataCacheFirst = entry->nextInMostRecentUsage; - if (entry->nextInMostRecentUsage) - { - entry->nextInMostRecentUsage->prevInMostRecentUsage = entry->prevInMostRecentUsage; - } - else - indexDataCacheLast = entry->prevInMostRecentUsage; - entry->prevInMostRecentUsage = nullptr; - entry->nextInMostRecentUsage = nullptr; -} - -void RendererCore_removeFromBucket(indexDataCacheEntry2_t* entry) -{ - uint32 indexDataBucketIdx = (uint32)((entry->key.physAddr + entry->key.count) ^ (entry->key.physAddr >> 16)) % INDEX_DATA_CACHE_BUCKETS; - if (indexDataCacheBucket[indexDataBucketIdx] == entry) - { - indexDataCacheBucket[indexDataBucketIdx] = entry->nextInBucket; - entry->nextInBucket = nullptr; - return; - } - indexDataCacheEntry2_t* cacheEntryItr = indexDataCacheBucket[indexDataBucketIdx]; - while (cacheEntryItr) - { - if (cacheEntryItr->nextInBucket == entry) - { - cacheEntryItr->nextInBucket = entry->nextInBucket; - entry->nextInBucket = nullptr; - return; - } - // next - cacheEntryItr = cacheEntryItr->nextInBucket; - } -} - -void LatteDraw_cleanupAfterFrame() -{ - // drop everything from cache that is older than 30 frames - uint32 frameCounter = LatteGPUState.frameCounter; - while (indexDataCacheFirst) - { - indexDataCacheEntry2_t* entry = indexDataCacheFirst; - if ((frameCounter - entry->lastAccessFrameCount) < 30) - break; - // remove entry - virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry); - RendererCore_removeFromUsageLinkedList(entry); - RendererCore_removeFromBucket(entry); - free(entry); - } -} - - void LatteDraw_handleSpecialState8_clearAsDepth() { if (LatteGPUState.contextNew.GetSpecialStateValues()[0] == 0) @@ -212,4 +77,56 @@ void LatteDraw_handleSpecialState8_clearAsDepth() } } } -} \ No newline at end of file +} + +/* rects emulation */ + +void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) +{ + auto parameterMask = vertexShader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + // make sure PS has matching input + if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + continue; + gsSrc.append(fmt::format("passParameterSem{}Out = passParameterSem{}In[{}];\r\n", vsSemanticId, vsSemanticId, vIdx)); + } + gsSrc.append(fmt::format("gl_Position = gl_in[{}].gl_Position;\r\n", vIdx)); + gsSrc.append("EmitVertex();\r\n"); +} + +void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister) +{ + auto parameterMask = vertexShader->outputParameterMask; + for (uint32 i = 0; i < 32; i++) + { + if ((parameterMask & (1 << i)) == 0) + continue; + sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); + if (vsSemanticId < 0) + continue; + // make sure PS has matching input + if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) + continue; + gsSrc.append(fmt::format("passParameterSem{}Out = gen4thVertex{}(passParameterSem{}In[0], passParameterSem{}In[1], passParameterSem{}In[2]);\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId)); + } + gsSrc.append(fmt::format("gl_Position = gen4thVertex{}(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_in[2].gl_Position);\r\n", variant)); + gsSrc.append("EmitVertex();\r\n"); +} + +void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) +{ + sint32 pList[4] = { p0, p1, p2, p3 }; + for (sint32 i = 0; i < 4; i++) + { + if (pList[i] == 3) + rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister); + else + rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister); + } +} diff --git a/src/Cafe/HW/Latte/Renderer/RendererCore.h b/src/Cafe/HW/Latte/Renderer/RendererCore.h index 1c681373..c88b20b5 100644 --- a/src/Cafe/HW/Latte/Renderer/RendererCore.h +++ b/src/Cafe/HW/Latte/Renderer/RendererCore.h @@ -3,90 +3,4 @@ #include "Cafe/HW/Latte/Core/LatteRingBuffer.h" #include "Cafe/HW/Latte/Core/Latte.h" -using _INDEX_TYPE = Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE; - -#define INDEX_CACHE_ENTRIES (8) - -typedef struct -{ - MPTR prevIndexDataMPTR; - sint32 prevIndexType; - sint32 prevCount; - // index data - uint8* indexData; - uint8* indexData2; - uint32 indexBufferOffset; - sint32 indexDataSize; // current size - sint32 indexDataLimit; // maximum size - // info - uint32 maxIndex; - uint32 minIndex; -}indexDataCacheEntry_t; - -typedef struct -{ - indexDataCacheEntry_t indexCacheEntry[INDEX_CACHE_ENTRIES]; - sint32 nextCacheEntryIndex; - // info about currently used index data - uint32 maxIndex; - uint32 minIndex; - uint8* indexData; - // buffer - unsigned int glIndexCacheBuffer; - VirtualBufferHeap_t* indexBufferVirtualHeap; - uint8* mappedIndexBuffer; - LatteRingBuffer_t* indexRingBuffer; - uint8* tempIndexStorage; - // misc - bool initialized; - unsigned int glActiveElementArrayBuffer; -}indexState_t; - -typedef struct -{ - uint8* vboOutput; - uint32 vboStride; - uint8 dataFormat; - uint8 nfa; - bool isSigned; -} AttributePointerCacheEntry_t; - -#define INDEX_DATA_CACHE_BUCKETS (1783) - -typedef struct -{ - MPTR physAddr; - sint32 count; - uint32 primitiveRestartIndex; - uint32 primitiveMode; -}indexDataCacheKey_t; - -typedef struct _indexDataCacheEntry_t -{ - indexDataCacheKey_t key; - _indexDataCacheEntry_t* nextInBucket; // points to next element in same bucket - uint32 physSize; - uint32 hash; - _INDEX_TYPE indexType; - //sint32 indexType; - uint32 minIndex; - uint32 maxIndex; - uint32 lastAccessFrameCount; - VirtualBufferHeapEntry_t* heapEntry; - _indexDataCacheEntry_t* nextInMostRecentUsage; // points to element which was used more recently - _indexDataCacheEntry_t* prevInMostRecentUsage; // points to element which was used less recently -}indexDataCacheEntry2_t; - -void RendererCore_resetAttributePointerCache(); -bool RendererCore_checkIfAttributePointerCacheChanged(uint32 attributeShaderLoc, uint8* vboOutput, uint32 vboStride, uint8 dataFormat, uint8 nfa, bool isSigned); - -indexState_t* RendererCore_getIndexState(); -indexDataCacheEntry2_t** RendererCore_getIndexDataCacheFirst(); -indexDataCacheEntry2_t** RendererCore_getIndexDataCacheBucket(uint32 bucketIdx); -sint32* RendererCore_getIndexDataCacheEntryCount(); - -void RendererCore_appendToUsageLinkedList(indexDataCacheEntry2_t* entry); -void RendererCore_removeFromUsageLinkedList(indexDataCacheEntry2_t* entry); -void RendererCore_removeFromBucket(indexDataCacheEntry2_t* entry); -void LatteDraw_cleanupAfterFrame(); void LatteDraw_handleSpecialState8_clearAsDepth(); \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp index 55b2330e..6ac2150f 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp @@ -10,57 +10,7 @@ #include "util/helpers/Serializer.h" #include "Cafe/HW/Latte/Common/RegisterSerializer.h" -/* rects emulation */ -void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) -{ - auto parameterMask = vertexShader->outputParameterMask; - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - // make sure PS has matching input - if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) - continue; - gsSrc.append(fmt::format("passParameterSem{}Out = passParameterSem{}In[{}];\r\n", vsSemanticId, vsSemanticId, vIdx)); - } - gsSrc.append(fmt::format("gl_Position = gl_in[{}].gl_Position;\r\n", vIdx)); - gsSrc.append("EmitVertex();\r\n"); -} - -void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister) -{ - auto parameterMask = vertexShader->outputParameterMask; - for (uint32 i = 0; i < 32; i++) - { - if ((parameterMask & (1 << i)) == 0) - continue; - sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i); - if (vsSemanticId < 0) - continue; - // make sure PS has matching input - if (!psInputTable->hasPSImportForSemanticId(vsSemanticId)) - continue; - gsSrc.append(fmt::format("passParameterSem{}Out = gen4thVertex{}(passParameterSem{}In[0], passParameterSem{}In[1], passParameterSem{}In[2]);\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId)); - } - gsSrc.append(fmt::format("gl_Position = gen4thVertex{}(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_in[2].gl_Position);\r\n", variant)); - gsSrc.append("EmitVertex();\r\n"); -} - -void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister) -{ - sint32 pList[4] = { p0, p1, p2, p3 }; - for (sint32 i = 0; i < 4; i++) - { - if (pList[i] == 3) - rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister); - else - rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister); - } -} RendererShaderVk* rectsEmulationGS_generate(LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister) { diff --git a/src/main.cpp b/src/main.cpp index a4e44115..fc632921 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -265,7 +265,7 @@ int main(int argc, char* argv[]) int BreathOfTheWildChildProcessMain(); int main(int argc, char *argv[]) { -#if BOOST_OS_LINUX +#if BOOST_OS_LINUX && defined(ENABLE_VULKAN) if (getenv("CEMU_DETECT_RADV") != nullptr) return BreathOfTheWildChildProcessMain(); #endif