fix linux build without vulkan

This commit is contained in:
goeiecool9999 2026-04-21 13:50:38 +02:00
parent 3b0e5cef00
commit a9f8e6430d
6 changed files with 274 additions and 338 deletions

View File

@ -695,7 +695,9 @@ void LatteRenderTarget_itHLESwapScanBuffer()
performanceMonitor.gpuTime_frameTime.beginMeasuring();
LatteTC_CleanupUnusedTextures();
#ifdef ENABLE_OPENGL
LatteDraw_cleanupAfterFrame();
#endif
LatteQuery_CancelActiveGPU7Queries();
LatteBufferCache_notifySwapTVScanBuffer();
LattePerformanceMonitor_frameBegin();

View File

@ -13,21 +13,73 @@
#include "Cafe/HW/Latte/Renderer/OpenGL/CachedFBOGL.h"
#include "Cafe/HW/Latte/Renderer/OpenGL/RendererShaderGL.h"
#include "Cafe/HW/Latte/Renderer/RendererCore.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/OS/libs/gx2/GX2.h"
#include "Cafe/GameProfile/GameProfile.h"
#include "HW/Latte/Renderer/RendererCore.h"
#include "config/ActiveSettings.h"
using _INDEX_TYPE = Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE;
GLenum sGLActiveDrawMode = 0;
extern bool hasValidFramebufferAttached;
#define INDEX_CACHE_ENTRIES (8)
// Element type of the fixed-size indexCacheEntry[INDEX_CACHE_ENTRIES] array embedded in indexState.
// NOTE(review): none of the code visible in this section reads or writes these fields;
// they look like leftovers of an older index cache - confirm before relying on them.
typedef struct
{
MPTR prevIndexDataMPTR;
sint32 prevIndexType;
sint32 prevCount;
// index data
uint8* indexData;
uint8* indexData2;
uint32 indexBufferOffset;
sint32 indexDataSize; // current size
sint32 indexDataLimit; // maximum size
// info
uint32 maxIndex;
uint32 minIndex;
}indexDataCacheEntry_t;
// State of the OpenGL index data path. A single zero-initialized instance (indexState) exists in this file.
struct
{
indexDataCacheEntry_t indexCacheEntry[INDEX_CACHE_ENTRIES];
sint32 nextCacheEntryIndex;
// info about currently used index data
uint32 maxIndex; // written by LatteDrawGL_prepareIndicesWithGPUCache, read by the draw calls and LatteBufferCache_Sync
uint32 minIndex; // written/read together with maxIndex
uint8* indexData; // holds heapEntry->startOffset cast to a pointer; passed as the indices argument of the glDraw* calls
// buffer
GLuint glIndexCacheBuffer; // buffer object generated in OpenGLRenderer::draw_init
VirtualBufferHeap_t* indexBufferVirtualHeap; // created in draw_init; cache entries allocate their heapEntry from it
uint8* mappedIndexBuffer; // aligned allocation made in draw_init, handed to LatteRingBuffer_create
LatteRingBuffer_t* indexRingBuffer;
uint8* tempIndexStorage; // 8 MiB malloc'd in draw_init; source pointer of the glBufferSubData uploads
// misc
bool initialized; // set by draw_init so that a second call returns early
GLuint glActiveElementArrayBuffer; // buffer most recently bound through OpenGLRenderer::SetArrayElementBuffer
}indexState = { 0 };
// Per attribute location: the parameters that were last applied by _setAttributeBufferPointerRaw.
// That function compares all five fields against the incoming values and returns early when they match.
struct
{
uint8* vboOutput;
uint32 vboStride;
uint8 dataFormat;
uint8 nfa;
bool isSigned;
}activeAttributePointer[LATTE_VS_ATTRIBUTE_LIMIT] = { 0 };
void LatteDraw_resetAttributePointerCache()
{
RendererCore_resetAttributePointerCache();
for (sint32 i = 0; i < LATTE_VS_ATTRIBUTE_LIMIT; i++)
{
activeAttributePointer[i].vboOutput = (uint8*)-1;
activeAttributePointer[i].vboStride = (uint32)-1;
}
}
void _setAttributeBufferPointerRaw(uint32 attributeShaderLoc, uint8* buffer, uint32 bufferSize, uint32 stride, LatteParsedFetchShaderAttribute_t* attrib, uint8* vboOutput, uint32 vboStride)
@ -36,11 +88,15 @@ void _setAttributeBufferPointerRaw(uint32 attributeShaderLoc, uint8* buffer, uin
bool isSigned = attrib->isSigned != 0;
uint8 nfa = attrib->nfa;
// don't call glVertexAttribIPointer if parameters have not changed
bool attributePointerChanged = RendererCore_checkIfAttributePointerCacheChanged(attributeShaderLoc, vboOutput, vboStride, dataFormat, nfa, isSigned);
if (!attributePointerChanged) {
if (activeAttributePointer[attributeShaderLoc].vboOutput == vboOutput && activeAttributePointer[attributeShaderLoc].vboStride == vboStride && activeAttributePointer[attributeShaderLoc].dataFormat == dataFormat && activeAttributePointer[attributeShaderLoc].nfa == nfa && activeAttributePointer[attributeShaderLoc].isSigned == isSigned)
{
return;
}
activeAttributePointer[attributeShaderLoc].vboOutput = vboOutput;
activeAttributePointer[attributeShaderLoc].vboStride = vboStride;
activeAttributePointer[attributeShaderLoc].dataFormat = dataFormat;
activeAttributePointer[attributeShaderLoc].nfa = nfa;
activeAttributePointer[attributeShaderLoc].isSigned = isSigned;
// setup attribute pointer
if (dataFormat == FMT_32_32_32_32_FLOAT || dataFormat == FMT_32_32_32_32)
{
@ -139,23 +195,111 @@ void OpenGLRenderer::SetAttributeArrayState(uint32 index, bool isEnabled, sint32
// Sets the currently active element array buffer and binds it
void OpenGLRenderer::SetArrayElementBuffer(GLuint arrayElementBuffer)
{
indexState_t* indexState = RendererCore_getIndexState();
if (arrayElementBuffer == indexState->glActiveElementArrayBuffer)
if (arrayElementBuffer == indexState.glActiveElementArrayBuffer)
return;
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, arrayElementBuffer);
indexState->glActiveElementArrayBuffer = arrayElementBuffer;
indexState.glActiveElementArrayBuffer = arrayElementBuffer;
}
// Lookup key of an index cache entry. LatteDrawGL_prepareIndicesWithGPUCache fills a local
// instance (compareKey) and uses physAddr and count to pick the hash bucket.
typedef struct
{
MPTR physAddr; // the indexDataMPTR argument of the lookup
sint32 count; // the count argument of the lookup
uint32 primitiveRestartIndex; // read from VGT_MULTI_PRIM_IB_RESET_INDX by the lookup code
uint32 primitiveMode; // NOTE(review): assignment is not visible in this section - presumably the primitiveMode argument, confirm
}indexDataCacheKey_t;
// Entry of the GPU index data cache. Every entry is a member of two lists at once:
// the singly linked chain of its hash bucket (nextInBucket) and the doubly linked
// usage list that runs from indexDataCacheFirst to indexDataCacheLast.
typedef struct _indexDataCacheEntry_t
{
indexDataCacheKey_t key;
_indexDataCacheEntry_t* nextInBucket; // points to next element in same bucket
uint32 physSize; // byte count passed to LatteDrawGL_calculateIndexDataHash when re-checking the source data
uint32 hash; // hash stored after the most recent check of the source data
_INDEX_TYPE indexType;
//sint32 indexType;
uint32 minIndex; // written by _decodeAndUploadIndexData
uint32 maxIndex; // written by _decodeAndUploadIndexData
uint32 lastAccessFrameCount; // LatteGPUState.frameCounter at creation / most recent cache hit
VirtualBufferHeapEntry_t* heapEntry; // allocation from indexState.indexBufferVirtualHeap; startOffset is used as upload offset
_indexDataCacheEntry_t* nextInMostRecentUsage; // points to element which was used more recently
_indexDataCacheEntry_t* prevInMostRecentUsage; // points to element which was used less recently
}indexDataCacheEntry2_t;
// Storage of the index data cache: a hash table of bucket chains plus head and tail of the usage list.
// Bucket index = ((physAddr + count) ^ (physAddr >> 16)) % INDEX_DATA_CACHE_BUCKETS.
#define INDEX_DATA_CACHE_BUCKETS (1783)
indexDataCacheEntry2_t* indexDataCacheBucket[INDEX_DATA_CACHE_BUCKETS] = { 0 };
indexDataCacheEntry2_t* indexDataCacheFirst = nullptr; // points to least recently used item
indexDataCacheEntry2_t* indexDataCacheLast = nullptr; // points to most recently used item
sint32 indexDataCacheEntryCount = 0; // incremented on insertion; NOTE(review): no decrement is visible where entries are removed - confirm whether intended
// Links entry at the tail of the usage list, i.e. marks it as the most recently used item.
// The entry must not currently be a member of the list (callers unlink it first).
void _appendToUsageLinkedList(indexDataCacheEntry2_t* entry)
{
	indexDataCacheEntry2_t* const previousTail = indexDataCacheLast;
	if (previousTail != nullptr)
		previousTail->nextInMostRecentUsage = entry;
	else
		indexDataCacheFirst = entry; // list was empty, entry is also the least recently used item
	entry->prevInMostRecentUsage = previousTail;
	entry->nextInMostRecentUsage = nullptr;
	indexDataCacheLast = entry;
}
// Unlinks entry from the usage list and clears its two usage links.
// Head (indexDataCacheFirst) and tail (indexDataCacheLast) are updated when entry was at either end.
void _removeFromUsageLinkedList(indexDataCacheEntry2_t* entry)
{
	indexDataCacheEntry2_t* const lessRecent = entry->prevInMostRecentUsage;
	indexDataCacheEntry2_t* const moreRecent = entry->nextInMostRecentUsage;
	// whatever pointed forward at entry (predecessor or list head) now points at its successor
	(lessRecent ? lessRecent->nextInMostRecentUsage : indexDataCacheFirst) = moreRecent;
	// whatever pointed backward at entry (successor or list tail) now points at its predecessor
	(moreRecent ? moreRecent->prevInMostRecentUsage : indexDataCacheLast) = lessRecent;
	entry->prevInMostRecentUsage = nullptr;
	entry->nextInMostRecentUsage = nullptr;
}
// Unlinks entry from the chain of its hash bucket and clears entry->nextInBucket.
// The bucket index is recomputed from the entry's key with the same formula the lookup uses.
// If entry is not part of that chain, nothing is modified.
void _removeFromBucket(indexDataCacheEntry2_t* entry)
{
	const uint32 bucketIdx = (uint32)((entry->key.physAddr + entry->key.count) ^ (entry->key.physAddr >> 16)) % INDEX_DATA_CACHE_BUCKETS;
	// walk the chain via the link that points at each node, so head and inner nodes are handled alike
	for (indexDataCacheEntry2_t** link = &indexDataCacheBucket[bucketIdx]; *link != nullptr; link = &(*link)->nextInBucket)
	{
		if (*link != entry)
			continue;
		*link = entry->nextInBucket;
		entry->nextInBucket = nullptr;
		return;
	}
}
void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry)
{
uint32 count = cacheEntry->key.count;
uint32 primitiveRestartIndex = cacheEntry->key.primitiveRestartIndex;
indexState_t *indexState = RendererCore_getIndexState();
if (cacheEntry->indexType == _INDEX_TYPE::U16_BE)
{
// 16bit indices
uint16* indexInputU16 = (uint16*)memory_getPointerFromPhysicalOffset(cacheEntry->key.physAddr);
uint16* indexOutputU16 = (uint16*)indexState->tempIndexStorage;
uint16* indexOutputU16 = (uint16*)indexState.tempIndexStorage;
cemu_assert_debug(count != 0);
uint16 indexMinU16 = 0xFFFF;
uint16 indexMaxU16 = 0;
@ -191,14 +335,14 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry)
}
cacheEntry->minIndex = indexMinU16;
cacheEntry->maxIndex = indexMaxU16;
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint16), indexState->tempIndexStorage);
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint16), indexState.tempIndexStorage);
performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += (count * sizeof(uint16));
}
else if(cacheEntry->indexType == _INDEX_TYPE::U32_BE)
{
// 32bit indices
uint32* indexInputU32 = (uint32*)memory_getPointerFromPhysicalOffset(cacheEntry->key.physAddr);
uint32* indexOutputU32 = (uint32*)indexState->tempIndexStorage;
uint32* indexOutputU32 = (uint32*)indexState.tempIndexStorage;
cemu_assert_debug(count != 0);
uint32 indexMinU32 = _swapEndianU32(*indexInputU32);
uint32 indexMaxU32 = _swapEndianU32(*indexInputU32);
@ -216,7 +360,7 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry)
}
cacheEntry->minIndex = indexMinU32;
cacheEntry->maxIndex = indexMaxU32;
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint32), indexState->tempIndexStorage);
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, cacheEntry->heapEntry->startOffset, count * sizeof(uint32), indexState.tempIndexStorage);
performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += (count * sizeof(uint32));
}
else
@ -225,18 +369,35 @@ void _decodeAndUploadIndexData(indexDataCacheEntry2_t* cacheEntry)
}
}
void LatteDraw_cleanupAfterFrame()
{
if (g_renderer->GetType() != RendererAPI::OpenGL)
return;
// drop everything from cache that is older than 30 frames
uint32 frameCounter = LatteGPUState.frameCounter;
while (indexDataCacheFirst)
{
indexDataCacheEntry2_t* entry = indexDataCacheFirst;
if ((frameCounter - entry->lastAccessFrameCount) < 30)
break;
// remove entry
virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry);
_removeFromUsageLinkedList(entry);
_removeFromBucket(entry);
free(entry);
}
}
void LatteDrawGL_removeLeastRecentlyUsedIndexCacheEntries(sint32 count)
{
indexState_t *indexState = RendererCore_getIndexState();
indexDataCacheEntry2_t** indexDataCacheFirst = RendererCore_getIndexDataCacheFirst();
while (*indexDataCacheFirst && count > 0)
while (indexDataCacheFirst && count > 0)
{
indexDataCacheEntry2_t* entry = *indexDataCacheFirst;
indexDataCacheEntry2_t* entry = indexDataCacheFirst;
// remove entry
virtualBufferHeap_free(indexState->indexBufferVirtualHeap, entry->heapEntry);
RendererCore_removeFromUsageLinkedList(entry);
RendererCore_removeFromBucket(entry);
virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry);
_removeFromUsageLinkedList(entry);
_removeFromBucket(entry);
free(entry);
count--;
}
@ -274,22 +435,19 @@ uint32 LatteDrawGL_calculateIndexDataHash(uint8* data, uint32 size)
// todo - Outdated cache implementation. Update OpenGL renderer to use the generic implementation that is also used by the Vulkan renderer
void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE indexType, sint32 count, sint32 primitiveMode)
{
indexState_t* indexState = RendererCore_getIndexState();
indexDataCacheEntry2_t** indexDataCacheFirst = RendererCore_getIndexDataCacheFirst();
if (indexType == _INDEX_TYPE::AUTO)
{
indexState->minIndex = 0;
indexState->maxIndex = count - 1;
indexState.minIndex = 0;
indexState.maxIndex = count - 1;
// since no indices are used we don't need to unbind the element array buffer
return; // automatic indices
}
OpenGLRenderer::SetArrayElementBuffer(indexState->glIndexCacheBuffer);
OpenGLRenderer::SetArrayElementBuffer(indexState.glIndexCacheBuffer);
uint32 indexDataBucketIdx = (uint32)((indexDataMPTR + count) ^ (indexDataMPTR >> 16)) % INDEX_DATA_CACHE_BUCKETS;
// find matching entry
uint32 primitiveRestartIndex = LatteGPUState.contextNew.VGT_MULTI_PRIM_IB_RESET_INDX.get_RESTART_INDEX();
indexDataCacheEntry2_t* cacheEntryItr = *RendererCore_getIndexDataCacheBucket(indexDataBucketIdx);
indexDataCacheEntry2_t* cacheEntryItr = indexDataCacheBucket[indexDataBucketIdx];
indexDataCacheKey_t compareKey;
compareKey.physAddr = indexDataMPTR;
compareKey.count = count;
@ -304,9 +462,9 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde
continue;
}
// entry found
indexState->minIndex = cacheEntryItr->minIndex;
indexState->maxIndex = cacheEntryItr->maxIndex;
indexState->indexData = (uint8*)(size_t)cacheEntryItr->heapEntry->startOffset;
indexState.minIndex = cacheEntryItr->minIndex;
indexState.maxIndex = cacheEntryItr->maxIndex;
indexState.indexData = (uint8*)(size_t)cacheEntryItr->heapEntry->startOffset;
cacheEntryItr->lastAccessFrameCount = LatteGPUState.frameCounter;
// check if the data changed
uint32 h = LatteDrawGL_calculateIndexDataHash(memory_getPointerFromPhysicalOffset(indexDataMPTR), cacheEntryItr->physSize);
@ -317,8 +475,8 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde
cacheEntryItr->hash = h;
}
// move entry to the front
RendererCore_removeFromUsageLinkedList(cacheEntryItr);
RendererCore_appendToUsageLinkedList(cacheEntryItr);
_removeFromUsageLinkedList(cacheEntryItr);
_appendToUsageLinkedList(cacheEntryItr);
return;
}
// calculate size of index data in cache
@ -328,16 +486,16 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde
else
cacheIndexDataSize = count * sizeof(uint32);
// no matching entry, create new one
VirtualBufferHeapEntry_t* heapEntry = virtualBufferHeap_allocate(indexState->indexBufferVirtualHeap, cacheIndexDataSize);
VirtualBufferHeapEntry_t* heapEntry = virtualBufferHeap_allocate(indexState.indexBufferVirtualHeap, cacheIndexDataSize);
if (heapEntry == nullptr)
{
while (true)
{
LatteDrawGL_removeLeastRecentlyUsedIndexCacheEntries(10);
heapEntry = virtualBufferHeap_allocate(indexState->indexBufferVirtualHeap, cacheIndexDataSize);
heapEntry = virtualBufferHeap_allocate(indexState.indexBufferVirtualHeap, cacheIndexDataSize);
if (heapEntry != nullptr)
break;
if (*indexDataCacheFirst == nullptr)
if (indexDataCacheFirst == nullptr)
{
cemuLog_log(LogType::Force, "Unable to allocate entry in index cache");
assert_dbg();
@ -356,37 +514,34 @@ void LatteDrawGL_prepareIndicesWithGPUCache(MPTR indexDataMPTR, _INDEX_TYPE inde
cacheEntry->heapEntry = heapEntry;
cacheEntry->lastAccessFrameCount = LatteGPUState.frameCounter;
// append entry in bucket list
indexDataCacheEntry2_t** bucket = RendererCore_getIndexDataCacheBucket(indexDataBucketIdx);
cacheEntry->nextInBucket = *bucket;
*bucket = cacheEntry;
cacheEntry->nextInBucket = indexDataCacheBucket[indexDataBucketIdx];
indexDataCacheBucket[indexDataBucketIdx] = cacheEntry;
// append as most recently used entry
RendererCore_appendToUsageLinkedList(cacheEntry);
_appendToUsageLinkedList(cacheEntry);
// decode and upload the data
_decodeAndUploadIndexData(cacheEntry);
sint32* indexDataCacheEntryCount = RendererCore_getIndexDataCacheEntryCount();
(*indexDataCacheEntryCount)++;
indexState->minIndex = cacheEntry->minIndex;
indexState->maxIndex = cacheEntry->maxIndex;
indexState->indexData = (uint8*)(size_t)cacheEntry->heapEntry->startOffset;
indexDataCacheEntryCount++;
indexState.minIndex = cacheEntry->minIndex;
indexState.maxIndex = cacheEntry->maxIndex;
indexState.indexData = (uint8*)(size_t)cacheEntry->heapEntry->startOffset;
}
void LatteDrawGL_doDraw(_INDEX_TYPE indexType, uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count)
{
indexState_t* indexState = RendererCore_getIndexState();
if (indexType == _INDEX_TYPE::U16_BE)
{
// 16bit index, big endian
if (instanceCount > 1 || baseInstance != 0)
{
glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_SHORT, indexState->indexData, instanceCount, baseVertex, baseInstance);
glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_SHORT, indexState.indexData, instanceCount, baseVertex, baseInstance);
}
else
{
if (baseVertex != 0)
glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_SHORT, indexState->indexData, baseVertex);
glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_SHORT, indexState.indexData, baseVertex);
else
glDrawRangeElements(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_SHORT, indexState->indexData);
glDrawRangeElements(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_SHORT, indexState.indexData);
}
}
else if (indexType == _INDEX_TYPE::U32_BE)
@ -395,11 +550,11 @@ void LatteDrawGL_doDraw(_INDEX_TYPE indexType, uint32 baseVertex, uint32 baseIns
if (instanceCount > 1 || baseInstance != 0)
{
//debug_printf("Render instanced\n");
glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_INT, indexState->indexData, instanceCount, baseVertex, baseInstance);
glDrawElementsInstancedBaseVertexBaseInstance(sGLActiveDrawMode, count, GL_UNSIGNED_INT, indexState.indexData, instanceCount, baseVertex, baseInstance);
}
else
{
glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState->minIndex, indexState->maxIndex, count, GL_UNSIGNED_INT, indexState->indexData, baseVertex);
glDrawRangeElementsBaseVertex(sGLActiveDrawMode, indexState.minIndex, indexState.maxIndex, count, GL_UNSIGNED_INT, indexState.indexData, baseVertex);
}
}
else if (indexType == _INDEX_TYPE::AUTO)
@ -824,8 +979,7 @@ void OpenGLRenderer::draw_genericDrawHandler(uint32 baseVertex, uint32 baseInsta
endPerfMonProfiling(performanceMonitor.gpuTime_dcStageIndexMgr);
// synchronize vertex and uniform buffers
indexState_t *indexState = RendererCore_getIndexState();
LatteBufferCache_Sync(indexState->minIndex + baseVertex, indexState->maxIndex + baseVertex, baseInstance, instanceCount);
LatteBufferCache_Sync(indexState.minIndex + baseVertex, indexState.maxIndex + baseVertex, baseInstance, instanceCount);
_setupVertexAttributes();
@ -1016,24 +1170,23 @@ void OpenGLRenderer::draw_endSequence()
void OpenGLRenderer::draw_init()
{
indexState_t* indexState = RendererCore_getIndexState();
if (indexState->initialized)
if (indexState.initialized)
return;
indexState->initialized = true;
indexState.initialized = true;
// create index buffer
glGenBuffers(1, &indexState->glIndexCacheBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexState->glIndexCacheBuffer);
glGenBuffers(1, &indexState.glIndexCacheBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexState.glIndexCacheBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, NULL, GL_DYNAMIC_DRAW);
#if BOOST_OS_WINDOWS
indexState->mappedIndexBuffer = (uint8*)_aligned_malloc(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, 256);
indexState.mappedIndexBuffer = (uint8*)_aligned_malloc(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR, 256);
#else
indexState->mappedIndexBuffer = (uint8*)aligned_alloc(256, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR);
indexState.mappedIndexBuffer = (uint8*)aligned_alloc(256, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR);
#endif
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
indexState->indexRingBuffer = LatteRingBuffer_create(indexState->mappedIndexBuffer, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR);
indexState->tempIndexStorage = (uint8*)malloc(1024 * 1024 * 8);
indexState.indexRingBuffer = LatteRingBuffer_create(indexState.mappedIndexBuffer, GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR);
indexState.tempIndexStorage = (uint8*)malloc(1024 * 1024 * 8);
// create virtual heap for index buffer
indexState->indexBufferVirtualHeap = virtualBufferHeap_create(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR);
indexState.indexBufferVirtualHeap = virtualBufferHeap_create(GPU7_INDEX_BUFFER_CACHE_SIZE_DEPR);
}
void OpenGLRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset)

View File

@ -1,6 +1,7 @@
#include "RendererCore.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "HW/Latte/Core/LatteShader.h"
#include "config/CemuConfig.h"
#ifdef ENABLE_OPENGL
@ -8,142 +9,6 @@
#include "Cafe/HW/Latte/Renderer/OpenGL/LatteTextureViewGL.h"
#endif
// File-level renderer state. Other code reaches it through the RendererCore_get* accessors below.
indexState_t indexState = { 0 };
// last applied attribute pointer parameters, indexed by attribute location
AttributePointerCacheEntry_t activeAttributePointer[LATTE_VS_ATTRIBUTE_LIMIT] = { 0 };
// hash table of index cache entries; bucket index = ((physAddr + count) ^ (physAddr >> 16)) % INDEX_DATA_CACHE_BUCKETS
indexDataCacheEntry2_t* indexDataCacheBucket[INDEX_DATA_CACHE_BUCKETS] = { 0 };
indexDataCacheEntry2_t* indexDataCacheFirst = nullptr; // points to least recently used item
indexDataCacheEntry2_t* indexDataCacheLast = nullptr; // points to most recently used item
sint32 indexDataCacheEntryCount = 0;
// Overwrites vboOutput and vboStride of every cached attribute slot with all-ones values.
// Presumably these act as sentinels that never match real parameters, forcing the next
// RendererCore_checkIfAttributePointerCacheChanged call to report a change - confirm.
void RendererCore_resetAttributePointerCache()
{
	for (auto& slot : activeAttributePointer)
	{
		slot.vboOutput = (uint8*)-1;
		slot.vboStride = (uint32)-1;
	}
}
// Compares the given attribute pointer parameters with the ones cached for attributeShaderLoc.
// Returns false and leaves the cache untouched when all five values match.
// Otherwise stores the new values in the cache and returns true, so the caller knows
// it has to (re)issue the attribute pointer setup.
bool RendererCore_checkIfAttributePointerCacheChanged(uint32 attributeShaderLoc, uint8* vboOutput, uint32 vboStride, uint8 dataFormat, uint8 nfa, bool isSigned)
{
	auto& cached = activeAttributePointer[attributeShaderLoc];
	const bool anythingDiffers =
		cached.vboOutput != vboOutput ||
		cached.vboStride != vboStride ||
		cached.dataFormat != dataFormat ||
		cached.nfa != nfa ||
		cached.isSigned != isSigned;
	if (!anythingDiffers)
		return false;
	// remember the new parameters for the next comparison
	cached.vboOutput = vboOutput;
	cached.vboStride = vboStride;
	cached.dataFormat = dataFormat;
	cached.nfa = nfa;
	cached.isSigned = isSigned;
	return true;
}
// Returns a pointer to the single file-level indexState instance (never null).
indexState_t* RendererCore_getIndexState()
{
return &indexState;
}
// Returns the address of the usage-list head pointer (least recently used item),
// so callers observe later changes to the head by dereferencing again.
indexDataCacheEntry2_t** RendererCore_getIndexDataCacheFirst()
{
return &indexDataCacheFirst;
}
// Returns the address of the head pointer of hash bucket bucketIdx.
// No bounds check is performed; bucketIdx must be below INDEX_DATA_CACHE_BUCKETS.
indexDataCacheEntry2_t** RendererCore_getIndexDataCacheBucket(uint32 bucketIdx)
{
return &indexDataCacheBucket[bucketIdx];
}
// Returns a pointer to the file-level entry counter so callers can modify it in place.
sint32* RendererCore_getIndexDataCacheEntryCount()
{
return &indexDataCacheEntryCount;
}
// Links entry at the tail of the usage list, i.e. marks it as the most recently used item.
// The entry must not currently be a member of the list.
void RendererCore_appendToUsageLinkedList(indexDataCacheEntry2_t* entry)
{
	indexDataCacheEntry2_t* const previousTail = indexDataCacheLast;
	if (previousTail != nullptr)
		previousTail->nextInMostRecentUsage = entry;
	else
		indexDataCacheFirst = entry; // list was empty, entry is also the least recently used item
	entry->prevInMostRecentUsage = previousTail;
	entry->nextInMostRecentUsage = nullptr;
	indexDataCacheLast = entry;
}
// Unlinks entry from the usage list and clears its two usage links.
// Head (indexDataCacheFirst) and tail (indexDataCacheLast) are updated when entry was at either end.
void RendererCore_removeFromUsageLinkedList(indexDataCacheEntry2_t* entry)
{
	indexDataCacheEntry2_t* const lessRecent = entry->prevInMostRecentUsage;
	indexDataCacheEntry2_t* const moreRecent = entry->nextInMostRecentUsage;
	// whatever pointed forward at entry (predecessor or list head) now points at its successor
	(lessRecent ? lessRecent->nextInMostRecentUsage : indexDataCacheFirst) = moreRecent;
	// whatever pointed backward at entry (successor or list tail) now points at its predecessor
	(moreRecent ? moreRecent->prevInMostRecentUsage : indexDataCacheLast) = lessRecent;
	entry->prevInMostRecentUsage = nullptr;
	entry->nextInMostRecentUsage = nullptr;
}
// Unlinks entry from the chain of its hash bucket and clears entry->nextInBucket.
// The bucket index is recomputed from the entry's key (physAddr and count).
// If entry is not part of that chain, nothing is modified.
void RendererCore_removeFromBucket(indexDataCacheEntry2_t* entry)
{
	const uint32 bucketIdx = (uint32)((entry->key.physAddr + entry->key.count) ^ (entry->key.physAddr >> 16)) % INDEX_DATA_CACHE_BUCKETS;
	// walk the chain via the link that points at each node, so head and inner nodes are handled alike
	for (indexDataCacheEntry2_t** link = &indexDataCacheBucket[bucketIdx]; *link != nullptr; link = &(*link)->nextInBucket)
	{
		if (*link != entry)
			continue;
		*link = entry->nextInBucket;
		entry->nextInBucket = nullptr;
		return;
	}
}
void LatteDraw_cleanupAfterFrame()
{
// drop everything from cache that is older than 30 frames
uint32 frameCounter = LatteGPUState.frameCounter;
while (indexDataCacheFirst)
{
indexDataCacheEntry2_t* entry = indexDataCacheFirst;
if ((frameCounter - entry->lastAccessFrameCount) < 30)
break;
// remove entry
virtualBufferHeap_free(indexState.indexBufferVirtualHeap, entry->heapEntry);
RendererCore_removeFromUsageLinkedList(entry);
RendererCore_removeFromBucket(entry);
free(entry);
}
}
void LatteDraw_handleSpecialState8_clearAsDepth()
{
if (LatteGPUState.contextNew.GetSpecialStateValues()[0] == 0)
@ -212,4 +77,56 @@ void LatteDraw_handleSpecialState8_clearAsDepth()
}
}
}
}
}
/* rects emulation */
void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
{
auto parameterMask = vertexShader->outputParameterMask;
for (uint32 i = 0; i < 32; i++)
{
if ((parameterMask & (1 << i)) == 0)
continue;
sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
if (vsSemanticId < 0)
continue;
// make sure PS has matching input
if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
continue;
gsSrc.append(fmt::format("passParameterSem{}Out = passParameterSem{}In[{}];\r\n", vsSemanticId, vsSemanticId, vIdx));
}
gsSrc.append(fmt::format("gl_Position = gl_in[{}].gl_Position;\r\n", vIdx));
gsSrc.append("EmitVertex();\r\n");
}
void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister)
{
auto parameterMask = vertexShader->outputParameterMask;
for (uint32 i = 0; i < 32; i++)
{
if ((parameterMask & (1 << i)) == 0)
continue;
sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
if (vsSemanticId < 0)
continue;
// make sure PS has matching input
if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
continue;
gsSrc.append(fmt::format("passParameterSem{}Out = gen4thVertex{}(passParameterSem{}In[0], passParameterSem{}In[1], passParameterSem{}In[2]);\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId));
}
gsSrc.append(fmt::format("gl_Position = gen4thVertex{}(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_in[2].gl_Position);\r\n", variant));
gsSrc.append("EmitVertex();\r\n");
}
void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister)
{
sint32 pList[4] = { p0, p1, p2, p3 };
for (sint32 i = 0; i < 4; i++)
{
if (pList[i] == 3)
rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister);
else
rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister);
}
}

View File

@ -3,90 +3,4 @@
#include "Cafe/HW/Latte/Core/LatteRingBuffer.h"
#include "Cafe/HW/Latte/Core/Latte.h"
using _INDEX_TYPE = Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE;
#define INDEX_CACHE_ENTRIES (8)
// Element type of the fixed-size indexCacheEntry[INDEX_CACHE_ENTRIES] array embedded in indexState_t.
// NOTE(review): none of the code visible in this section reads or writes these fields;
// they look like leftovers of an older index cache - confirm before relying on them.
typedef struct
{
MPTR prevIndexDataMPTR;
sint32 prevIndexType;
sint32 prevCount;
// index data
uint8* indexData;
uint8* indexData2;
uint32 indexBufferOffset;
sint32 indexDataSize; // current size
sint32 indexDataLimit; // maximum size
// info
uint32 maxIndex;
uint32 minIndex;
}indexDataCacheEntry_t;
// State of the index data path. The single instance is obtained through RendererCore_getIndexState().
// GL object names are declared as plain unsigned int, so this header does not depend on GL types.
typedef struct
{
indexDataCacheEntry_t indexCacheEntry[INDEX_CACHE_ENTRIES];
sint32 nextCacheEntryIndex;
// info about currently used index data
uint32 maxIndex; // written when indices are prepared, read by the draw calls and LatteBufferCache_Sync
uint32 minIndex; // written/read together with maxIndex
uint8* indexData; // holds heapEntry->startOffset cast to a pointer; passed as the indices argument of the glDraw* calls
// buffer
unsigned int glIndexCacheBuffer; // buffer object generated in OpenGLRenderer::draw_init
VirtualBufferHeap_t* indexBufferVirtualHeap; // created in draw_init; cache entries allocate their heapEntry from it
uint8* mappedIndexBuffer; // aligned allocation made in draw_init, handed to LatteRingBuffer_create
LatteRingBuffer_t* indexRingBuffer;
uint8* tempIndexStorage; // 8 MiB malloc'd in draw_init; source pointer of the glBufferSubData uploads
// misc
bool initialized; // set by draw_init so that a second call returns early
unsigned int glActiveElementArrayBuffer; // buffer most recently bound through OpenGLRenderer::SetArrayElementBuffer
}indexState_t;
// Cached attribute pointer parameters for one attribute location.
// RendererCore_checkIfAttributePointerCacheChanged compares all five fields against new values.
typedef struct
{
uint8* vboOutput;
uint32 vboStride;
uint8 dataFormat;
uint8 nfa;
bool isSigned;
} AttributePointerCacheEntry_t;
#define INDEX_DATA_CACHE_BUCKETS (1783)
// Lookup key of an index cache entry. physAddr and count are the inputs of the bucket hash
// (see RendererCore_removeFromBucket).
typedef struct
{
MPTR physAddr;
sint32 count;
uint32 primitiveRestartIndex;
uint32 primitiveMode;
}indexDataCacheKey_t;
// Entry of the GPU index data cache. Every entry is a member of two lists at once:
// the singly linked chain of its hash bucket (nextInBucket) and the doubly linked
// usage list maintained by RendererCore_appendToUsageLinkedList / RendererCore_removeFromUsageLinkedList.
typedef struct _indexDataCacheEntry_t
{
indexDataCacheKey_t key;
_indexDataCacheEntry_t* nextInBucket; // points to next element in same bucket
uint32 physSize;
uint32 hash;
_INDEX_TYPE indexType;
//sint32 indexType;
uint32 minIndex;
uint32 maxIndex;
uint32 lastAccessFrameCount; // compared against LatteGPUState.frameCounter by LatteDraw_cleanupAfterFrame
VirtualBufferHeapEntry_t* heapEntry; // released with virtualBufferHeap_free when the entry is evicted
_indexDataCacheEntry_t* nextInMostRecentUsage; // points to element which was used more recently
_indexDataCacheEntry_t* prevInMostRecentUsage; // points to element which was used less recently
}indexDataCacheEntry2_t;
// --- attribute pointer cache ---
// overwrites vboOutput/vboStride of every cached slot with all-ones values
void RendererCore_resetAttributePointerCache();
// returns false if all parameters equal the cached ones; otherwise stores them and returns true
bool RendererCore_checkIfAttributePointerCacheChanged(uint32 attributeShaderLoc, uint8* vboOutput, uint32 vboStride, uint8 dataFormat, uint8 nfa, bool isSigned);
// --- accessors: each returns the address of a global owned by the implementation file ---
indexState_t* RendererCore_getIndexState();
indexDataCacheEntry2_t** RendererCore_getIndexDataCacheFirst();
indexDataCacheEntry2_t** RendererCore_getIndexDataCacheBucket(uint32 bucketIdx);
sint32* RendererCore_getIndexDataCacheEntryCount();
// --- index cache list maintenance ---
// appends entry as the most recently used item
void RendererCore_appendToUsageLinkedList(indexDataCacheEntry2_t* entry);
// unlinks entry from the usage list and clears its usage links
void RendererCore_removeFromUsageLinkedList(indexDataCacheEntry2_t* entry);
// unlinks entry from its hash bucket chain
void RendererCore_removeFromBucket(indexDataCacheEntry2_t* entry);
// evicts index cache entries that were not accessed for 30 or more frames
void LatteDraw_cleanupAfterFrame();
void LatteDraw_handleSpecialState8_clearAsDepth();

View File

@ -10,57 +10,7 @@
#include "util/helpers/Serializer.h"
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"
/* rects emulation */
void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
{
auto parameterMask = vertexShader->outputParameterMask;
for (uint32 i = 0; i < 32; i++)
{
if ((parameterMask & (1 << i)) == 0)
continue;
sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
if (vsSemanticId < 0)
continue;
// make sure PS has matching input
if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
continue;
gsSrc.append(fmt::format("passParameterSem{}Out = passParameterSem{}In[{}];\r\n", vsSemanticId, vsSemanticId, vIdx));
}
gsSrc.append(fmt::format("gl_Position = gl_in[{}].gl_Position;\r\n", vIdx));
gsSrc.append("EmitVertex();\r\n");
}
void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister)
{
auto parameterMask = vertexShader->outputParameterMask;
for (uint32 i = 0; i < 32; i++)
{
if ((parameterMask & (1 << i)) == 0)
continue;
sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
if (vsSemanticId < 0)
continue;
// make sure PS has matching input
if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
continue;
gsSrc.append(fmt::format("passParameterSem{}Out = gen4thVertex{}(passParameterSem{}In[0], passParameterSem{}In[1], passParameterSem{}In[2]);\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId));
}
gsSrc.append(fmt::format("gl_Position = gen4thVertex{}(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_in[2].gl_Position);\r\n", variant));
gsSrc.append("EmitVertex();\r\n");
}
void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister)
{
sint32 pList[4] = { p0, p1, p2, p3 };
for (sint32 i = 0; i < 4; i++)
{
if (pList[i] == 3)
rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister);
else
rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, pList[i], latteRegister);
}
}
RendererShaderVk* rectsEmulationGS_generate(LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister)
{

View File

@ -265,7 +265,7 @@ int main(int argc, char* argv[])
int BreathOfTheWildChildProcessMain();
int main(int argc, char *argv[])
{
#if BOOST_OS_LINUX
#if BOOST_OS_LINUX && defined(ENABLE_VULKAN)
if (getenv("CEMU_DETECT_RADV") != nullptr)
return BreathOfTheWildChildProcessMain();
#endif