mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-04-08 18:41:30 -06:00
Merge branch 'master' into add_encrypted_iso
This commit is contained in:
commit
f9d496ff2f
2
3rdparty/FAudio
vendored
2
3rdparty/FAudio
vendored
@ -1 +1 @@
|
||||
Subproject commit dc034fc671b07bbd14e8410d5dd6be6da38fdf6d
|
||||
Subproject commit 0372329dbb56e7814d0dea7b6eafa7a613bd8042
|
||||
2
3rdparty/SoundTouch/soundtouch
vendored
2
3rdparty/SoundTouch/soundtouch
vendored
@ -1 +1 @@
|
||||
Subproject commit 3982730833b6daefe77dcfb32b5c282851640c17
|
||||
Subproject commit a0fba77b6f9cfbdb71f8bbec58b6ac4e5e3b1097
|
||||
2
3rdparty/libsdl-org/SDL
vendored
2
3rdparty/libsdl-org/SDL
vendored
@ -1 +1 @@
|
||||
Subproject commit 683181b47cfabd293e3ea409f838915b8297a4fd
|
||||
Subproject commit 5848e584a1b606de26e3dbd1c7e4ecbc34f807a6
|
||||
@ -493,6 +493,10 @@ inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime*
|
||||
return reinterpret_cast<FT>(uptr(result));
|
||||
}
|
||||
|
||||
#if defined(__INTELLISENSE__) && !defined(LLVM_AVAILABLE)
|
||||
#define LLVM_AVAILABLE
|
||||
#endif
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
|
||||
namespace llvm
|
||||
|
||||
@ -210,7 +210,7 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
|
||||
#endif
|
||||
}
|
||||
|
||||
llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const
|
||||
llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type, std::source_location src_loc) const
|
||||
{
|
||||
uint s1 = type->getScalarSizeInBits();
|
||||
uint s2 = val->getType()->getScalarSizeInBits();
|
||||
@ -222,7 +222,7 @@ llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const
|
||||
|
||||
if (s1 != s2)
|
||||
{
|
||||
fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)", s1, s2);
|
||||
fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)\nCalled from: %s", s1, s2, src_loc);
|
||||
}
|
||||
|
||||
if (val->getType() == type)
|
||||
|
||||
@ -43,6 +43,7 @@
|
||||
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include <source_location>
|
||||
|
||||
// Helper function
|
||||
llvm::Value* peek_through_bitcasts(llvm::Value*);
|
||||
@ -3239,7 +3240,7 @@ public:
|
||||
}
|
||||
|
||||
// Bitcast with immediate constant folding
|
||||
llvm::Value* bitcast(llvm::Value* val, llvm::Type* type) const;
|
||||
llvm::Value* bitcast(llvm::Value* val, llvm::Type* type, std::source_location src_loc = std::source_location::current()) const;
|
||||
|
||||
template <typename T>
|
||||
llvm::Value* bitcast(llvm::Value* val)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "Emu/Memory/vm_ptr.h"
|
||||
#include "cellPamf.h"
|
||||
#include "Emu/Cell/ErrorCodes.h"
|
||||
#include "Utilities/BitField.h"
|
||||
|
||||
// Error Codes
|
||||
enum CellDmuxError :u32
|
||||
@ -18,6 +19,10 @@ enum CellDmuxStreamType : s32
|
||||
CELL_DMUX_STREAM_TYPE_UNDEF = 0,
|
||||
CELL_DMUX_STREAM_TYPE_PAMF = 1,
|
||||
CELL_DMUX_STREAM_TYPE_TERMINATOR = 2,
|
||||
|
||||
// Only used in cellSail
|
||||
CELL_DMUX_STREAM_TYPE_MP4 = 0x81,
|
||||
CELL_DMUX_STREAM_TYPE_AVI = 0x82
|
||||
};
|
||||
|
||||
enum CellDmuxMsgType : s32
|
||||
@ -48,13 +53,14 @@ struct CellDmuxEsMsg
|
||||
struct CellDmuxType
|
||||
{
|
||||
be_t<s32> streamType; // CellDmuxStreamType
|
||||
be_t<u32> reserved[2];
|
||||
be_t<s32> reserved1;
|
||||
be_t<s32> reserved2;
|
||||
};
|
||||
|
||||
struct CellDmuxType2
|
||||
{
|
||||
be_t<s32> streamType; // CellDmuxStreamType
|
||||
be_t<u32> streamSpecificInfo;
|
||||
be_t<s32> streamType;
|
||||
vm::bcptr<void> streamSpecificInfo;
|
||||
};
|
||||
|
||||
struct CellDmuxResource
|
||||
@ -73,8 +79,8 @@ struct CellDmuxResourceEx
|
||||
be_t<u32> memSize;
|
||||
be_t<u32> ppuThreadPriority;
|
||||
be_t<u32> ppuThreadStackSize;
|
||||
be_t<u32> spurs_addr;
|
||||
u8 priority[8];
|
||||
vm::bptr<void> spurs; // CellSpurs*
|
||||
be_t<u64, 1> priority;
|
||||
be_t<u32> maxContention;
|
||||
};
|
||||
|
||||
@ -85,33 +91,23 @@ struct CellDmuxResourceSpurs
|
||||
be_t<u32> maxContention;
|
||||
};
|
||||
|
||||
/*
|
||||
struct CellDmuxResource2Ex
|
||||
{
|
||||
b8 isResourceEx; //true
|
||||
CellDmuxResourceEx resourceEx;
|
||||
};
|
||||
|
||||
struct CellDmuxResource2NoEx
|
||||
{
|
||||
b8 isResourceEx; //false
|
||||
CellDmuxResource resource;
|
||||
};
|
||||
*/
|
||||
|
||||
struct CellDmuxResource2
|
||||
{
|
||||
b8 isResourceEx;
|
||||
be_t<u32> memAddr;
|
||||
be_t<u32> memSize;
|
||||
be_t<u32> ppuThreadPriority;
|
||||
be_t<u32> ppuThreadStackSize;
|
||||
be_t<u32> shit[4];
|
||||
|
||||
union
|
||||
{
|
||||
CellDmuxResource resource;
|
||||
CellDmuxResourceEx resourceEx;
|
||||
};
|
||||
};
|
||||
|
||||
using CellDmuxCbMsg = u32(u32 demuxerHandle, vm::cptr<CellDmuxMsg> demuxerMsg, vm::ptr<void> cbArg);
|
||||
struct DmuxContext;
|
||||
struct DmuxEsContext;
|
||||
|
||||
using CellDmuxCbEsMsg = u32(u32 demuxerHandle, u32 esHandle, vm::cptr<CellDmuxEsMsg> esMsg, vm::ptr<void> cbArg);
|
||||
using CellDmuxCbMsg = u32(vm::ptr<DmuxContext> demuxerHandle, vm::cptr<CellDmuxMsg> demuxerMsg, vm::ptr<void> cbArg);
|
||||
|
||||
using CellDmuxCbEsMsg = u32(vm::ptr<DmuxContext> demuxerHandle, vm::ptr<DmuxEsContext> esHandle, vm::cptr<CellDmuxEsMsg> esMsg, vm::ptr<void> cbArg);
|
||||
|
||||
// Used for internal callbacks as well
|
||||
template <typename F>
|
||||
@ -177,6 +173,70 @@ struct DmuxAuInfo
|
||||
be_t<u32> specific_info_size;
|
||||
};
|
||||
|
||||
struct DmuxAuQueueElement
|
||||
{
|
||||
be_t<u32> index;
|
||||
u8 unk; // unused
|
||||
DmuxAuInfo au_info;
|
||||
};
|
||||
|
||||
CHECK_SIZE(DmuxAuQueueElement, 0x38);
|
||||
|
||||
enum DmuxState : u32
|
||||
{
|
||||
DMUX_STOPPED = 1 << 0,
|
||||
DMUX_RUNNING = 1 << 1,
|
||||
};
|
||||
|
||||
struct alignas(0x10) DmuxContext // CellDmuxHandle = DmuxContext*
|
||||
{
|
||||
vm::bptr<DmuxContext> _this;
|
||||
be_t<u32> _this_size;
|
||||
be_t<u32> version;
|
||||
be_t<u32> dmux_state;
|
||||
CellDmuxType dmux_type;
|
||||
CellDmuxCb dmux_cb;
|
||||
b8 stream_is_set;
|
||||
vm::bptr<void> core_handle;
|
||||
be_t<u32> version_; // Same value as 'version'
|
||||
be_t<u64> user_data;
|
||||
be_t<s32> max_enabled_es_num;
|
||||
be_t<s32> enabled_es_num;
|
||||
be_t<u32> _dx_mhd; // sys_mutex_t
|
||||
u8 reserved[0x7c];
|
||||
};
|
||||
|
||||
CHECK_SIZE_ALIGN(DmuxContext, 0xc0, 0x10);
|
||||
|
||||
struct alignas(0x10) DmuxEsContext // CellDmuxEsHandle = DmuxEsContext*
|
||||
{
|
||||
be_t<u32> _dx_mes; // sys_mutex_t
|
||||
be_t<u32> is_enabled;
|
||||
be_t<u32> error_mem_size;
|
||||
be_t<u32> error_count;
|
||||
vm::bptr<void> error_mem_addr;
|
||||
vm::bptr<DmuxEsContext> _this;
|
||||
be_t<u32> _this_size;
|
||||
be_t<s32> _this_index;
|
||||
vm::bptr<DmuxContext> dmux_handle;
|
||||
CellDmuxEsCb es_cb;
|
||||
vm::bptr<void> core_es_handle;
|
||||
bf_t<be_t<u32>, 0, 1> flush_started;
|
||||
|
||||
struct
|
||||
{
|
||||
be_t<s32> max_size;
|
||||
be_t<s32> allocated_size;
|
||||
be_t<s32> size;
|
||||
be_t<s32> front;
|
||||
be_t<s32> back;
|
||||
be_t<s32> allocated_back;
|
||||
}
|
||||
au_queue;
|
||||
};
|
||||
|
||||
CHECK_SIZE_ALIGN(DmuxEsContext, 0x50, 0x10);
|
||||
|
||||
using DmuxNotifyDemuxDone = error_code(vm::ptr<void>, u32, vm::ptr<void>);
|
||||
using DmuxNotifyFatalErr = error_code(vm::ptr<void>, u32, vm::ptr<void>);
|
||||
using DmuxNotifyProgEndCode = error_code(vm::ptr<void>, vm::ptr<void>);
|
||||
@ -194,10 +254,10 @@ using CellDmuxCoreOpSetStream = error_code(vm::ptr<void>, vm::cptr<void>, u32, b
|
||||
using CellDmuxCoreOpReleaseAu = error_code(vm::ptr<void>, vm::ptr<void>, u32);
|
||||
using CellDmuxCoreOpQueryEsAttr = error_code(vm::cptr<void>, vm::cptr<void>, vm::ptr<CellDmuxPamfEsAttr>);
|
||||
using CellDmuxCoreOpEnableEs = error_code(vm::ptr<void>, vm::cptr<void>, vm::cptr<CellDmuxEsResource>, vm::cptr<DmuxCb<DmuxEsNotifyAuFound>>, vm::cptr<DmuxCb<DmuxEsNotifyFlushDone>>, vm::cptr<void>, vm::pptr<void>);
|
||||
using CellDmuxCoreOpDisableEs = u32(vm::ptr<void>);
|
||||
using CellDmuxCoreOpFlushEs = u32(vm::ptr<void>);
|
||||
using CellDmuxCoreOpResetEs = u32(vm::ptr<void>);
|
||||
using CellDmuxCoreOpResetStreamAndWaitDone = u32(vm::ptr<void>);
|
||||
using CellDmuxCoreOpDisableEs = error_code(vm::ptr<void>);
|
||||
using CellDmuxCoreOpFlushEs = error_code(vm::ptr<void>);
|
||||
using CellDmuxCoreOpResetEs = error_code(vm::ptr<void>);
|
||||
using CellDmuxCoreOpResetStreamAndWaitDone = error_code(vm::ptr<void>);
|
||||
|
||||
struct CellDmuxCoreOps
|
||||
{
|
||||
|
||||
@ -2591,7 +2591,7 @@ template <bool raw_es>
|
||||
error_code _CellDmuxCoreOpEnableEs(ppu_thread& ppu, vm::ptr<CellDmuxPamfHandle> handle, vm::cptr<void> esFilterId, vm::cptr<CellDmuxEsResource> esResource, vm::cptr<DmuxCb<DmuxEsNotifyAuFound>> notifyAuFound,
|
||||
vm::cptr<DmuxCb<DmuxEsNotifyFlushDone>> notifyFlushDone, vm::cptr<void> esSpecificInfo, vm::pptr<CellDmuxPamfEsHandle> esHandle)
|
||||
{
|
||||
cellDmuxPamf.notice("_CellDmuxCoreOpEnableEs<raw_es=%d>(handle=*0x%x, esFilterId=*0x%x, esResource=*0x%x, notifyAuFound=*0x%x, notifyFlushDone=*0x%x, esSpecificInfo=*0x%x, esHandle)",
|
||||
cellDmuxPamf.notice("_CellDmuxCoreOpEnableEs<raw_es=%d>(handle=*0x%x, esFilterId=*0x%x, esResource=*0x%x, notifyAuFound=*0x%x, notifyFlushDone=*0x%x, esSpecificInfo=*0x%x, esHandle=**0x%x)",
|
||||
raw_es, handle, esFilterId, esResource, notifyAuFound, notifyFlushDone, esSpecificInfo, esHandle);
|
||||
|
||||
if (!handle || !esFilterId || !esResource || !esResource->memAddr || esResource->memSize == 0u || !notifyAuFound || !notifyAuFound->cbFunc || !notifyAuFound->cbArg || !notifyFlushDone || !notifyFlushDone->cbFunc || !notifyFlushDone->cbArg)
|
||||
|
||||
@ -192,7 +192,46 @@ error_code cellNetCtlDelHandler(s32 hid)
|
||||
|
||||
error_code cellNetCtlGetInfo(s32 code, vm::ptr<CellNetCtlInfo> info)
|
||||
{
|
||||
cellNetCtl.warning("cellNetCtlGetInfo(code=0x%x (%s), info=*0x%x)", code, InfoCodeToName(code), info);
|
||||
bool log_it_once = false;
|
||||
|
||||
switch (code)
|
||||
{
|
||||
case CELL_NET_CTL_INFO_ETHER_ADDR:
|
||||
case CELL_NET_CTL_INFO_DEVICE:
|
||||
case CELL_NET_CTL_INFO_MTU:
|
||||
case CELL_NET_CTL_INFO_LINK_TYPE:
|
||||
case CELL_NET_CTL_INFO_IP_CONFIG:
|
||||
case CELL_NET_CTL_INFO_IP_ADDRESS:
|
||||
case CELL_NET_CTL_INFO_NETMASK:
|
||||
case CELL_NET_CTL_INFO_DEFAULT_ROUTE:
|
||||
case CELL_NET_CTL_INFO_HTTP_PROXY_CONFIG:
|
||||
case CELL_NET_CTL_INFO_UPNP_CONFIG:
|
||||
{
|
||||
log_it_once = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool log_it = true;
|
||||
|
||||
if (log_it_once && vm::check_addr(info.addr()))
|
||||
{
|
||||
struct logged_t
|
||||
{
|
||||
std::array<atomic_t<bool>, 256> logged_code{};
|
||||
};
|
||||
|
||||
if (g_fxo->get<logged_t>().logged_code[::narrow<u8>(code)].exchange(true))
|
||||
{
|
||||
log_it = false;
|
||||
}
|
||||
}
|
||||
|
||||
(log_it ? cellNetCtl.warning : cellNetCtl.trace)("cellNetCtlGetInfo(code=0x%x (%s), info=*0x%x)", code, InfoCodeToName(code), info);
|
||||
|
||||
auto& nph = g_fxo->get<named_thread<np::np_handler>>();
|
||||
|
||||
|
||||
@ -5,14 +5,6 @@
|
||||
#include <bitset>
|
||||
#include "cellPamf.h"
|
||||
|
||||
const std::function<bool()> SQUEUE_ALWAYS_EXIT = []() { return true; };
|
||||
const std::function<bool()> SQUEUE_NEVER_EXIT = []() { return false; };
|
||||
|
||||
bool squeue_test_exit()
|
||||
{
|
||||
return Emu.IsStopped();
|
||||
}
|
||||
|
||||
LOG_CHANNEL(cellPamf);
|
||||
|
||||
template<>
|
||||
|
||||
@ -595,345 +595,3 @@ struct CellPamfReader
|
||||
CHECK_SIZE(CellPamfReader, 128);
|
||||
|
||||
error_code cellPamfReaderInitialize(vm::ptr<CellPamfReader> pSelf, vm::cptr<PamfHeader> pAddr, u64 fileSize, u32 attribute);
|
||||
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
|
||||
extern const std::function<bool()> SQUEUE_ALWAYS_EXIT;
|
||||
extern const std::function<bool()> SQUEUE_NEVER_EXIT;
|
||||
|
||||
bool squeue_test_exit();
|
||||
|
||||
// TODO: eliminate this boolshit
|
||||
template<typename T, u32 sq_size = 256>
|
||||
class squeue_t
|
||||
{
|
||||
struct squeue_sync_var_t
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 position : 31;
|
||||
u32 pop_lock : 1;
|
||||
};
|
||||
struct
|
||||
{
|
||||
u32 count : 31;
|
||||
u32 push_lock : 1;
|
||||
};
|
||||
};
|
||||
|
||||
atomic_t<squeue_sync_var_t> m_sync;
|
||||
|
||||
mutable std::mutex m_rcv_mutex;
|
||||
mutable std::mutex m_wcv_mutex;
|
||||
mutable std::condition_variable m_rcv;
|
||||
mutable std::condition_variable m_wcv;
|
||||
|
||||
T m_data[sq_size];
|
||||
|
||||
enum squeue_sync_var_result : u32
|
||||
{
|
||||
SQSVR_OK = 0,
|
||||
SQSVR_LOCKED = 1,
|
||||
SQSVR_FAILED = 2,
|
||||
};
|
||||
|
||||
public:
|
||||
squeue_t()
|
||||
: m_sync(squeue_sync_var_t{})
|
||||
{
|
||||
}
|
||||
|
||||
static u32 get_max_size()
|
||||
{
|
||||
return sq_size;
|
||||
}
|
||||
|
||||
bool is_full() const
|
||||
{
|
||||
return m_sync.load().count == sq_size;
|
||||
}
|
||||
|
||||
bool push(const T& data, const std::function<bool()>& test_exit)
|
||||
{
|
||||
u32 pos = 0;
|
||||
|
||||
while (u32 res = m_sync.atomic_op([&pos](squeue_sync_var_t& sync) -> u32
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
|
||||
if (sync.push_lock)
|
||||
{
|
||||
return SQSVR_LOCKED;
|
||||
}
|
||||
if (sync.count == sq_size)
|
||||
{
|
||||
return SQSVR_FAILED;
|
||||
}
|
||||
|
||||
sync.push_lock = 1;
|
||||
pos = sync.position + sync.count;
|
||||
return SQSVR_OK;
|
||||
}))
|
||||
{
|
||||
if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> wcv_lock(m_wcv_mutex);
|
||||
m_wcv.wait_for(wcv_lock, std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
m_data[pos >= sq_size ? pos - sq_size : pos] = data;
|
||||
|
||||
m_sync.atomic_op([](squeue_sync_var_t& sync)
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
ensure(!!sync.push_lock);
|
||||
sync.push_lock = 0;
|
||||
sync.count++;
|
||||
});
|
||||
|
||||
m_rcv.notify_one();
|
||||
m_wcv.notify_one();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool push(const T& data, const volatile bool* do_exit)
|
||||
{
|
||||
return push(data, [do_exit]() { return do_exit && *do_exit; });
|
||||
}
|
||||
|
||||
bool push(const T& data)
|
||||
{
|
||||
return push(data, SQUEUE_NEVER_EXIT);
|
||||
}
|
||||
|
||||
bool try_push(const T& data)
|
||||
{
|
||||
return push(data, SQUEUE_ALWAYS_EXIT);
|
||||
}
|
||||
|
||||
bool pop(T& data, const std::function<bool()>& test_exit)
|
||||
{
|
||||
u32 pos = 0;
|
||||
|
||||
while (u32 res = m_sync.atomic_op([&pos](squeue_sync_var_t& sync) -> u32
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
|
||||
if (!sync.count)
|
||||
{
|
||||
return SQSVR_FAILED;
|
||||
}
|
||||
if (sync.pop_lock)
|
||||
{
|
||||
return SQSVR_LOCKED;
|
||||
}
|
||||
|
||||
sync.pop_lock = 1;
|
||||
pos = sync.position;
|
||||
return SQSVR_OK;
|
||||
}))
|
||||
{
|
||||
if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> rcv_lock(m_rcv_mutex);
|
||||
m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
data = m_data[pos];
|
||||
|
||||
m_sync.atomic_op([](squeue_sync_var_t& sync)
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
ensure(!!sync.pop_lock);
|
||||
sync.pop_lock = 0;
|
||||
sync.position++;
|
||||
sync.count--;
|
||||
if (sync.position == sq_size)
|
||||
{
|
||||
sync.position = 0;
|
||||
}
|
||||
});
|
||||
|
||||
m_rcv.notify_one();
|
||||
m_wcv.notify_one();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool pop(T& data, const volatile bool* do_exit)
|
||||
{
|
||||
return pop(data, [do_exit]() { return do_exit && *do_exit; });
|
||||
}
|
||||
|
||||
bool pop(T& data)
|
||||
{
|
||||
return pop(data, SQUEUE_NEVER_EXIT);
|
||||
}
|
||||
|
||||
bool try_pop(T& data)
|
||||
{
|
||||
return pop(data, SQUEUE_ALWAYS_EXIT);
|
||||
}
|
||||
|
||||
bool peek(T& data, u32 start_pos, const std::function<bool()>& test_exit)
|
||||
{
|
||||
ensure(start_pos < sq_size);
|
||||
u32 pos = 0;
|
||||
|
||||
while (u32 res = m_sync.atomic_op([&pos, start_pos](squeue_sync_var_t& sync) -> u32
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
|
||||
if (sync.count <= start_pos)
|
||||
{
|
||||
return SQSVR_FAILED;
|
||||
}
|
||||
if (sync.pop_lock)
|
||||
{
|
||||
return SQSVR_LOCKED;
|
||||
}
|
||||
|
||||
sync.pop_lock = 1;
|
||||
pos = sync.position + start_pos;
|
||||
return SQSVR_OK;
|
||||
}))
|
||||
{
|
||||
if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> rcv_lock(m_rcv_mutex);
|
||||
m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
data = m_data[pos >= sq_size ? pos - sq_size : pos];
|
||||
|
||||
m_sync.atomic_op([](squeue_sync_var_t& sync)
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
ensure(!!sync.pop_lock);
|
||||
sync.pop_lock = 0;
|
||||
});
|
||||
|
||||
m_rcv.notify_one();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool peek(T& data, u32 start_pos, const volatile bool* do_exit)
|
||||
{
|
||||
return peek(data, start_pos, [do_exit]() { return do_exit && *do_exit; });
|
||||
}
|
||||
|
||||
bool peek(T& data, u32 start_pos = 0)
|
||||
{
|
||||
return peek(data, start_pos, SQUEUE_NEVER_EXIT);
|
||||
}
|
||||
|
||||
bool try_peek(T& data, u32 start_pos = 0)
|
||||
{
|
||||
return peek(data, start_pos, SQUEUE_ALWAYS_EXIT);
|
||||
}
|
||||
|
||||
class squeue_data_t
|
||||
{
|
||||
T* const m_data;
|
||||
const u32 m_pos;
|
||||
const u32 m_count;
|
||||
|
||||
squeue_data_t(T* data, u32 pos, u32 count)
|
||||
: m_data(data)
|
||||
, m_pos(pos)
|
||||
, m_count(count)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
T& operator [] (u32 index)
|
||||
{
|
||||
ensure(index < m_count);
|
||||
index += m_pos;
|
||||
index = index < sq_size ? index : index - sq_size;
|
||||
return m_data[index];
|
||||
}
|
||||
};
|
||||
|
||||
void process(void(*proc)(squeue_data_t data))
|
||||
{
|
||||
u32 pos, count;
|
||||
|
||||
while (m_sync.atomic_op([&pos, &count](squeue_sync_var_t& sync) -> u32
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
|
||||
if (sync.pop_lock || sync.push_lock)
|
||||
{
|
||||
return SQSVR_LOCKED;
|
||||
}
|
||||
|
||||
pos = sync.position;
|
||||
count = sync.count;
|
||||
sync.pop_lock = 1;
|
||||
sync.push_lock = 1;
|
||||
return SQSVR_OK;
|
||||
}))
|
||||
{
|
||||
std::unique_lock<std::mutex> rcv_lock(m_rcv_mutex);
|
||||
m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
proc(squeue_data_t(m_data, pos, count));
|
||||
|
||||
m_sync.atomic_op([](squeue_sync_var_t& sync)
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
ensure(!!sync.pop_lock);
|
||||
ensure(!!sync.push_lock);
|
||||
sync.pop_lock = 0;
|
||||
sync.push_lock = 0;
|
||||
});
|
||||
|
||||
m_wcv.notify_one();
|
||||
m_rcv.notify_one();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
while (m_sync.atomic_op([](squeue_sync_var_t& sync) -> u32
|
||||
{
|
||||
ensure(sync.count <= sq_size);
|
||||
ensure(sync.position < sq_size);
|
||||
|
||||
if (sync.pop_lock || sync.push_lock)
|
||||
{
|
||||
return SQSVR_LOCKED;
|
||||
}
|
||||
|
||||
sync.pop_lock = 1;
|
||||
sync.push_lock = 1;
|
||||
return SQSVR_OK;
|
||||
}))
|
||||
{
|
||||
std::unique_lock<std::mutex> rcv_lock(m_rcv_mutex);
|
||||
m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
m_sync.exchange({});
|
||||
m_wcv.notify_one();
|
||||
m_rcv.notify_one();
|
||||
}
|
||||
};
|
||||
|
||||
@ -340,7 +340,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
||||
|
||||
const auto ftype = FunctionType::get(get_type<void>(), {
|
||||
get_type<u8*>(), // Exec base
|
||||
m_ir->getPtrTy(), // PPU context
|
||||
get_type<u8*>(), // PPU context
|
||||
get_type<u64>(), // Segment address (for PRX)
|
||||
get_type<u8*>(), // Memory base
|
||||
get_type<u64>(), // r0
|
||||
@ -386,7 +386,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
||||
const auto addr_array = new GlobalVariable(*m_module, addr_array_type, false, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, vec_addrs));
|
||||
|
||||
// Create an array of function pointers
|
||||
const auto func_table_type = ArrayType::get(m_ir->getPtrTy(), functions.size());
|
||||
const auto func_table_type = ArrayType::get(get_type<u8*>(), functions.size());
|
||||
const auto init_func_table = ConstantArray::get(func_table_type, functions);
|
||||
const auto func_table = new GlobalVariable(*m_module, func_table_type, false, GlobalVariable::PrivateLinkage, init_func_table);
|
||||
|
||||
@ -413,7 +413,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
||||
const auto func_pc = ZExt(m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst), get_type<u64>());
|
||||
|
||||
ptr_inst = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(func_table->getValueType(), func_table, {m_ir->getInt64(0), index_value}));
|
||||
assert(ptr_inst->getResultElementType() == m_ir->getPtrTy());
|
||||
assert(ptr_inst->getResultElementType() == get_type<u8*>());
|
||||
|
||||
const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst);
|
||||
const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc;
|
||||
@ -622,7 +622,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
||||
const auto pos = m_ir->CreateShl(indirect, 1);
|
||||
const auto ptr = m_ir->CreatePtrAdd(m_exec, pos);
|
||||
const auto val = m_ir->CreateLoad(get_type<u64>(), ptr);
|
||||
callee = FunctionCallee(type, m_ir->CreateIntToPtr(val, m_ir->getPtrTy()));
|
||||
callee = FunctionCallee(type, m_ir->CreateIntToPtr(val, get_type<u8*>()));
|
||||
|
||||
// Load new segment address
|
||||
const auto seg_base_ptr = m_ir->CreatePtrAdd(m_exec, m_ir->getInt64(vm::g_exec_addr_seg_offset));
|
||||
@ -5414,7 +5414,7 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
|
||||
void PPUTranslator::build_interpreter()
|
||||
{
|
||||
#define BUILD_VEC_INST(i) { \
|
||||
m_function = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("op_" #i, get_type<void>(), m_ir->getPtrTy()).getCallee()); \
|
||||
m_function = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("op_" #i, get_type<void>(), get_type<u8*>()).getCallee()); \
|
||||
std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \
|
||||
std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \
|
||||
IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \
|
||||
|
||||
@ -6194,7 +6194,7 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
{
|
||||
const auto arg = reduced_loop->find_reg(reg);
|
||||
|
||||
if (arg && reg != op_rt)
|
||||
if (arg && arg->regs.count() != 0)
|
||||
{
|
||||
if (reg_first == reg)
|
||||
{
|
||||
@ -6217,6 +6217,12 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
}
|
||||
|
||||
if (type & spu_itype::memory || type == spu_itype::RDCH || type == spu_itype::RCHCNT)
|
||||
{
|
||||
// Register external origin
|
||||
org.add_register_origin(s_reg_max);
|
||||
}
|
||||
|
||||
*ensure(reduced_loop->find_reg(op_rt)) = org;
|
||||
}
|
||||
|
||||
@ -6359,26 +6365,55 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
}
|
||||
|
||||
std::array<u32, s_reg_max> reg_use{};
|
||||
std::bitset<s_reg_max> reg_maybe_float{};
|
||||
std::bitset<s_reg_max> reg_mod{};
|
||||
|
||||
for (auto it = m_bbs.find(reduced_loop->loop_pc); it != m_bbs.end() && it->first <= bpc; it++)
|
||||
{
|
||||
for (u32 i = 0; i < s_reg_max; i++)
|
||||
{
|
||||
if (!reg_mod[i])
|
||||
{
|
||||
reg_use[i] += it->second.reg_use[i];
|
||||
}
|
||||
}
|
||||
|
||||
reg_maybe_float |= it->second.reg_maybe_float;
|
||||
reg_mod |= it->second.reg_mod;
|
||||
|
||||
// Note: update when sup_conds are implemented
|
||||
if (it->first == bpc && it->first != reduced_loop->loop_pc)
|
||||
{
|
||||
reduced_loop->loop_may_update |= it->second.reg_mod;
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < s_reg_max; i++)
|
||||
{
|
||||
const auto& b = ::at32(m_bbs, reduced_loop->loop_pc);
|
||||
const auto& b2 = ::at32(m_bbs, bpc);
|
||||
|
||||
if (!::at32(reduced_loop->loop_dicts, i))
|
||||
{
|
||||
if (b.reg_use[i] || (!::at32(b.reg_mod, i) && b2.reg_use[i]))
|
||||
if (reg_use[i] && reg_mod[i])
|
||||
{
|
||||
if ((b.reg_use[i] && ::at32(b.reg_mod, i)) || ::at32(b2.reg_mod, i))
|
||||
reduced_loop->is_constant_expression = false;
|
||||
reduced_loop->loop_writes.set(i);
|
||||
reduced_loop->loop_may_update.reset(i);
|
||||
}
|
||||
else if (reg_use[i])
|
||||
{
|
||||
reduced_loop->loop_args.set(i);
|
||||
|
||||
if (reg_use[i] >= 3 && reg_maybe_float[i])
|
||||
{
|
||||
reduced_loop->is_constant_expression = false;
|
||||
reduced_loop->loop_writes.set(i);
|
||||
}
|
||||
else
|
||||
{
|
||||
reduced_loop->loop_args.set(i);
|
||||
reduced_loop->gpr_not_nans.set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Cleanup
|
||||
reduced_loop->loop_may_update.reset(i);
|
||||
}
|
||||
}
|
||||
|
||||
reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop);
|
||||
@ -6731,6 +6766,13 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
break;
|
||||
}
|
||||
|
||||
if (reg_index != i && ::at32(reg->regs, reg_index))
|
||||
{
|
||||
// Unimplemented
|
||||
break_reduced_loop_pattern(30, reduced_loop->discard());
|
||||
break;
|
||||
}
|
||||
|
||||
u32 cond_val_incr = static_cast<s32>(reg_org->IMM);
|
||||
|
||||
if (reg_org->mod1_type == spu_itype::AI || reg_org->mod1_type == spu_itype::AHI)
|
||||
@ -7049,26 +7091,55 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
}
|
||||
}
|
||||
|
||||
std::array<u32, s_reg_max> reg_use{};
|
||||
std::bitset<s_reg_max> reg_maybe_float{};
|
||||
std::bitset<s_reg_max> reg_mod{};
|
||||
|
||||
for (auto it = m_bbs.find(reduced_loop->loop_pc); it != m_bbs.end() && it->first <= bpc; it++)
|
||||
{
|
||||
for (u32 i = 0; i < s_reg_max; i++)
|
||||
{
|
||||
if (!reg_mod[i])
|
||||
{
|
||||
reg_use[i] += it->second.reg_use[i];
|
||||
}
|
||||
}
|
||||
|
||||
reg_maybe_float |= it->second.reg_maybe_float;
|
||||
reg_mod |= it->second.reg_mod;
|
||||
|
||||
// Note: update when sup_conds are implemented
|
||||
if (it->first == bpc && it->first != reduced_loop->loop_pc)
|
||||
{
|
||||
reduced_loop->loop_may_update |= it->second.reg_mod;
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < s_reg_max; i++)
|
||||
{
|
||||
const auto& b = ::at32(m_bbs, reduced_loop->loop_pc);
|
||||
const auto& b2 = ::at32(m_bbs, bpc);
|
||||
|
||||
if (!::at32(reduced_loop->loop_dicts, i))
|
||||
{
|
||||
if (b.reg_use[i] || (!::at32(b.reg_mod, i) && b2.reg_use[i]))
|
||||
if (reg_use[i] && reg_mod[i])
|
||||
{
|
||||
if ((b.reg_use[i] && ::at32(b.reg_mod, i)) || ::at32(b2.reg_mod, i))
|
||||
reduced_loop->is_constant_expression = false;
|
||||
reduced_loop->loop_writes.set(i);
|
||||
reduced_loop->loop_may_update.reset(i);
|
||||
}
|
||||
else if (reg_use[i])
|
||||
{
|
||||
reduced_loop->loop_args.set(i);
|
||||
|
||||
if (reg_use[i] >= 3 && reg_maybe_float[i])
|
||||
{
|
||||
reduced_loop->is_constant_expression = false;
|
||||
reduced_loop->loop_writes.set(i);
|
||||
}
|
||||
else
|
||||
{
|
||||
reduced_loop->loop_args.set(i);
|
||||
reduced_loop->gpr_not_nans.set(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Cleanup
|
||||
reduced_loop->loop_may_update.reset(i);
|
||||
}
|
||||
}
|
||||
|
||||
reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop);
|
||||
@ -8608,6 +8679,16 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point, s
|
||||
|
||||
fmt::append(regs, " r%u-r", i);
|
||||
}
|
||||
|
||||
if (::at32(pattern.loop_may_update, i))
|
||||
{
|
||||
if (regs.size() != 1)
|
||||
{
|
||||
regs += ",";
|
||||
}
|
||||
|
||||
fmt::append(regs, " r%u-m", i);
|
||||
}
|
||||
}
|
||||
|
||||
regs += " }";
|
||||
|
||||
@ -60,6 +60,7 @@ const extern spu_decoder<spu_iflag> g_spu_iflag;
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
#pragma optimize("", off)
|
||||
#ifdef ARCH_ARM64
|
||||
#include "Emu/CPU/Backends/AArch64/AArch64JIT.h"
|
||||
#endif
|
||||
@ -152,6 +153,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||
// Current register values
|
||||
std::array<llvm::Value*, s_reg_max> reg{};
|
||||
|
||||
// Opimization: restoring register state for registers that would be rewritten in other blocks
|
||||
std::array<llvm::Value*, s_reg_max> reg_save_and_restore{};
|
||||
|
||||
// PHI nodes created for this block (if any)
|
||||
std::array<llvm::PHINode*, s_reg_max> phi{};
|
||||
|
||||
@ -177,11 +181,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||
const usz first_id = store_context_first_id[i];
|
||||
return counter != 1 && first_id != umax && counter < first_id;
|
||||
}
|
||||
|
||||
bool is_gpr_not_NaN_hint(u32 i) const noexcept
|
||||
{
|
||||
return block_wide_reg_store_elimination && ::at32(bb->reg_maybe_float, i) && ::at32(bb->reg_use, i) >= 3 && !::at32(bb->reg_mod, i);
|
||||
}
|
||||
};
|
||||
|
||||
struct function_info
|
||||
@ -197,10 +196,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||
};
|
||||
|
||||
// Current block
|
||||
block_info* m_block;
|
||||
block_info* m_block = nullptr;
|
||||
|
||||
// Current function or chunk
|
||||
function_info* m_finfo;
|
||||
function_info* m_finfo = nullptr;
|
||||
|
||||
// Reduced Loop Pattern information (if available)
|
||||
reduced_loop_t* m_reduced_loop_info = nullptr;
|
||||
|
||||
// All blocks in the current function chunk
|
||||
std::unordered_map<u32, block_info, value_hash<u32, 2>> m_blocks;
|
||||
@ -2280,7 +2282,7 @@ public:
|
||||
}
|
||||
|
||||
const bool is_reduced_loop = m_inst_attrs[(baddr - start) / 4] == inst_attr::reduced_loop;
|
||||
const auto reduced_loop_info = is_reduced_loop ? std::static_pointer_cast<reduced_loop_t>(ensure(m_patterns.at(baddr - start).info_ptr)) : nullptr;
|
||||
m_reduced_loop_info = is_reduced_loop ? std::static_pointer_cast<reduced_loop_t>(ensure(m_patterns.at(baddr - start).info_ptr)).get() : nullptr;
|
||||
|
||||
BasicBlock* block_optimization_phi_parent = nullptr;
|
||||
const auto block_optimization_inner = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b-loop-it-0x%x", m_pos), m_function) : nullptr;
|
||||
@ -2290,11 +2292,24 @@ public:
|
||||
std::array<llvm::PHINode*, s_reg_max> reduced_loop_phi_nodes{};
|
||||
std::array<llvm::Value*, s_reg_max> reduced_loop_init_regs{};
|
||||
|
||||
auto make_reduced_loop_condition = [&](llvm::BasicBlock* optimization_block, bool is_second_time, u32 reserve_iterations)
|
||||
// Reserve additional iteration for rare case where GPR may not be rewritten after the iteration
|
||||
// So that it would have to be rewritten by future code
|
||||
// This avoids using additional PHI connectors
|
||||
const u32 reserve_iterations = m_reduced_loop_info && m_reduced_loop_info->loop_may_update.count() != 0 ? 3 : 2;
|
||||
|
||||
for (u32 i = 0; i < s_reg_max; i++)
|
||||
{
|
||||
if (m_reduced_loop_info && m_reduced_loop_info->loop_may_update.test(i))
|
||||
{
|
||||
m_block->reg_save_and_restore[i] = m_block->reg[i];
|
||||
}
|
||||
}
|
||||
|
||||
auto make_reduced_loop_condition = [&](llvm::BasicBlock* optimization_block, bool is_second_time)
|
||||
{
|
||||
llvm::ICmpInst::Predicate compare{};
|
||||
|
||||
switch (reduced_loop_info->cond_val_compare)
|
||||
switch (m_reduced_loop_info->cond_val_compare)
|
||||
{
|
||||
case CMP_SLESS: compare = ICmpInst::ICMP_SLT; break;
|
||||
case CMP_SGREATER: compare = ICmpInst::ICMP_SGT; break;
|
||||
@ -2323,11 +2338,11 @@ public:
|
||||
llvm::Value* loop_dictator_after_adjustment{};
|
||||
|
||||
spu_opcode_t reg_target{};
|
||||
reg_target.rt = static_cast<u32>(reduced_loop_info->cond_val_register_idx);
|
||||
reg_target.rt = static_cast<u32>(m_reduced_loop_info->cond_val_register_idx);
|
||||
|
||||
if (reg_target.rt != reduced_loop_info->cond_val_register_idx)
|
||||
if (reg_target.rt != m_reduced_loop_info->cond_val_register_idx)
|
||||
{
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition register index: 0x%llx", reduced_loop_info->cond_val_register_idx);
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition register index: 0x%llx", m_reduced_loop_info->cond_val_register_idx);
|
||||
}
|
||||
|
||||
if (!m_block->reg[reg_target.rt])
|
||||
@ -2335,7 +2350,7 @@ public:
|
||||
m_block->reg[reg_target.rt] = reduced_loop_init_regs[reg_target.rt];
|
||||
}
|
||||
|
||||
switch (reduced_loop_info->cond_val_mask)
|
||||
switch (m_reduced_loop_info->cond_val_mask)
|
||||
{
|
||||
case u8{umax}:
|
||||
{
|
||||
@ -2360,28 +2375,28 @@ public:
|
||||
}
|
||||
default:
|
||||
{
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition bit mask: 0x%llx", reduced_loop_info->cond_val_mask);
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition bit mask: 0x%llx", m_reduced_loop_info->cond_val_mask);
|
||||
}
|
||||
}
|
||||
|
||||
const u32 type_bits = std::popcount(reduced_loop_info->cond_val_mask);
|
||||
const u32 type_bits = std::popcount(m_reduced_loop_info->cond_val_mask);
|
||||
|
||||
llvm::Value* cond_val_incr = nullptr;
|
||||
|
||||
if (reduced_loop_info->cond_val_incr_is_immediate)
|
||||
if (m_reduced_loop_info->cond_val_incr_is_immediate)
|
||||
{
|
||||
cond_val_incr = m_ir->getIntN(type_bits, reduced_loop_info->cond_val_incr & reduced_loop_info->cond_val_mask);
|
||||
cond_val_incr = m_ir->getIntN(type_bits, m_reduced_loop_info->cond_val_incr & m_reduced_loop_info->cond_val_mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
spu_opcode_t reg_incr{};
|
||||
reg_incr.rt = static_cast<u32>(reduced_loop_info->cond_val_incr);
|
||||
reg_incr.rt = static_cast<u32>(m_reduced_loop_info->cond_val_incr);
|
||||
|
||||
if (reg_incr.rt != reduced_loop_info->cond_val_incr)
|
||||
if (reg_incr.rt != m_reduced_loop_info->cond_val_incr)
|
||||
{
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal increment arguemnt register index: 0x%llx", reduced_loop_info->cond_val_incr);
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal increment arguemnt register index: 0x%llx", m_reduced_loop_info->cond_val_incr);
|
||||
}
|
||||
switch (reduced_loop_info->cond_val_mask)
|
||||
switch (m_reduced_loop_info->cond_val_mask)
|
||||
{
|
||||
case u8{umax}:
|
||||
{
|
||||
@ -2407,7 +2422,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (reduced_loop_info->cond_val_incr_before_cond && !reduced_loop_info->cond_val_incr_before_cond_taken_in_account)
|
||||
if (m_reduced_loop_info->cond_val_incr_before_cond && !m_reduced_loop_info->cond_val_incr_before_cond_taken_in_account)
|
||||
{
|
||||
loop_dictator_after_adjustment = m_ir->CreateAdd(loop_dictator_before_adjustment, cond_val_incr);
|
||||
}
|
||||
@ -2418,21 +2433,21 @@ public:
|
||||
|
||||
llvm::Value* loop_argument = nullptr;
|
||||
|
||||
if (reduced_loop_info->cond_val_is_immediate)
|
||||
if (m_reduced_loop_info->cond_val_is_immediate)
|
||||
{
|
||||
loop_argument = m_ir->CreateTrunc(m_ir->getInt64(reduced_loop_info->cond_val_min & reduced_loop_info->cond_val_mask), loop_dictator_before_adjustment->getType());
|
||||
loop_argument = m_ir->CreateTrunc(m_ir->getInt64(m_reduced_loop_info->cond_val_min & m_reduced_loop_info->cond_val_mask), loop_dictator_before_adjustment->getType());
|
||||
}
|
||||
else
|
||||
{
|
||||
spu_opcode_t reg_target2{};
|
||||
reg_target2.rt = static_cast<u32>(reduced_loop_info->cond_val_register_argument_idx);
|
||||
reg_target2.rt = static_cast<u32>(m_reduced_loop_info->cond_val_register_argument_idx);
|
||||
|
||||
if (reg_target2.rt != reduced_loop_info->cond_val_register_argument_idx)
|
||||
if (reg_target2.rt != m_reduced_loop_info->cond_val_register_argument_idx)
|
||||
{
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition arguemnt register index: 0x%llx", reduced_loop_info->cond_val_register_argument_idx);
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition arguemnt register index: 0x%llx", m_reduced_loop_info->cond_val_register_argument_idx);
|
||||
}
|
||||
|
||||
switch (reduced_loop_info->cond_val_mask)
|
||||
switch (m_reduced_loop_info->cond_val_mask)
|
||||
{
|
||||
case u8{umax}:
|
||||
{
|
||||
@ -2464,7 +2479,7 @@ public:
|
||||
{
|
||||
condition = m_ir->CreateICmp(compare, loop_dictator_after_adjustment, loop_argument);
|
||||
}
|
||||
// else if ((reduced_loop_info->cond_val_compare == CMP_LGREATER || (reduced_loop_info->cond_val_compare == CMP_LGREATER_EQUAL && reduced_loop_info->cond_val_is_immediate && reduced_loop_info->cond_val_incr)) && cond_val_incr->getSExtValue() < 0)
|
||||
// else if ((m_reduced_loop_info->cond_val_compare == CMP_LGREATER || (m_reduced_loop_info->cond_val_compare == CMP_LGREATER_EQUAL && m_reduced_loop_info->cond_val_is_immediate && m_reduced_loop_info->cond_val_incr)) && cond_val_incr->getSExtValue() < 0)
|
||||
// {
|
||||
// const auto cond_val_incr_multiplied = m_ir->CreateMul(cond_val_incr, reserve_iterations - 1);
|
||||
// condition = m_ir->CreateICmp(compare, select(m_ir->CreateICmpUGE(cond_val_incr_multiplied, loop_dictator_after_adjustment), m_ir->CreateAdd(loop_dictator_after_adjustment, cond_val_incr_multiplied), m_ir->getIntN(type_bits, 0)), loop_argument);
|
||||
@ -2493,7 +2508,7 @@ public:
|
||||
{
|
||||
const bool is_last = !(count <= 20 && i < s_reg_max);
|
||||
|
||||
if (is_last || m_block->is_gpr_not_NaN_hint(i))
|
||||
if (is_last || m_reduced_loop_info->is_gpr_not_NaN_hint(i))
|
||||
{
|
||||
count++;
|
||||
|
||||
@ -2542,9 +2557,24 @@ public:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//condition = m_ir->getInt1(0);
|
||||
// TODO: Optimize so constant-evaluated cases will not be checked
|
||||
const bool is_cond_need_runtime_verify = compare == ICmpInst::ICMP_NE && (!m_reduced_loop_info->cond_val_is_immediate || m_reduced_loop_info->cond_val_incr % 2 == 0);
|
||||
|
||||
if (is_cond_need_runtime_verify)
|
||||
{
|
||||
// Verify that it is actually possible to finish the loop and it is not an infinite loop
|
||||
|
||||
// First: create a mask of the bits that definitely do not change between iterations (0 results in umax which is accurate here)
|
||||
const auto no_change_bits = m_ir->CreateAnd(m_ir->CreateNot(cond_val_incr), m_ir->CreateSub(cond_val_incr, m_ir->getIntN(type_bits, 1)));
|
||||
|
||||
// Check that the result and the comparison argument are identical once the mask is applied to both
|
||||
const auto cond_verify = m_ir->CreateICmpEQ(m_ir->CreateAnd(loop_dictator_after_adjustment, no_change_bits), m_ir->CreateAnd(loop_argument, no_change_bits));
|
||||
|
||||
// Amend condition
|
||||
condition = m_ir->CreateAnd(cond_verify, condition);
|
||||
}
|
||||
}
|
||||
|
||||
m_ir->CreateCondBr(condition, optimization_block, block_optimization_next);
|
||||
};
|
||||
@ -2555,7 +2585,7 @@ public:
|
||||
{
|
||||
llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type<f64[4]>() : get_reg_type(i);
|
||||
|
||||
if (i < reduced_loop_info->loop_dicts.size() && (reduced_loop_info->loop_dicts.test(i) || reduced_loop_info->loop_writes.test(i)))
|
||||
if (i < m_reduced_loop_info->loop_dicts.size() && (m_reduced_loop_info->loop_dicts.test(i) || m_reduced_loop_info->loop_writes.test(i)))
|
||||
{
|
||||
// Connect registers which are used and then modified by the block
|
||||
auto value = m_block->reg[i];
|
||||
@ -2567,7 +2597,7 @@ public:
|
||||
|
||||
reduced_loop_init_regs[i] = value;
|
||||
}
|
||||
else if (i < reduced_loop_info->loop_dicts.size() && reduced_loop_info->loop_args.test(i))
|
||||
else if (i < m_reduced_loop_info->loop_dicts.size() && m_reduced_loop_info->loop_args.test(i))
|
||||
{
|
||||
// Load registers used as arguments of the loop
|
||||
if (!m_block->reg[i])
|
||||
@ -2580,8 +2610,8 @@ public:
|
||||
const auto prev_insert_block = m_ir->GetInsertBlock();
|
||||
|
||||
block_optimization_phi_parent = prev_insert_block;
|
||||
|
||||
make_reduced_loop_condition(block_optimization_inner, false, 2);
|
||||
|
||||
make_reduced_loop_condition(block_optimization_inner, false);
|
||||
m_ir->SetInsertPoint(block_optimization_inner);
|
||||
|
||||
for (u32 i = 0; i < s_reg_max; i++)
|
||||
@ -2611,7 +2641,7 @@ public:
|
||||
|
||||
for (u32 iteration_emit = 0; is_reduced_loop; m_pos += 4)
|
||||
{
|
||||
if (m_pos != baddr && m_block_info[m_pos / 4] && reduced_loop_info->loop_end < m_pos)
|
||||
if (m_pos != baddr && m_block_info[m_pos / 4] && m_reduced_loop_info->loop_end < m_pos)
|
||||
{
|
||||
fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(1) too early at 0x%x", m_pos);
|
||||
}
|
||||
@ -2667,8 +2697,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
ensure(!!m_block->reg[reduced_loop_info->cond_val_register_idx]);
|
||||
make_reduced_loop_condition(block_optimization_inner, true, 2);
|
||||
ensure(!!m_block->reg[m_reduced_loop_info->cond_val_register_idx]);
|
||||
make_reduced_loop_condition(block_optimization_inner, true);
|
||||
m_ir->SetInsertPoint(block_optimization_next);
|
||||
m_block->block_wide_reg_store_elimination = false;
|
||||
|
||||
@ -2763,6 +2793,16 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < s_reg_max; i++)
|
||||
{
|
||||
if (m_reduced_loop_info && m_reduced_loop_info->loop_may_update.test(i))
|
||||
{
|
||||
m_block->reg[i] = m_block->reg_save_and_restore[i];
|
||||
}
|
||||
}
|
||||
|
||||
m_reduced_loop_info = nullptr;
|
||||
|
||||
// Emit instructions
|
||||
for (m_pos = baddr; m_pos >= start && m_pos < end && !m_ir->GetInsertBlock()->getTerminator(); m_pos += 4)
|
||||
{
|
||||
@ -3546,7 +3586,7 @@ public:
|
||||
|
||||
// Create interpreter table
|
||||
const auto if_type = get_ftype<void, u8*, u8*, u32, u32, u8*, u32, u8*>();
|
||||
m_function_table = new GlobalVariable(*m_module, ArrayType::get(m_ir->getPtrTy(), 1ull << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr);
|
||||
m_function_table = new GlobalVariable(*m_module, ArrayType::get(get_type<u8*>(), 1ull << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr);
|
||||
|
||||
init_luts();
|
||||
|
||||
@ -3590,7 +3630,7 @@ public:
|
||||
m_ir->CreateStore(m_ir->CreateCall(get_intrinsic<u64>(Intrinsic::read_register), {rsp_name}), native_sp);
|
||||
|
||||
// Decode (shift) and load function pointer
|
||||
const auto first = m_ir->CreateLoad(m_ir->getPtrTy(), m_ir->CreateGEP(m_ir->getPtrTy(), m_interp_table, m_ir->CreateLShr(m_interp_op, 32u - m_interp_magn)));
|
||||
const auto first = m_ir->CreateLoad(get_type<u8*>(), m_ir->CreateGEP(get_type<u8*>(), m_interp_table, m_ir->CreateLShr(m_interp_op, 32u - m_interp_magn)));
|
||||
const auto call0 = m_ir->CreateCall(if_type, first, {m_lsptr, m_thread, m_interp_pc, m_interp_op, m_interp_table, m_interp_7f0, m_interp_regs});
|
||||
call0->setCallingConv(CallingConv::GHC);
|
||||
m_ir->CreateRetVoid();
|
||||
@ -3734,7 +3774,7 @@ public:
|
||||
const auto next_pc = itype & spu_itype::branch ? m_interp_pc : m_interp_pc_next;
|
||||
const auto be32_op = m_ir->CreateLoad(get_type<u32>(), _ptr(m_lsptr, m_ir->CreateZExt(next_pc, get_type<u64>())));
|
||||
const auto next_op = m_ir->CreateCall(get_intrinsic<u32>(Intrinsic::bswap), {be32_op});
|
||||
const auto next_if = m_ir->CreateLoad(m_ir->getPtrTy(), m_ir->CreateGEP(m_ir->getPtrTy(), m_interp_table, m_ir->CreateLShr(next_op, 32u - m_interp_magn)));
|
||||
const auto next_if = m_ir->CreateLoad(get_type<u8*>(), m_ir->CreateGEP(get_type<u8*>(), m_interp_table, m_ir->CreateLShr(next_op, 32u - m_interp_magn)));
|
||||
llvm::cast<LoadInst>(next_if)->setVolatile(true);
|
||||
|
||||
if (!(itype & spu_itype::branch))
|
||||
@ -3859,7 +3899,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
m_function_table->setInitializer(ConstantArray::get(ArrayType::get(m_ir->getPtrTy(), 1ull << m_interp_magn), iptrs));
|
||||
m_function_table->setInitializer(ConstantArray::get(ArrayType::get(get_type<u8*>(), 1ull << m_interp_magn), iptrs));
|
||||
m_function_table = nullptr;
|
||||
|
||||
for (auto& f : *_module)
|
||||
@ -6978,7 +7018,7 @@ public:
|
||||
|
||||
value_t<f32[4]> clamp_smax(value_t<f32[4]> v, u32 gpr = s_reg_max)
|
||||
{
|
||||
if (m_block && gpr < s_reg_max && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(gpr))
|
||||
if (m_reduced_loop_info && gpr < s_reg_max && m_reduced_loop_info->is_gpr_not_NaN_hint(gpr))
|
||||
{
|
||||
return v;
|
||||
}
|
||||
@ -7129,12 +7169,12 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if (m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra))
|
||||
if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra))
|
||||
{
|
||||
safe_finite_compare.set(0);
|
||||
}
|
||||
|
||||
if (m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb))
|
||||
if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb))
|
||||
{
|
||||
safe_finite_compare.set(1);
|
||||
}
|
||||
@ -7328,8 +7368,8 @@ public:
|
||||
}
|
||||
});
|
||||
|
||||
const u32 a_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra) ? 1 : 0;
|
||||
const u32 b_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb) ? 1 : 0;
|
||||
const u32 a_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra) ? 1 : 0;
|
||||
const u32 b_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb) ? 1 : 0;
|
||||
|
||||
if (op.ra == op.rb && !m_interp_magn)
|
||||
{
|
||||
@ -7765,8 +7805,8 @@ public:
|
||||
const auto [a, b, c] = get_vrs<f32[4]>(op.ra, op.rb, op.rc);
|
||||
static const auto MT = match<f32[4]>();
|
||||
|
||||
const u32 a_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra) ? 1 : 0;
|
||||
const u32 b_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb) ? 1 : 0;
|
||||
const u32 a_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra) ? 1 : 0;
|
||||
const u32 b_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb) ? 1 : 0;
|
||||
|
||||
auto check_sqrt_pattern_for_float = [&](f32 float_value) -> bool
|
||||
{
|
||||
|
||||
@ -361,10 +361,12 @@ public:
|
||||
std::bitset<s_reg_max> loop_args;
|
||||
std::bitset<s_reg_max> loop_dicts;
|
||||
std::bitset<s_reg_max> loop_writes;
|
||||
std::bitset<s_reg_max> loop_may_update;
|
||||
std::bitset<s_reg_max> gpr_not_nans;
|
||||
|
||||
struct origin_t
|
||||
{
|
||||
std::bitset<s_reg_max> regs{};
|
||||
std::bitset<s_reg_max + 1> regs{};
|
||||
u32 modified = 0;
|
||||
spu_itype_t mod1_type = spu_itype::UNK;
|
||||
spu_itype_t mod2_type = spu_itype::UNK;
|
||||
@ -680,6 +682,11 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the gpr_not_nans entry for register index i, looked up through ::at32.
// NOTE(review): the name suggests a set bit marks a GPR assumed not to hold NaN - confirm where gpr_not_nans is populated.
bool is_gpr_not_NaN_hint(u32 i) const noexcept
|
||||
{
|
||||
return ::at32(gpr_not_nans, i);
|
||||
}
|
||||
|
||||
origin_t get_reg(u32 reg_val) noexcept
|
||||
{
|
||||
const auto org = find_reg(reg_val);
|
||||
|
||||
@ -63,7 +63,7 @@ extern const std::map<std::string_view, int> g_prx_list
|
||||
{ "libcelpenc.sprx", 0 },
|
||||
{ "libddpdec.sprx", 0 },
|
||||
{ "libdivxdec.sprx", 0 },
|
||||
{ "libdmux.sprx", 0 },
|
||||
{ "libdmux.sprx", 1 },
|
||||
{ "libdmuxpamf.sprx", 1 },
|
||||
{ "libdtslbrdec.sprx", 0 },
|
||||
{ "libfiber.sprx", 0 },
|
||||
|
||||
@ -1731,24 +1731,34 @@ namespace rsx
|
||||
}
|
||||
case deferred_request_command::cubemap_unwrap:
|
||||
{
|
||||
rsx::simple_array<copy_region_descriptor> sections(6);
|
||||
for (u16 n = 0; n < 6; ++n)
|
||||
rsx::simple_array<copy_region_descriptor> sections(6 * desc.mipmaps);
|
||||
for (u16 n = 0, section_id = 0; n < 6; ++n)
|
||||
{
|
||||
sections[n] =
|
||||
u16 mip_w = desc.width, mip_h = desc.height;
|
||||
u16 y_offset = static_cast<u16>(desc.slice_h * n);
|
||||
|
||||
for (u8 mip = 0; mip < desc.mipmaps; ++mip)
|
||||
{
|
||||
.src = desc.external_handle,
|
||||
.xform = surface_transform::coordinate_transform,
|
||||
.level = 0,
|
||||
.src_x = 0,
|
||||
.src_y = static_cast<u16>(desc.slice_h * n),
|
||||
.dst_x = 0,
|
||||
.dst_y = 0,
|
||||
.dst_z = n,
|
||||
.src_w = desc.width,
|
||||
.src_h = desc.height,
|
||||
.dst_w = desc.width,
|
||||
.dst_h = desc.height
|
||||
};
|
||||
sections[section_id++] =
|
||||
{
|
||||
.src = desc.external_handle,
|
||||
.xform = surface_transform::coordinate_transform,
|
||||
.level = mip,
|
||||
.src_x = 0,
|
||||
.src_y = y_offset,
|
||||
.dst_x = 0,
|
||||
.dst_y = 0,
|
||||
.dst_z = n,
|
||||
.src_w = mip_w,
|
||||
.src_h = mip_h,
|
||||
.dst_w = mip_w,
|
||||
.dst_h = mip_h
|
||||
};
|
||||
|
||||
y_offset += mip_h;
|
||||
mip_w = std::max<u16>(mip_w / 2, 1);
|
||||
mip_h = std::max<u16>(mip_h / 2, 1);
|
||||
}
|
||||
}
|
||||
|
||||
result = generate_cubemap_from_images(cmd, desc.gcm_format, desc.width, sections, desc.remap);
|
||||
|
||||
@ -384,7 +384,21 @@ void GLGSRender::load_texture_env()
|
||||
}
|
||||
}
|
||||
|
||||
m_fs_sampler_states[i].apply(tex, fs_sampler_state[i].get());
|
||||
u32 actual_mipcount = 1;
|
||||
if (sampler_state->upload_context == rsx::texture_upload_context::shader_read)
|
||||
{
|
||||
actual_mipcount = tex.get_exact_mipmap_count();
|
||||
}
|
||||
else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather)
|
||||
{
|
||||
actual_mipcount = sampler_state->external_subresource_desc.sections_to_copy.size();
|
||||
}
|
||||
else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::cubemap_unwrap)
|
||||
{
|
||||
actual_mipcount = sampler_state->external_subresource_desc.mipmaps;
|
||||
}
|
||||
|
||||
m_fs_sampler_states[i].apply(tex, fs_sampler_state[i].get(), actual_mipcount > 1);
|
||||
|
||||
const auto texture_format = sampler_state->format_ex.format();
|
||||
// Depth format redirected to BGRA8 resample stage. Do not filter to avoid bits leaking.
|
||||
|
||||
@ -586,7 +586,8 @@ namespace gl
|
||||
gl::texture_view* generate_cubemap_from_images(gl::command_context& cmd, u32 gcm_format, u16 size, const rsx::simple_array<copy_region_descriptor>& sources, const rsx::texture_channel_remap_t& remap_vector) override
|
||||
{
|
||||
auto _template = get_template_from_collection_impl(sources);
|
||||
auto result = create_temporary_subresource_impl(cmd, _template, GL_NONE, GL_TEXTURE_CUBE_MAP, gcm_format, 0, 0, size, size, 1, 1, remap_vector, false);
|
||||
const u8 mip_count = 1 + sources.reduce(0, FN(std::max<u8>(x, y.level)));
|
||||
auto result = create_temporary_subresource_impl(cmd, _template, GL_NONE, GL_TEXTURE_CUBE_MAP, gcm_format, 0, 0, size, size, 1, mip_count, remap_vector, false);
|
||||
|
||||
copy_transfer_regions_impl(cmd, result->image(), sources);
|
||||
return result;
|
||||
|
||||
@ -72,7 +72,7 @@ namespace gl
|
||||
}
|
||||
|
||||
// Apply sampler state settings
|
||||
void sampler_state::apply(const rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image)
|
||||
void sampler_state::apply(const rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image, bool allow_mipmaps)
|
||||
{
|
||||
set_parameteri(GL_TEXTURE_WRAP_S, wrap_mode(tex.wrap_s()));
|
||||
set_parameteri(GL_TEXTURE_WRAP_T, wrap_mode(tex.wrap_t()));
|
||||
@ -114,8 +114,7 @@ namespace gl
|
||||
}
|
||||
}
|
||||
|
||||
if (sampled_image->upload_context != rsx::texture_upload_context::shader_read ||
|
||||
tex.get_exact_mipmap_count() == 1)
|
||||
if (!allow_mipmaps || tex.get_exact_mipmap_count() == 1)
|
||||
{
|
||||
GLint min_filter = tex_min_filter(tex.min_filter());
|
||||
|
||||
|
||||
@ -75,7 +75,7 @@ namespace gl
|
||||
return (prop == m_propertiesf.end()) ? 0 : prop->second;
|
||||
}
|
||||
|
||||
void apply(const rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image);
|
||||
void apply(const rsx::fragment_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image, bool allow_mipmaps = true);
|
||||
void apply(const rsx::vertex_texture& tex, const rsx::sampled_image_descriptor_base* sampled_image);
|
||||
|
||||
void apply_defaults(GLenum default_filter = GL_NEAREST);
|
||||
|
||||
@ -14,7 +14,7 @@ namespace rsx
|
||||
namespace nv0039
|
||||
{
|
||||
// Transfer with stride
|
||||
inline void block2d_copy_with_stride(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch, u8 src_stride, u8 dst_stride)
|
||||
inline void block2d_copy_with_stride(u8* dst, const u8* src, u32 width, u32 height, s32 src_pitch, s32 dst_pitch, u8 src_stride, u8 dst_stride)
|
||||
{
|
||||
for (u32 row = 0; row < height; ++row)
|
||||
{
|
||||
@ -33,7 +33,7 @@ namespace rsx
|
||||
}
|
||||
}
|
||||
|
||||
inline void block2d_copy(u8* dst, const u8* src, u32 width, u32 height, u32 src_pitch, u32 dst_pitch)
|
||||
inline void block2d_copy(u8* dst, const u8* src, u32 width, u32 height, s32 src_pitch, s32 dst_pitch)
|
||||
{
|
||||
for (u32 i = 0; i < height; ++i)
|
||||
{
|
||||
|
||||
@ -633,9 +633,17 @@ namespace rsx
|
||||
case 2:
|
||||
break;
|
||||
default:
|
||||
rsx_log.error("Unknown render mode %d", mode);
|
||||
{
|
||||
struct logged_t
|
||||
{
|
||||
atomic_t<u8> logged_cause[256]{};
|
||||
};
|
||||
|
||||
const auto& is_error = ::at32(g_fxo->get<logged_t>().logged_cause, mode).try_inc(10);
|
||||
(is_error ? rsx_log.error : rsx_log.trace)("Unknown render mode %d", mode);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const u32 offset = arg & 0xffffff;
|
||||
auto address_ptr = util::get_report_data_impl(ctx, offset);
|
||||
|
||||
@ -471,6 +471,10 @@ void VKGSRender::load_texture_env()
|
||||
// Clamp min and max lod
|
||||
actual_mipmaps = static_cast<f32>(sampler_state->external_subresource_desc.sections_to_copy.size());
|
||||
}
|
||||
else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::cubemap_unwrap)
|
||||
{
|
||||
actual_mipmaps = static_cast<f32>(sampler_state->external_subresource_desc.mipmaps);
|
||||
}
|
||||
else
|
||||
{
|
||||
actual_mipmaps = 1.f;
|
||||
|
||||
@ -2115,7 +2115,7 @@ void VKGSRender::load_program_env()
|
||||
|
||||
if (vk::emulate_conditional_rendering())
|
||||
{
|
||||
const vk::buffer& predicate = m_cond_render_buffer ? *m_cond_render_buffer : *vk::get_scratch_buffer(*m_current_command_buffer, 4);
|
||||
const vk::buffer& predicate = m_cond_render_buffer ? *m_cond_render_buffer : *vk::get_scratch_buffer(*m_current_command_buffer, 4, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_ACCESS_NONE);
|
||||
const u32 offset = cond_render_ctrl.hw_cond_active ? 0 : 4;
|
||||
m_program->bind_uniform({ predicate, offset, 4 }, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cr_pred_buffer_location);
|
||||
}
|
||||
@ -2910,7 +2910,7 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
|
||||
else if (num_hw_queries > 0)
|
||||
{
|
||||
// We'll need to do some result aggregation using a compute shader.
|
||||
auto scratch = vk::get_scratch_buffer(*m_current_command_buffer, num_hw_queries * 4);
|
||||
vk::buffer* scratch = nullptr;
|
||||
|
||||
// Range latching. Because of how the query pool manages allocations using a stack, we get an inverse sequential set of handles/indices that we can easily group together.
|
||||
// This drastically boosts performance on some drivers like the NVIDIA proprietary one that seems to have a rather high cost for every individual query transfer command.
|
||||
@ -2918,6 +2918,11 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
|
||||
|
||||
auto copy_query_range_impl = [&]()
|
||||
{
|
||||
if (!scratch)
|
||||
{
|
||||
scratch = vk::get_scratch_buffer(*m_current_command_buffer, num_hw_queries * 4, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
}
|
||||
|
||||
const auto count = (query_range.last - query_range.first + 1);
|
||||
m_occlusion_query_manager->get_query_result_indirect(*m_current_command_buffer, query_range.first, count, scratch->value, dst_offset);
|
||||
dst_offset += count * 4;
|
||||
@ -2964,7 +2969,7 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
|
||||
}
|
||||
|
||||
// Sanity check
|
||||
ensure(dst_offset <= scratch->size());
|
||||
ensure(scratch && dst_offset <= scratch->size());
|
||||
|
||||
if (!partial_eval)
|
||||
{
|
||||
|
||||
@ -71,10 +71,11 @@ namespace vk
|
||||
|
||||
enum image_upload_options
|
||||
{
|
||||
upload_contents_async = 1,
|
||||
initialize_image_layout = 2,
|
||||
preserve_image_layout = 4,
|
||||
source_is_gpu_resident = 8,
|
||||
upload_contents_async = 0x0001,
|
||||
initialize_image_layout = 0x0002,
|
||||
preserve_image_layout = 0x0004,
|
||||
source_is_gpu_resident = 0x0008,
|
||||
source_is_userptr = 0x0010,
|
||||
|
||||
// meta-flags
|
||||
upload_contents_inline = 0,
|
||||
|
||||
@ -724,6 +724,7 @@ namespace vk
|
||||
subres.height_in_block
|
||||
);
|
||||
subres.data = std::span(ext_data);
|
||||
upload_flags |= source_is_userptr;
|
||||
#else
|
||||
const auto [scratch_buf, linear_data_scratch_offset] = vk::detile_memory_block(cmd, tiled_region, range, subres.width_in_block, subres.height_in_block, get_bpp());
|
||||
|
||||
|
||||
@ -598,7 +598,7 @@ namespace vk
|
||||
const auto transfer_size = surface->get_memory_range().length();
|
||||
if (transfer_size > max_copy_length || src_offset_in_buffer || surface->is_depth_surface())
|
||||
{
|
||||
auto scratch = vk::get_scratch_buffer(cmd, transfer_size * 4);
|
||||
auto scratch = vk::get_scratch_buffer(cmd, transfer_size * 4, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
dest = scratch;
|
||||
}
|
||||
|
||||
|
||||
@ -376,7 +376,7 @@ namespace vk
|
||||
const auto min_scratch_size = calculate_working_buffer_size(src_length, src->aspect() | dst->aspect());
|
||||
|
||||
// Initialize scratch memory
|
||||
auto scratch_buf = vk::get_scratch_buffer(cmd, min_scratch_size);
|
||||
auto scratch_buf = vk::get_scratch_buffer(cmd, min_scratch_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
|
||||
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
|
||||
{
|
||||
@ -601,7 +601,7 @@ namespace vk
|
||||
const auto dst_w = dst_rect.width();
|
||||
const auto dst_h = dst_rect.height();
|
||||
|
||||
auto scratch_buf = vk::get_scratch_buffer(cmd, std::max(src_w, dst_w) * std::max(src_h, dst_h) * 4);
|
||||
auto scratch_buf = vk::get_scratch_buffer(cmd, std::max(src_w, dst_w) * std::max(src_h, dst_h) * 4, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
|
||||
//1. Copy unscaled to typeless surface
|
||||
VkBufferImageCopy info{};
|
||||
@ -990,7 +990,7 @@ namespace vk
|
||||
auto pdev = vk::get_current_renderer();
|
||||
rsx::texture_uploader_capabilities caps{ .supports_dxt = pdev->get_texture_compression_bc_support(), .alignment = heap_align };
|
||||
rsx::texture_memory_info opt{};
|
||||
bool check_caps = true;
|
||||
bool check_hw_caps = !(image_setup_flags & source_is_userptr);
|
||||
|
||||
vk::buffer* scratch_buf = nullptr;
|
||||
u32 scratch_offset = 0;
|
||||
@ -1015,13 +1015,13 @@ namespace vk
|
||||
image_linear_size = row_pitch * layout.depth * (rsx::is_compressed_host_format(caps, format) ? layout.height_in_block : layout.height_in_texel);
|
||||
|
||||
// Only do GPU-side conversion if occupancy is good
|
||||
if (check_caps)
|
||||
if (check_hw_caps)
|
||||
{
|
||||
caps.supports_byteswap = (image_linear_size >= 1024) || (image_setup_flags & source_is_gpu_resident);
|
||||
caps.supports_hw_deswizzle = caps.supports_byteswap;
|
||||
caps.supports_zero_copy = caps.supports_byteswap;
|
||||
caps.supports_vtc_decoding = false;
|
||||
check_caps = false;
|
||||
check_hw_caps = false;
|
||||
}
|
||||
|
||||
auto buf_allocator = [&](usz) -> std::tuple<void*, usz>
|
||||
@ -1124,7 +1124,7 @@ namespace vk
|
||||
scratch_buf_size += (image_linear_size * 5) / 4;
|
||||
}
|
||||
|
||||
scratch_buf = vk::get_scratch_buffer(cmd2, scratch_buf_size);
|
||||
scratch_buf = vk::get_scratch_buffer(cmd2, scratch_buf_size, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
buffer_copies.reserve(subresource_layout.size());
|
||||
}
|
||||
|
||||
@ -1183,13 +1183,6 @@ namespace vk
|
||||
{
|
||||
ensure(scratch_buf);
|
||||
|
||||
// WAW hazard - complete previous work before executing any transfers
|
||||
insert_buffer_memory_barrier(
|
||||
cmd2, scratch_buf->value, 0, scratch_offset,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
|
||||
if (upload_commands.size() > 1)
|
||||
{
|
||||
auto range_ptr = buffer_copies.data();
|
||||
@ -1199,8 +1192,9 @@ namespace vk
|
||||
range_ptr += op.second;
|
||||
}
|
||||
}
|
||||
else if (!buffer_copies.empty())
|
||||
else
|
||||
{
|
||||
ensure(!buffer_copies.empty());
|
||||
vkCmdCopyBuffer(cmd2, upload_buffer->value, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
|
||||
}
|
||||
|
||||
@ -1279,7 +1273,10 @@ namespace vk
|
||||
vk::load_dma(range.start, section_length);
|
||||
|
||||
// Allocate scratch and prepare for the GPU job
|
||||
const auto scratch_buf = vk::get_scratch_buffer(cmd, section_length * 3); // 0 = linear data, 1 = padding (deswz), 2 = tiled data
|
||||
const auto scratch_buf = vk::get_scratch_buffer(cmd, section_length * 3, // 0 = linear data, 1 = padding (deswz), 2 = tiled data
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT);
|
||||
|
||||
const auto tiled_data_scratch_offset = section_length * 2;
|
||||
const auto linear_data_scratch_offset = 0u;
|
||||
|
||||
@ -1313,16 +1310,16 @@ namespace vk
|
||||
};
|
||||
vkCmdCopyBuffer(cmd, dma_mapping.second->value, scratch_buf->value, 1, ©_rgn);
|
||||
|
||||
// Barrier
|
||||
// Post-Transfer barrier
|
||||
vk::insert_buffer_memory_barrier(
|
||||
cmd, scratch_buf->value, linear_data_scratch_offset, section_length,
|
||||
cmd, scratch_buf->value, tiled_data_scratch_offset, section_length,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||
|
||||
// Detile
|
||||
vk::get_compute_task<vk::cs_tile_memcpy<RSX_detiler_op::decode>>()->run(cmd, config);
|
||||
|
||||
// Barrier
|
||||
// Post-Compute barrier
|
||||
vk::insert_buffer_memory_barrier(
|
||||
cmd, scratch_buf->value, linear_data_scratch_offset, static_cast<u32>(width) * height * bpp,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
|
||||
@ -100,7 +100,7 @@ namespace vk
|
||||
auto dma_sync_region = valid_range;
|
||||
dma_mapping_handle dma_mapping = { 0, nullptr };
|
||||
|
||||
auto dma_sync = [&dma_sync_region, &dma_mapping](bool load, bool force = false)
|
||||
auto dma_sync = [&](bool load, bool force = false)
|
||||
{
|
||||
if (dma_mapping.second && !force)
|
||||
{
|
||||
@ -130,9 +130,10 @@ namespace vk
|
||||
dma_sync_region = tiled_region.tile_align(dma_sync_region);
|
||||
}
|
||||
#endif
|
||||
|
||||
auto working_buffer = vk::get_scratch_buffer(cmd, working_buffer_length);
|
||||
u32 result_offset = 0;
|
||||
auto working_buffer = vk::get_scratch_buffer(cmd, working_buffer_length,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT);
|
||||
|
||||
VkBufferImageCopy region = {};
|
||||
region.imageSubresource = { src->aspect(), 0, 0, 1 };
|
||||
@ -220,7 +221,7 @@ namespace vk
|
||||
// Transfer -> Compute barrier
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, dst_offset, dma_sync_region.length(),
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT);
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT);
|
||||
}
|
||||
|
||||
// Prepare payload
|
||||
@ -284,8 +285,10 @@ namespace vk
|
||||
if (require_rw_barrier)
|
||||
{
|
||||
vk::insert_buffer_memory_barrier(cmd, working_buffer->value, result_offset, dma_sync_region.length(),
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT);
|
||||
}
|
||||
|
||||
if (rsx_pitch == real_pitch) [[likely]]
|
||||
@ -332,6 +335,14 @@ namespace vk
|
||||
vkCmdCopyImageToBuffer(cmd, src->value, src->current_layout, dma_mapping.second->value, 1, &region);
|
||||
}
|
||||
|
||||
// Post-transfer barrier on dma layer
|
||||
vk::insert_buffer_memory_barrier(
|
||||
cmd, dma_mapping.second->value,
|
||||
dma_mapping.first, dma_sync_region.length(),
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT
|
||||
);
|
||||
|
||||
src->pop_layout(cmd);
|
||||
|
||||
VkBufferMemoryBarrier2KHR mem_barrier =
|
||||
@ -761,8 +772,9 @@ namespace vk
|
||||
const rsx::simple_array<copy_region_descriptor>& sections_to_copy, const rsx::texture_channel_remap_t& remap_vector)
|
||||
{
|
||||
auto _template = get_template_from_collection_impl(sections_to_copy);
|
||||
const u8 mip_count = 1 + sections_to_copy.reduce(0, FN(std::max<u8>(x, y.level)));
|
||||
auto result = create_temporary_subresource_view_impl(cmd, _template, VK_IMAGE_TYPE_2D,
|
||||
VK_IMAGE_VIEW_TYPE_CUBE, gcm_format, 0, 0, size, size, 1, 1, remap_vector, false);
|
||||
VK_IMAGE_VIEW_TYPE_CUBE, gcm_format, 0, 0, size, size, 1, mip_count, remap_vector, false);
|
||||
|
||||
if (!result)
|
||||
{
|
||||
@ -772,7 +784,7 @@ namespace vk
|
||||
|
||||
const auto image = result->image();
|
||||
VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format);
|
||||
VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 6 };
|
||||
VkImageSubresourceRange dst_range = { dst_aspect, 0, mip_count, 0, 6 };
|
||||
vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range);
|
||||
|
||||
if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT))
|
||||
@ -786,6 +798,14 @@ namespace vk
|
||||
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
|
||||
}
|
||||
|
||||
vk::insert_image_memory_barrier(
|
||||
cmd,
|
||||
image->handle(),
|
||||
image->current_layout, image->current_layout,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
dst_range);
|
||||
|
||||
copy_transfer_regions_impl(cmd, image, sections_to_copy);
|
||||
|
||||
vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
|
||||
@ -821,6 +841,14 @@ namespace vk
|
||||
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
|
||||
}
|
||||
|
||||
vk::insert_image_memory_barrier(
|
||||
cmd,
|
||||
image->handle(),
|
||||
image->current_layout, image->current_layout,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
dst_range);
|
||||
|
||||
copy_transfer_regions_impl(cmd, image, sections_to_copy);
|
||||
|
||||
vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
|
||||
@ -859,6 +887,14 @@ namespace vk
|
||||
}
|
||||
}
|
||||
|
||||
vk::insert_image_memory_barrier(
|
||||
cmd,
|
||||
image->handle(),
|
||||
image->current_layout, image->current_layout,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
dst_range);
|
||||
|
||||
copy_transfer_regions_impl(cmd, image, sections_to_copy);
|
||||
|
||||
vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
|
||||
@ -895,6 +931,14 @@ namespace vk
|
||||
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
|
||||
}
|
||||
|
||||
vk::insert_image_memory_barrier(
|
||||
cmd,
|
||||
image->handle(),
|
||||
image->current_layout, image->current_layout,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
dst_range);
|
||||
|
||||
copy_transfer_regions_impl(cmd, image, sections_to_copy);
|
||||
|
||||
vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
|
||||
@ -1015,6 +1059,14 @@ namespace vk
|
||||
VkClearDepthStencilValue clear{ 1.f, 255 };
|
||||
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &range);
|
||||
}
|
||||
|
||||
vk::insert_image_memory_barrier(
|
||||
cmd,
|
||||
image->handle(),
|
||||
image->current_layout, image->current_layout,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -177,7 +177,7 @@ namespace vk
|
||||
return { scratch_buffer.get(), is_new };
|
||||
}
|
||||
|
||||
vk::buffer* get_scratch_buffer(const vk::command_buffer& cmd, u64 min_required_size, bool zero_memory)
|
||||
vk::buffer* get_scratch_buffer(const vk::command_buffer& cmd, u64 min_required_size, VkPipelineStageFlags dst_stage_flags, VkAccessFlags dst_access, bool zero_memory)
|
||||
{
|
||||
const auto [buf, init_mem] = get_scratch_buffer(cmd.get_queue_family(), min_required_size);
|
||||
|
||||
@ -191,6 +191,12 @@ namespace vk
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
}
|
||||
else if (dst_access != VK_ACCESS_NONE)
|
||||
{
|
||||
insert_buffer_memory_barrier(cmd, buf->value, 0, min_required_size,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_flags,
|
||||
VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, dst_access);
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
@ -6,7 +6,13 @@ namespace vk
|
||||
VkSampler null_sampler();
|
||||
image_view* null_image_view(const command_buffer& cmd, VkImageViewType type);
|
||||
image* get_typeless_helper(VkFormat format, rsx::format_class format_class, u32 requested_width, u32 requested_height);
|
||||
buffer* get_scratch_buffer(const command_buffer& cmd, u64 min_required_size, bool zero_memory = false);
|
||||
|
||||
buffer* get_scratch_buffer(
|
||||
const command_buffer& cmd,
|
||||
u64 min_required_size,
|
||||
VkPipelineStageFlags dst_stage_flags,
|
||||
VkAccessFlags dst_access,
|
||||
bool zero_memory = false);
|
||||
|
||||
void clear_scratch_resources();
|
||||
}
|
||||
|
||||
@ -38,6 +38,7 @@ game_list_table::game_list_table(game_list_frame* frame, std::shared_ptr<persist
|
||||
horizontalHeader()->setStretchLastSection(true);
|
||||
horizontalHeader()->setDefaultSectionSize(150);
|
||||
horizontalHeader()->setDefaultAlignment(Qt::AlignLeft);
|
||||
horizontalHeader()->setSectionsMovable(true);
|
||||
setContextMenuPolicy(Qt::CustomContextMenu);
|
||||
setAlternatingRowColors(true);
|
||||
setColumnCount(static_cast<int>(gui::game_list_columns::count));
|
||||
@ -72,6 +73,8 @@ void game_list_table::restore_layout(const QByteArray& state)
|
||||
// Nothing to do
|
||||
}
|
||||
|
||||
// Re-apply after restoreState() since it resets setSectionsMovable to false
|
||||
horizontalHeader()->setSectionsMovable(true);
|
||||
// Make sure no columns are squished
|
||||
fix_narrow_columns();
|
||||
|
||||
|
||||
@ -253,7 +253,11 @@ namespace utils
|
||||
|
||||
#ifdef __APPLE__
|
||||
#ifdef ARCH_ARM64
|
||||
auto ptr = ::mmap(use_addr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE | MAP_JIT | c_map_noreserve, -1, 0);
|
||||
// Memory mapping regions will be replaced by file-backed MAP_FIXED mappings
|
||||
// (via shm::map), which is incompatible with MAP_JIT. Only use MAP_JIT for
|
||||
// non-mapping regions that need JIT executable support.
|
||||
const int jit_flag = is_memory_mapping ? 0 : MAP_JIT;
|
||||
auto ptr = ::mmap(use_addr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE | jit_flag | c_map_noreserve, -1, 0);
|
||||
#else
|
||||
auto ptr = ::mmap(use_addr, size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_JIT | c_map_noreserve, -1, 0);
|
||||
#endif
|
||||
|
||||
Loading…
Reference in New Issue
Block a user