mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-06-01 12:15:27 -06:00
- Make the memory allocator a unique child of the render device. Fixes object lifetime issues with swapchain management due to cyclic dependencies
433 lines
11 KiB
C++
433 lines
11 KiB
C++
#pragma once
|
|
#include "Emu/RSX/GSRender.h"
|
|
#include "VKHelpers.h"
|
|
#include "VKTextureCache.h"
|
|
#include "VKRenderTargets.h"
|
|
#include "VKFormats.h"
|
|
#include "VKTextOut.h"
|
|
#include "VKOverlays.h"
|
|
#include "restore_new.h"
|
|
#include "define_new_memleakdetect.h"
|
|
#include "VKProgramBuffer.h"
|
|
#include "../GCM.h"
|
|
#include "../rsx_utils.h"
|
|
#include <thread>
|
|
#include <atomic>
|
|
|
|
namespace vk
|
|
{
|
|
using vertex_cache = rsx::vertex_cache::default_vertex_cache<rsx::vertex_cache::uploaded_range<VkFormat>, VkFormat>;
|
|
using weak_vertex_cache = rsx::vertex_cache::weak_vertex_cache<VkFormat>;
|
|
using null_vertex_cache = vertex_cache;
|
|
|
|
using shader_cache = rsx::shaders_cache<vk::pipeline_props, VKProgramBuffer>;
|
|
|
|
struct vertex_upload_info
|
|
{
|
|
VkPrimitiveTopology primitive;
|
|
u32 vertex_draw_count;
|
|
u32 allocated_vertex_count;
|
|
u32 vertex_index_base;
|
|
u32 persistent_window_offset;
|
|
u32 volatile_window_offset;
|
|
std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
|
|
};
|
|
}
|
|
|
|
// Heap allocation sizes, expressed in MB.
// NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 384
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 64
#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64

// Number of entries in VKGSRender::m_primary_cb_list
#define VK_MAX_ASYNC_CB_COUNT 64
// Number of entries in VKGSRender::frame_context_storage
#define VK_MAX_ASYNC_FRAMES 2
|
|
|
|
// Timestamp source defined in another translation unit. In this header it is used by
// command_buffer_chunk::tag() and frame_context_t::tag_frame_end().
// NOTE(review): the unit of the returned value is not visible from here - confirm at the definition.
extern u64 get_system_time();
|
|
|
|
struct command_buffer_chunk: public vk::command_buffer
|
|
{
|
|
VkFence submit_fence = VK_NULL_HANDLE;
|
|
VkDevice m_device = VK_NULL_HANDLE;
|
|
|
|
std::atomic_bool pending = { false };
|
|
std::atomic<u64> last_sync = { 0 };
|
|
shared_mutex guard_mutex;
|
|
|
|
command_buffer_chunk()
|
|
{}
|
|
|
|
void init_fence(VkDevice dev)
|
|
{
|
|
m_device = dev;
|
|
|
|
VkFenceCreateInfo info = {};
|
|
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
|
CHECK_RESULT(vkCreateFence(m_device, &info, nullptr, &submit_fence));
|
|
}
|
|
|
|
void destroy()
|
|
{
|
|
vk::command_buffer::destroy();
|
|
|
|
if (submit_fence != VK_NULL_HANDLE)
|
|
vkDestroyFence(m_device, submit_fence, nullptr);
|
|
}
|
|
|
|
void tag()
|
|
{
|
|
last_sync = get_system_time();
|
|
}
|
|
|
|
void reset()
|
|
{
|
|
if (pending)
|
|
poke();
|
|
|
|
if (pending)
|
|
wait();
|
|
|
|
CHECK_RESULT(vkResetCommandBuffer(commands, 0));
|
|
}
|
|
|
|
bool poke()
|
|
{
|
|
reader_lock lock(guard_mutex);
|
|
|
|
if (vkGetFenceStatus(m_device, submit_fence) == VK_SUCCESS)
|
|
{
|
|
lock.upgrade();
|
|
|
|
if (pending)
|
|
{
|
|
pending = false;
|
|
vk::reset_fence(&submit_fence);
|
|
}
|
|
}
|
|
|
|
return !pending;
|
|
}
|
|
|
|
void wait()
|
|
{
|
|
reader_lock lock(guard_mutex);
|
|
|
|
if (!pending)
|
|
return;
|
|
|
|
switch(vkGetFenceStatus(m_device, submit_fence))
|
|
{
|
|
case VK_SUCCESS:
|
|
break;
|
|
case VK_NOT_READY:
|
|
CHECK_RESULT(vkWaitForFences(m_device, 1, &submit_fence, VK_TRUE, UINT64_MAX));
|
|
break;
|
|
}
|
|
|
|
lock.upgrade();
|
|
|
|
if (pending)
|
|
{
|
|
vk::reset_fence(&submit_fence);
|
|
pending = false;
|
|
}
|
|
}
|
|
};
|
|
|
|
struct occlusion_data
|
|
{
|
|
std::vector<u32> indices;
|
|
command_buffer_chunk* command_buffer_to_wait = nullptr;
|
|
};
|
|
|
|
struct frame_context_t
|
|
{
|
|
VkSemaphore present_semaphore = VK_NULL_HANDLE;
|
|
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
|
|
vk::descriptor_pool descriptor_pool;
|
|
u32 used_descriptors = 0;
|
|
|
|
std::vector<std::unique_ptr<vk::buffer_view>> buffer_views_to_clean;
|
|
std::vector<std::unique_ptr<vk::sampler>> samplers_to_clean;
|
|
|
|
u32 present_image = UINT32_MAX;
|
|
command_buffer_chunk* swap_command_buffer = nullptr;
|
|
|
|
//Heap pointers
|
|
s64 attrib_heap_ptr = 0;
|
|
s64 ubo_heap_ptr = 0;
|
|
s64 vtxconst_heap_ptr = 0;
|
|
s64 index_heap_ptr = 0;
|
|
s64 texture_upload_heap_ptr = 0;
|
|
|
|
u64 last_frame_sync_time = 0;
|
|
|
|
//Copy shareable information
|
|
void grab_resources(frame_context_t &other)
|
|
{
|
|
present_semaphore = other.present_semaphore;
|
|
descriptor_set = other.descriptor_set;
|
|
descriptor_pool = other.descriptor_pool;
|
|
used_descriptors = other.used_descriptors;
|
|
|
|
attrib_heap_ptr = other.attrib_heap_ptr;
|
|
ubo_heap_ptr = other.attrib_heap_ptr;
|
|
vtxconst_heap_ptr = other.vtxconst_heap_ptr;
|
|
index_heap_ptr = other.attrib_heap_ptr;
|
|
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
|
|
}
|
|
|
|
//Exchange storage (non-copyable)
|
|
void swap_storage(frame_context_t &other)
|
|
{
|
|
std::swap(buffer_views_to_clean, other.buffer_views_to_clean);
|
|
std::swap(samplers_to_clean, other.samplers_to_clean);
|
|
}
|
|
|
|
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 vtxconst_loc, s64 index_loc, s64 texture_loc)
|
|
{
|
|
attrib_heap_ptr = attrib_loc;
|
|
ubo_heap_ptr = ubo_loc;
|
|
vtxconst_heap_ptr = vtxconst_loc;
|
|
index_heap_ptr = index_loc;
|
|
texture_upload_heap_ptr = texture_loc;
|
|
|
|
last_frame_sync_time = get_system_time();
|
|
}
|
|
|
|
void reset_heap_ptrs()
|
|
{
|
|
last_frame_sync_time = 0;
|
|
}
|
|
};
|
|
|
|
struct flush_request_task
|
|
{
|
|
atomic_t<bool> pending_state{ false }; //Flush request status; true if rsx::thread is yet to service this request
|
|
atomic_t<int> num_waiters{ 0 }; //Number of threads waiting for this request to be serviced
|
|
bool hard_sync = false;
|
|
|
|
flush_request_task(){}
|
|
|
|
void post(bool _hard_sync)
|
|
{
|
|
hard_sync = (hard_sync || _hard_sync);
|
|
pending_state = true;
|
|
num_waiters++;
|
|
}
|
|
|
|
void remove_one()
|
|
{
|
|
num_waiters--;
|
|
}
|
|
|
|
void clear_pending_flag()
|
|
{
|
|
hard_sync = false;
|
|
pending_state.store(false);
|
|
}
|
|
|
|
bool pending() const
|
|
{
|
|
return pending_state.load();
|
|
}
|
|
|
|
void consumer_wait() const
|
|
{
|
|
while (num_waiters.load() != 0)
|
|
{
|
|
_mm_lfence();
|
|
_mm_pause();
|
|
}
|
|
}
|
|
|
|
void producer_wait() const
|
|
{
|
|
while (pending_state.load())
|
|
{
|
|
_mm_lfence();
|
|
std::this_thread::yield();
|
|
}
|
|
}
|
|
};
|
|
|
|
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
|
|
{
|
|
private:
|
|
VKFragmentProgram m_fragment_prog;
|
|
VKVertexProgram m_vertex_prog;
|
|
vk::glsl::program *m_program;
|
|
|
|
vk::texture_cache m_texture_cache;
|
|
rsx::vk_render_targets m_rtts;
|
|
|
|
std::unique_ptr<vk::buffer> null_buffer;
|
|
std::unique_ptr<vk::buffer_view> null_buffer_view;
|
|
|
|
std::unique_ptr<vk::text_writer> m_text_writer;
|
|
std::unique_ptr<vk::depth_convert_pass> m_depth_converter;
|
|
std::unique_ptr<vk::ui_overlay_renderer> m_ui_renderer;
|
|
std::unique_ptr<vk::attachment_clear_pass> m_attachment_clear_pass;
|
|
|
|
shared_mutex m_sampler_mutex;
|
|
u64 surface_store_tag = 0;
|
|
std::atomic_bool m_samplers_dirty = { true };
|
|
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count> fs_sampler_state = {};
|
|
std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count> vs_sampler_state = {};
|
|
std::array<std::unique_ptr<vk::sampler>, rsx::limits::fragment_textures_count> fs_sampler_handles;
|
|
std::array<std::unique_ptr<vk::sampler>, rsx::limits::vertex_textures_count> vs_sampler_handles;
|
|
|
|
std::unique_ptr<vk::buffer_view> m_persistent_attribute_storage;
|
|
std::unique_ptr<vk::buffer_view> m_volatile_attribute_storage;
|
|
|
|
public:
|
|
//vk::fbo draw_fbo;
|
|
std::unique_ptr<vk::vertex_cache> m_vertex_cache;
|
|
std::unique_ptr<vk::shader_cache> m_shaders_cache;
|
|
|
|
private:
|
|
std::unique_ptr<VKProgramBuffer> m_prog_buffer;
|
|
|
|
std::unique_ptr<vk::swapchain_base> m_swapchain;
|
|
vk::context m_thread_context;
|
|
vk::render_device *m_device;
|
|
|
|
//Vulkan internals
|
|
vk::command_pool m_command_buffer_pool;
|
|
vk::occlusion_query_pool m_occlusion_query_pool;
|
|
bool m_occlusion_query_active = false;
|
|
rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
|
|
std::unordered_map<u32, occlusion_data> m_occlusion_map;
|
|
|
|
shared_mutex m_secondary_cb_guard;
|
|
vk::command_pool m_secondary_command_buffer_pool;
|
|
vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations
|
|
|
|
u32 m_current_cb_index = 0;
|
|
std::array<command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
|
|
command_buffer_chunk* m_current_command_buffer = nullptr;
|
|
|
|
std::array<VkRenderPass, 120> m_render_passes;
|
|
|
|
VkDescriptorSetLayout descriptor_layouts;
|
|
VkPipelineLayout pipeline_layout;
|
|
|
|
std::unique_ptr<vk::framebuffer_holder> m_draw_fbo;
|
|
|
|
bool present_surface_dirty_flag = false;
|
|
bool renderer_unavailable = false;
|
|
|
|
u64 m_last_heap_sync_time = 0;
|
|
vk::vk_data_heap m_attrib_ring_info;
|
|
vk::vk_data_heap m_uniform_buffer_ring_info;
|
|
vk::vk_data_heap m_transform_constants_ring_info;
|
|
vk::vk_data_heap m_index_buffer_ring_info;
|
|
vk::vk_data_heap m_texture_upload_buffer_ring_info;
|
|
|
|
VkDescriptorBufferInfo m_vertex_state_buffer_info;
|
|
VkDescriptorBufferInfo m_vertex_constants_buffer_info;
|
|
VkDescriptorBufferInfo m_fragment_state_buffer_info;
|
|
|
|
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
|
|
//Temp frame context to use if the real frame queue is overburdened. Only used for storage
|
|
frame_context_t m_aux_frame_context;
|
|
|
|
//framebuffers are shared between frame contexts
|
|
std::list<std::unique_ptr<vk::framebuffer_holder>> m_framebuffers_to_clean;
|
|
|
|
u32 m_current_queue_index = 0;
|
|
frame_context_t* m_current_frame = nullptr;
|
|
|
|
u32 m_client_width = 0;
|
|
u32 m_client_height = 0;
|
|
|
|
// Draw call stats
|
|
u32 m_draw_calls = 0;
|
|
|
|
// Timers
|
|
s64 m_setup_time = 0;
|
|
s64 m_vertex_upload_time = 0;
|
|
s64 m_textures_upload_time = 0;
|
|
s64 m_draw_time = 0;
|
|
s64 m_flip_time = 0;
|
|
|
|
u8 m_draw_buffers_count = 0;
|
|
bool m_flush_draw_buffers = false;
|
|
|
|
shared_mutex m_flush_queue_mutex;
|
|
flush_request_task m_flush_requests;
|
|
|
|
std::thread::id rsx_thread;
|
|
std::atomic<u64> m_last_sync_event = { 0 };
|
|
|
|
bool render_pass_open = false;
|
|
size_t m_current_renderpass_id = 0;
|
|
|
|
//Vertex layout
|
|
rsx::vertex_input_layout m_vertex_layout;
|
|
|
|
#if !defined(_WIN32) && defined(HAVE_VULKAN)
|
|
Display *m_display_handle = nullptr;
|
|
#endif
|
|
|
|
public:
|
|
VKGSRender();
|
|
~VKGSRender();
|
|
|
|
private:
|
|
void clear_surface(u32 mask);
|
|
void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
|
|
void open_command_buffer();
|
|
void sync_at_semaphore_release();
|
|
void prepare_rtts(rsx::framebuffer_creation_context context);
|
|
void copy_render_targets_to_dma_location();
|
|
|
|
void flush_command_queue(bool hard_sync = false);
|
|
void queue_swap_request();
|
|
void process_swap_request(frame_context_t *ctx, bool free_resources = false);
|
|
void advance_queued_frames();
|
|
void present(frame_context_t *ctx);
|
|
void reinitialize_swapchain();
|
|
|
|
void begin_render_pass();
|
|
void close_render_pass();
|
|
|
|
void update_draw_state();
|
|
|
|
void check_heap_status();
|
|
|
|
vk::vertex_upload_info upload_vertex_data();
|
|
|
|
public:
|
|
bool check_program_status();
|
|
void load_program(const vk::vertex_upload_info& vertex_info);
|
|
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
|
|
void read_buffers();
|
|
void write_buffers();
|
|
void set_viewport();
|
|
|
|
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
|
void end_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
|
bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override;
|
|
void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override;
|
|
void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override;
|
|
|
|
protected:
|
|
void begin() override;
|
|
void end() override;
|
|
|
|
void on_init_thread() override;
|
|
void on_exit() override;
|
|
bool do_method(u32 id, u32 arg) override;
|
|
void flip(int buffer) override;
|
|
|
|
void do_local_task(rsx::FIFO_state state) override;
|
|
bool scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) override;
|
|
void notify_tile_unbound(u32 tile) override;
|
|
|
|
bool on_access_violation(u32 address, bool is_writing) override;
|
|
void on_invalidate_memory_range(u32 address_base, u32 size) override;
|
|
};
|