Merge pull request #4443 from ameerj/vk-async-shaders
vulkan_renderer: Async shader/graphics pipeline compilation
This commit is contained in:
		
						commit
						cbaf1bc711
					
				@ -177,15 +177,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (device.UseAsynchronousShaders()) {
 | 
			
		||||
        // Max worker threads we should allow
 | 
			
		||||
        constexpr u32 MAX_THREADS = 4;
 | 
			
		||||
        // Deduce how many threads we can use
 | 
			
		||||
        const u32 threads_used = std::thread::hardware_concurrency() / 4;
 | 
			
		||||
        // Always allow at least 1 thread regardless of our settings
 | 
			
		||||
        const auto max_worker_count = std::max(1U, threads_used);
 | 
			
		||||
        // Don't use more than MAX_THREADS
 | 
			
		||||
        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
 | 
			
		||||
        async_shaders.AllocateWorkers(worker_count);
 | 
			
		||||
        async_shaders.AllocateWorkers();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -382,6 +382,8 @@ bool VKDevice::Create() {
 | 
			
		||||
 | 
			
		||||
    graphics_queue = logical.GetQueue(graphics_family);
 | 
			
		||||
    present_queue = logical.GetQueue(present_family);
 | 
			
		||||
 | 
			
		||||
    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -202,6 +202,11 @@ public:
 | 
			
		||||
        return reported_extensions;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns true if the setting for async shader compilation is enabled.
 | 
			
		||||
    bool UseAsynchronousShaders() const {
 | 
			
		||||
        return use_asynchronous_shaders;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Checks if the physical device is suitable.
 | 
			
		||||
    static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
 | 
			
		||||
 | 
			
		||||
@ -252,6 +257,9 @@ private:
 | 
			
		||||
    bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
 | 
			
		||||
    bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
 | 
			
		||||
 | 
			
		||||
    // Asynchronous Graphics Pipeline setting
 | 
			
		||||
    bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
 | 
			
		||||
 | 
			
		||||
    // Telemetry parameters
 | 
			
		||||
    std::string vendor_name;                      ///< Device's driver name.
 | 
			
		||||
    std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
 | 
			
		||||
 | 
			
		||||
@ -29,7 +29,7 @@ void InnerFence::Queue() {
 | 
			
		||||
    }
 | 
			
		||||
    ASSERT(!event);
 | 
			
		||||
 | 
			
		||||
    event = device.GetLogical().CreateEvent();
 | 
			
		||||
    event = device.GetLogical().CreateNewEvent();
 | 
			
		||||
    ticks = scheduler.Ticks();
 | 
			
		||||
 | 
			
		||||
    scheduler.RequestOutsideRenderPassOperationContext();
 | 
			
		||||
 | 
			
		||||
@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
 | 
			
		||||
                                       const GraphicsPipelineCacheKey& key,
 | 
			
		||||
                                       vk::Span<VkDescriptorSetLayoutBinding> bindings,
 | 
			
		||||
                                       const SPIRVProgram& program)
 | 
			
		||||
    : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
 | 
			
		||||
    : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
 | 
			
		||||
      descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
 | 
			
		||||
      descriptor_allocator{descriptor_pool, *descriptor_set_layout},
 | 
			
		||||
      update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
 | 
			
		||||
      descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
 | 
			
		||||
                                                                        program)},
 | 
			
		||||
      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
 | 
			
		||||
                                                                             key.renderpass_params,
 | 
			
		||||
                                                                             program)} {}
 | 
			
		||||
      renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
 | 
			
		||||
      pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
 | 
			
		||||
 | 
			
		||||
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
 | 
			
		||||
 | 
			
		||||
@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
 | 
			
		||||
 | 
			
		||||
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
 | 
			
		||||
                                                const SPIRVProgram& program) const {
 | 
			
		||||
    const auto& state = fixed_state;
 | 
			
		||||
    const auto& state = cache_key.fixed_state;
 | 
			
		||||
    const auto& viewport_swizzles = state.viewport_swizzles;
 | 
			
		||||
 | 
			
		||||
    FixedPipelineState::DynamicState dynamic;
 | 
			
		||||
 | 
			
		||||
@ -19,7 +19,27 @@ namespace Vulkan {
 | 
			
		||||
 | 
			
		||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 | 
			
		||||
 | 
			
		||||
struct GraphicsPipelineCacheKey;
 | 
			
		||||
struct GraphicsPipelineCacheKey {
 | 
			
		||||
    RenderPassParams renderpass_params;
 | 
			
		||||
    u32 padding;
 | 
			
		||||
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
 | 
			
		||||
    FixedPipelineState fixed_state;
 | 
			
		||||
 | 
			
		||||
    std::size_t Hash() const noexcept;
 | 
			
		||||
 | 
			
		||||
    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
 | 
			
		||||
 | 
			
		||||
    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
 | 
			
		||||
        return !operator==(rhs);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::size_t Size() const noexcept {
 | 
			
		||||
        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
 | 
			
		||||
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
 | 
			
		||||
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 | 
			
		||||
 | 
			
		||||
class VKDescriptorPool;
 | 
			
		||||
class VKDevice;
 | 
			
		||||
@ -54,6 +74,10 @@ public:
 | 
			
		||||
        return renderpass;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    GraphicsPipelineCacheKey GetCacheKey() const {
 | 
			
		||||
        return cache_key;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    vk::DescriptorSetLayout CreateDescriptorSetLayout(
 | 
			
		||||
        vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
 | 
			
		||||
@ -70,7 +94,7 @@ private:
 | 
			
		||||
 | 
			
		||||
    const VKDevice& device;
 | 
			
		||||
    VKScheduler& scheduler;
 | 
			
		||||
    const FixedPipelineState fixed_state;
 | 
			
		||||
    const GraphicsPipelineCacheKey cache_key;
 | 
			
		||||
    const u64 hash;
 | 
			
		||||
 | 
			
		||||
    vk::DescriptorSetLayout descriptor_set_layout;
 | 
			
		||||
 | 
			
		||||
@ -28,6 +28,7 @@
 | 
			
		||||
#include "video_core/shader/compiler_settings.h"
 | 
			
		||||
#include "video_core/shader/memory_util.h"
 | 
			
		||||
#include "video_core/shader_cache.h"
 | 
			
		||||
#include "video_core/shader_notify.h"
 | 
			
		||||
 | 
			
		||||
namespace Vulkan {
 | 
			
		||||
 | 
			
		||||
@ -205,24 +206,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
 | 
			
		||||
    return last_shaders = shaders;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
 | 
			
		||||
VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
 | 
			
		||||
    const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
 | 
			
		||||
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 | 
			
		||||
 | 
			
		||||
    if (last_graphics_pipeline && last_graphics_key == key) {
 | 
			
		||||
        return *last_graphics_pipeline;
 | 
			
		||||
        return last_graphics_pipeline;
 | 
			
		||||
    }
 | 
			
		||||
    last_graphics_key = key;
 | 
			
		||||
 | 
			
		||||
    if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) {
 | 
			
		||||
        std::unique_lock lock{pipeline_cache};
 | 
			
		||||
        const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
 | 
			
		||||
        if (is_cache_miss) {
 | 
			
		||||
            system.GPU().ShaderNotify().MarkSharderBuilding();
 | 
			
		||||
            LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
 | 
			
		||||
            const auto [program, bindings] = DecompileShaders(key.fixed_state);
 | 
			
		||||
            async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
 | 
			
		||||
                                            update_descriptor_queue, renderpass_cache, bindings,
 | 
			
		||||
                                            program, key);
 | 
			
		||||
        }
 | 
			
		||||
        last_graphics_pipeline = pair->second.get();
 | 
			
		||||
        return last_graphics_pipeline;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
 | 
			
		||||
    auto& entry = pair->second;
 | 
			
		||||
    if (is_cache_miss) {
 | 
			
		||||
        system.GPU().ShaderNotify().MarkSharderBuilding();
 | 
			
		||||
        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
 | 
			
		||||
        const auto [program, bindings] = DecompileShaders(key);
 | 
			
		||||
        const auto [program, bindings] = DecompileShaders(key.fixed_state);
 | 
			
		||||
        entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
 | 
			
		||||
                                                     update_descriptor_queue, renderpass_cache, key,
 | 
			
		||||
                                                     bindings, program);
 | 
			
		||||
        system.GPU().ShaderNotify().MarkShaderComplete();
 | 
			
		||||
    }
 | 
			
		||||
    return *(last_graphics_pipeline = entry.get());
 | 
			
		||||
    last_graphics_pipeline = entry.get();
 | 
			
		||||
    return last_graphics_pipeline;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
 | 
			
		||||
@ -277,6 +297,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 | 
			
		||||
    return *entry;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
 | 
			
		||||
    system.GPU().ShaderNotify().MarkShaderComplete();
 | 
			
		||||
    std::unique_lock lock{pipeline_cache};
 | 
			
		||||
    graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void VKPipelineCache::OnShaderRemoval(Shader* shader) {
 | 
			
		||||
    bool finished = false;
 | 
			
		||||
    const auto Finish = [&] {
 | 
			
		||||
@ -312,8 +338,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
 | 
			
		||||
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
 | 
			
		||||
    const auto& fixed_state = key.fixed_state;
 | 
			
		||||
VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
 | 
			
		||||
    auto& memory_manager = system.GPU().MemoryManager();
 | 
			
		||||
    const auto& gpu = system.GPU().Maxwell3D();
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -22,6 +22,7 @@
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/wrapper.h"
 | 
			
		||||
#include "video_core/shader/async_shaders.h"
 | 
			
		||||
#include "video_core/shader/memory_util.h"
 | 
			
		||||
#include "video_core/shader/registry.h"
 | 
			
		||||
#include "video_core/shader/shader_ir.h"
 | 
			
		||||
@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue;
 | 
			
		||||
 | 
			
		||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 | 
			
		||||
 | 
			
		||||
struct GraphicsPipelineCacheKey {
 | 
			
		||||
    RenderPassParams renderpass_params;
 | 
			
		||||
    u32 padding;
 | 
			
		||||
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
 | 
			
		||||
    FixedPipelineState fixed_state;
 | 
			
		||||
 | 
			
		||||
    std::size_t Hash() const noexcept;
 | 
			
		||||
 | 
			
		||||
    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
 | 
			
		||||
 | 
			
		||||
    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
 | 
			
		||||
        return !operator==(rhs);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::size_t Size() const noexcept {
 | 
			
		||||
        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
 | 
			
		||||
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
 | 
			
		||||
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 | 
			
		||||
 | 
			
		||||
struct ComputePipelineCacheKey {
 | 
			
		||||
    GPUVAddr shader;
 | 
			
		||||
    u32 shared_memory_size;
 | 
			
		||||
@ -152,16 +131,19 @@ public:
 | 
			
		||||
 | 
			
		||||
    std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
 | 
			
		||||
 | 
			
		||||
    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
 | 
			
		||||
    VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
 | 
			
		||||
                                            VideoCommon::Shader::AsyncShaders& async_shaders);
 | 
			
		||||
 | 
			
		||||
    VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 | 
			
		||||
 | 
			
		||||
    void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
    void OnShaderRemoval(Shader* shader) final;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
 | 
			
		||||
        const GraphicsPipelineCacheKey& key);
 | 
			
		||||
        const FixedPipelineState& fixed_state);
 | 
			
		||||
 | 
			
		||||
    Core::System& system;
 | 
			
		||||
    const VKDevice& device;
 | 
			
		||||
@ -178,6 +160,7 @@ private:
 | 
			
		||||
    GraphicsPipelineCacheKey last_graphics_key;
 | 
			
		||||
    VKGraphicsPipeline* last_graphics_pipeline = nullptr;
 | 
			
		||||
 | 
			
		||||
    std::mutex pipeline_cache;
 | 
			
		||||
    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
 | 
			
		||||
        graphics_cache;
 | 
			
		||||
    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
 | 
			
		||||
 | 
			
		||||
@ -14,6 +14,7 @@
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "common/microprofile.h"
 | 
			
		||||
#include "common/scope_exit.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/settings.h"
 | 
			
		||||
#include "video_core/engines/kepler_compute.h"
 | 
			
		||||
@ -400,8 +401,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
 | 
			
		||||
      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
 | 
			
		||||
      sampler_cache(device),
 | 
			
		||||
      fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
 | 
			
		||||
      query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
 | 
			
		||||
      query_cache(system, *this, device, scheduler),
 | 
			
		||||
      wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
 | 
			
		||||
    scheduler.SetQueryCache(query_cache);
 | 
			
		||||
    if (device.UseAsynchronousShaders()) {
 | 
			
		||||
        async_shaders.AllocateWorkers();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
RasterizerVulkan::~RasterizerVulkan() = default;
 | 
			
		||||
@ -413,6 +418,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 | 
			
		||||
 | 
			
		||||
    query_cache.UpdateCounters();
 | 
			
		||||
 | 
			
		||||
    SCOPE_EXIT({ system.GPU().TickWork(); });
 | 
			
		||||
 | 
			
		||||
    const auto& gpu = system.GPU().Maxwell3D();
 | 
			
		||||
    GraphicsPipelineCacheKey key;
 | 
			
		||||
    key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
 | 
			
		||||
@ -439,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 | 
			
		||||
    key.renderpass_params = GetRenderPassParams(texceptions);
 | 
			
		||||
    key.padding = 0;
 | 
			
		||||
 | 
			
		||||
    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
 | 
			
		||||
    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
 | 
			
		||||
    auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
 | 
			
		||||
    if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
 | 
			
		||||
        // Async graphics pipeline was not ready.
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const auto renderpass = pipeline.GetRenderPass();
 | 
			
		||||
    scheduler.BindGraphicsPipeline(pipeline->GetHandle());
 | 
			
		||||
 | 
			
		||||
    const auto renderpass = pipeline->GetRenderPass();
 | 
			
		||||
    const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
 | 
			
		||||
    scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
 | 
			
		||||
 | 
			
		||||
@ -452,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 | 
			
		||||
 | 
			
		||||
    BeginTransformFeedback();
 | 
			
		||||
 | 
			
		||||
    const auto pipeline_layout = pipeline.GetLayout();
 | 
			
		||||
    const auto descriptor_set = pipeline.CommitDescriptorSet();
 | 
			
		||||
    const auto pipeline_layout = pipeline->GetLayout();
 | 
			
		||||
    const auto descriptor_set = pipeline->CommitDescriptorSet();
 | 
			
		||||
    scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
 | 
			
		||||
        if (descriptor_set) {
 | 
			
		||||
            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
 | 
			
		||||
@ -463,8 +475,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    EndTransformFeedback();
 | 
			
		||||
 | 
			
		||||
    system.GPU().TickWork();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerVulkan::Clear() {
 | 
			
		||||
 | 
			
		||||
@ -32,6 +32,7 @@
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/wrapper.h"
 | 
			
		||||
#include "video_core/shader/async_shaders.h"
 | 
			
		||||
 | 
			
		||||
namespace Core {
 | 
			
		||||
class System;
 | 
			
		||||
@ -136,6 +137,14 @@ public:
 | 
			
		||||
                           u32 pixel_stride) override;
 | 
			
		||||
    void SetupDirtyFlags() override;
 | 
			
		||||
 | 
			
		||||
    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
 | 
			
		||||
        return async_shaders;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
 | 
			
		||||
        return async_shaders;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Maximum supported size that a constbuffer can have in bytes.
 | 
			
		||||
    static constexpr std::size_t MaxConstbufferSize = 0x10000;
 | 
			
		||||
    static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
 | 
			
		||||
@ -297,6 +306,7 @@ private:
 | 
			
		||||
    vk::Buffer default_buffer;
 | 
			
		||||
    VKMemoryCommit default_buffer_commit;
 | 
			
		||||
    vk::Event wfi_event;
 | 
			
		||||
    VideoCommon::Shader::AsyncShaders async_shaders;
 | 
			
		||||
 | 
			
		||||
    std::array<View, Maxwell::NumRenderTargets> color_attachments;
 | 
			
		||||
    View zeta_attachment;
 | 
			
		||||
 | 
			
		||||
@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
 | 
			
		||||
    return ShaderModule(object, handle, *dld);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Event Device::CreateEvent() const {
 | 
			
		||||
Event Device::CreateNewEvent() const {
 | 
			
		||||
    static constexpr VkEventCreateInfo ci{
 | 
			
		||||
        .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
 | 
			
		||||
        .pNext = nullptr,
 | 
			
		||||
 | 
			
		||||
@ -721,7 +721,7 @@ public:
 | 
			
		||||
 | 
			
		||||
    ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
 | 
			
		||||
 | 
			
		||||
    Event CreateEvent() const;
 | 
			
		||||
    Event CreateNewEvent() const;
 | 
			
		||||
 | 
			
		||||
    SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,6 @@
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <chrono>
 | 
			
		||||
#include <condition_variable>
 | 
			
		||||
#include <mutex>
 | 
			
		||||
#include <thread>
 | 
			
		||||
@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() {
 | 
			
		||||
    KillWorkers();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
 | 
			
		||||
    // If we're already have workers queued or don't want to queue workers, ignore
 | 
			
		||||
    if (num_workers == worker_threads.size() || num_workers == 0) {
 | 
			
		||||
void AsyncShaders::AllocateWorkers() {
 | 
			
		||||
    // Max worker threads we should allow
 | 
			
		||||
    constexpr u32 MAX_THREADS = 4;
 | 
			
		||||
    // Deduce how many threads we can use
 | 
			
		||||
    const u32 threads_used = std::thread::hardware_concurrency() / 4;
 | 
			
		||||
    // Always allow at least 1 thread regardless of our settings
 | 
			
		||||
    const auto max_worker_count = std::max(1U, threads_used);
 | 
			
		||||
    // Don't use more than MAX_THREADS
 | 
			
		||||
    const auto num_workers = std::min(max_worker_count, MAX_THREADS);
 | 
			
		||||
 | 
			
		||||
    // If we already have workers queued, ignore
 | 
			
		||||
    if (num_workers == worker_threads.size()) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
 | 
			
		||||
                                     VideoCommon::Shader::CompilerSettings compiler_settings,
 | 
			
		||||
                                     const VideoCommon::Shader::Registry& registry,
 | 
			
		||||
                                     VAddr cpu_addr) {
 | 
			
		||||
    WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
 | 
			
		||||
                                                    : AsyncShaders::Backend::OpenGL,
 | 
			
		||||
                        device,
 | 
			
		||||
                        shader_type,
 | 
			
		||||
                        uid,
 | 
			
		||||
                        std::move(code),
 | 
			
		||||
                        std::move(code_b),
 | 
			
		||||
                        main_offset,
 | 
			
		||||
                        compiler_settings,
 | 
			
		||||
                        registry,
 | 
			
		||||
                        cpu_addr};
 | 
			
		||||
    WorkerParams params{
 | 
			
		||||
        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
 | 
			
		||||
        .device = &device,
 | 
			
		||||
        .shader_type = shader_type,
 | 
			
		||||
        .uid = uid,
 | 
			
		||||
        .code = std::move(code),
 | 
			
		||||
        .code_b = std::move(code_b),
 | 
			
		||||
        .main_offset = main_offset,
 | 
			
		||||
        .compiler_settings = compiler_settings,
 | 
			
		||||
        .registry = registry,
 | 
			
		||||
        .cpu_address = cpu_addr,
 | 
			
		||||
    };
 | 
			
		||||
    std::unique_lock lock(queue_mutex);
 | 
			
		||||
    pending_queue.push_back(std::move(params));
 | 
			
		||||
    pending_queue.push(std::move(params));
 | 
			
		||||
    cv.notify_one();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Queues the construction of a Vulkan graphics pipeline for the worker threads.
///
/// The device, scheduler, descriptor pool, update queue, renderpass cache and pipeline cache
/// are stored as raw pointers inside the work item and dereferenced later by the worker, so
/// they must stay alive until the queued work has been processed.
///
/// @param pp_cache  Pipeline cache that receives the finished pipeline.
/// @param bindings  Descriptor set layout bindings for the pipeline (consumed).
/// @param program   SPIR-V program to build the pipeline from (consumed).
/// @param key       Cache key the finished pipeline is stored under.
void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
                                     Vulkan::VKDescriptorPool& descriptor_pool,
                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                                     Vulkan::VKRenderPassCache& renderpass_cache,
                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
                                     Vulkan::SPIRVProgram program,
                                     Vulkan::GraphicsPipelineCacheKey key) {
    // bindings and program are by-value sink parameters: move them into the work item
    // instead of copying them a second time.
    WorkerParams params{
        .backend = Backend::Vulkan,
        .pp_cache = pp_cache,
        .vk_device = &device,
        .scheduler = &scheduler,
        .descriptor_pool = &descriptor_pool,
        .update_descriptor_queue = &update_descriptor_queue,
        .renderpass_cache = &renderpass_cache,
        .bindings = std::move(bindings),
        .program = std::move(program),
        .key = key,
    };

    std::unique_lock lock(queue_mutex);
    pending_queue.push(std::move(params));
    cv.notify_one();
}
 | 
			
		||||
 | 
			
		||||
void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
 | 
			
		||||
    using namespace std::chrono_literals;
 | 
			
		||||
    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
 | 
			
		||||
        std::unique_lock lock{queue_mutex};
 | 
			
		||||
        cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
 | 
			
		||||
@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
 | 
			
		||||
        if (pending_queue.empty()) {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Pull work from queue
 | 
			
		||||
        WorkerParams work = std::move(pending_queue.front());
 | 
			
		||||
        pending_queue.pop_front();
 | 
			
		||||
 | 
			
		||||
        pending_queue.pop();
 | 
			
		||||
        lock.unlock();
 | 
			
		||||
 | 
			
		||||
        if (work.backend == AsyncShaders::Backend::OpenGL ||
 | 
			
		||||
            work.backend == AsyncShaders::Backend::GLASM) {
 | 
			
		||||
            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
 | 
			
		||||
        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
 | 
			
		||||
            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
 | 
			
		||||
            const auto scope = context->Acquire();
 | 
			
		||||
            auto program =
 | 
			
		||||
                OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);
 | 
			
		||||
                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
 | 
			
		||||
            Result result{};
 | 
			
		||||
            result.backend = work.backend;
 | 
			
		||||
            result.cpu_address = work.cpu_address;
 | 
			
		||||
@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
 | 
			
		||||
            result.code_b = std::move(work.code_b);
 | 
			
		||||
            result.shader_type = work.shader_type;
 | 
			
		||||
 | 
			
		||||
            if (work.backend == AsyncShaders::Backend::OpenGL) {
 | 
			
		||||
            if (work.backend == Backend::OpenGL) {
 | 
			
		||||
                result.program.opengl = std::move(program->source_program);
 | 
			
		||||
            } else if (work.backend == AsyncShaders::Backend::GLASM) {
 | 
			
		||||
            } else if (work.backend == Backend::GLASM) {
 | 
			
		||||
                result.program.glasm = std::move(program->assembly_program);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
 | 
			
		||||
                std::unique_lock complete_lock(completed_mutex);
 | 
			
		||||
                finished_work.push_back(std::move(result));
 | 
			
		||||
            }
 | 
			
		||||
        } else if (work.backend == Backend::Vulkan) {
 | 
			
		||||
            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
 | 
			
		||||
                *work.vk_device, *work.scheduler, *work.descriptor_pool,
 | 
			
		||||
                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
 | 
			
		||||
                work.program);
 | 
			
		||||
 | 
			
		||||
            work.pp_cache->EmplacePipeline(std::move(pipeline));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -14,6 +14,10 @@
 | 
			
		||||
#include "video_core/renderer_opengl/gl_device.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_device.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
 | 
			
		||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
 | 
			
		||||
 | 
			
		||||
namespace Core::Frontend {
 | 
			
		||||
class EmuWindow;
 | 
			
		||||
@ -24,6 +28,10 @@ namespace Tegra {
 | 
			
		||||
class GPU;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace Vulkan {
 | 
			
		||||
class VKPipelineCache;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace VideoCommon::Shader {
 | 
			
		||||
 | 
			
		||||
class AsyncShaders {
 | 
			
		||||
@ -31,6 +39,7 @@ public:
 | 
			
		||||
    enum class Backend {
 | 
			
		||||
        OpenGL,
 | 
			
		||||
        GLASM,
 | 
			
		||||
        Vulkan,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    struct ResultPrograms {
 | 
			
		||||
@ -52,7 +61,7 @@ public:
 | 
			
		||||
    ~AsyncShaders();
 | 
			
		||||
 | 
			
		||||
    /// Start up shader worker threads
 | 
			
		||||
    void AllocateWorkers(std::size_t num_workers);
 | 
			
		||||
    void AllocateWorkers();
 | 
			
		||||
 | 
			
		||||
    /// Clear the shader queue and kill all worker threads
 | 
			
		||||
    void FreeWorkers();
 | 
			
		||||
@ -76,6 +85,14 @@ public:
 | 
			
		||||
                           VideoCommon::Shader::CompilerSettings compiler_settings,
 | 
			
		||||
                           const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
 | 
			
		||||
 | 
			
		||||
    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
 | 
			
		||||
                           Vulkan::VKScheduler& scheduler,
 | 
			
		||||
                           Vulkan::VKDescriptorPool& descriptor_pool,
 | 
			
		||||
                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
 | 
			
		||||
                           Vulkan::VKRenderPassCache& renderpass_cache,
 | 
			
		||||
                           std::vector<VkDescriptorSetLayoutBinding> bindings,
 | 
			
		||||
                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
 | 
			
		||||
 | 
			
		||||
@ -83,16 +100,28 @@ private:
 | 
			
		||||
    bool HasWorkQueued();
 | 
			
		||||
 | 
			
		||||
    struct WorkerParams {
 | 
			
		||||
        AsyncShaders::Backend backend;
 | 
			
		||||
        OpenGL::Device device;
 | 
			
		||||
        Backend backend;
 | 
			
		||||
        // For OGL
 | 
			
		||||
        const OpenGL::Device* device;
 | 
			
		||||
        Tegra::Engines::ShaderType shader_type;
 | 
			
		||||
        u64 uid;
 | 
			
		||||
        std::vector<u64> code;
 | 
			
		||||
        std::vector<u64> code_b;
 | 
			
		||||
        u32 main_offset;
 | 
			
		||||
        VideoCommon::Shader::CompilerSettings compiler_settings;
 | 
			
		||||
        VideoCommon::Shader::Registry registry;
 | 
			
		||||
        std::optional<VideoCommon::Shader::Registry> registry;
 | 
			
		||||
        VAddr cpu_address;
 | 
			
		||||
 | 
			
		||||
        // For Vulkan
 | 
			
		||||
        Vulkan::VKPipelineCache* pp_cache;
 | 
			
		||||
        const Vulkan::VKDevice* vk_device;
 | 
			
		||||
        Vulkan::VKScheduler* scheduler;
 | 
			
		||||
        Vulkan::VKDescriptorPool* descriptor_pool;
 | 
			
		||||
        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
 | 
			
		||||
        Vulkan::VKRenderPassCache* renderpass_cache;
 | 
			
		||||
        std::vector<VkDescriptorSetLayoutBinding> bindings;
 | 
			
		||||
        Vulkan::SPIRVProgram program;
 | 
			
		||||
        Vulkan::GraphicsPipelineCacheKey key;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    std::condition_variable cv;
 | 
			
		||||
@ -101,7 +130,7 @@ private:
 | 
			
		||||
    std::atomic<bool> is_thread_exiting{};
 | 
			
		||||
    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
 | 
			
		||||
    std::vector<std::thread> worker_threads;
 | 
			
		||||
    std::deque<WorkerParams> pending_queue;
 | 
			
		||||
    std::queue<WorkerParams> pending_queue;
 | 
			
		||||
    std::vector<AsyncShaders::Result> finished_work;
 | 
			
		||||
    Core::Frontend::EmuWindow& emu_window;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -92,7 +92,7 @@
 | 
			
		||||
           <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
 | 
			
		||||
          </property>
 | 
			
		||||
          <property name="text">
 | 
			
		||||
           <string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string>
 | 
			
		||||
           <string>Use asynchronous shader building (experimental)</string>
 | 
			
		||||
          </property>
 | 
			
		||||
         </widget>
 | 
			
		||||
        </item>
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user