Merge pull request #3677 from FernandoS27/better-sync
Introduce Predictive Flushing and Improve ASYNC GPU
This commit is contained in:
		
						commit
						bf2ddb8fd5
					
				| @ -92,7 +92,7 @@ void LogSettings() { | ||||
|     LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | ||||
|     LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); | ||||
|     LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); | ||||
|     LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); | ||||
|     LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy); | ||||
|     LogSetting("Renderer_UseAsynchronousGpuEmulation", | ||||
|                Settings::values.use_asynchronous_gpu_emulation); | ||||
|     LogSetting("Renderer_UseVsync", Settings::values.use_vsync); | ||||
| @ -109,4 +109,12 @@ void LogSettings() { | ||||
|     LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); | ||||
| } | ||||
| 
 | ||||
| bool IsGPULevelExtreme() { | ||||
|     return values.gpu_accuracy == GPUAccuracy::Extreme; | ||||
| } | ||||
| 
 | ||||
| bool IsGPULevelHigh() { | ||||
|     return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High; | ||||
| } | ||||
| 
 | ||||
| } // namespace Settings
 | ||||
|  | ||||
| @ -376,6 +376,12 @@ enum class RendererBackend { | ||||
|     Vulkan = 1, | ||||
| }; | ||||
| 
 | ||||
| enum class GPUAccuracy : u32 { | ||||
|     Normal = 0, | ||||
|     High = 1, | ||||
|     Extreme = 2, | ||||
| }; | ||||
| 
 | ||||
| struct Values { | ||||
|     // System
 | ||||
|     bool use_docked_mode; | ||||
| @ -436,7 +442,7 @@ struct Values { | ||||
|     bool use_frame_limit; | ||||
|     u16 frame_limit; | ||||
|     bool use_disk_shader_cache; | ||||
|     bool use_accurate_gpu_emulation; | ||||
|     GPUAccuracy gpu_accuracy; | ||||
|     bool use_asynchronous_gpu_emulation; | ||||
|     bool use_vsync; | ||||
|     bool force_30fps_mode; | ||||
| @ -480,6 +486,9 @@ struct Values { | ||||
|     std::map<u64, std::vector<std::string>> disabled_addons; | ||||
| } extern values; | ||||
| 
 | ||||
| bool IsGPULevelExtreme(); | ||||
| bool IsGPULevelHigh(); | ||||
| 
 | ||||
| void Apply(); | ||||
| void LogSettings(); | ||||
| } // namespace Settings
 | ||||
|  | ||||
| @ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) { | ||||
|     return "Unknown"; | ||||
| } | ||||
| 
 | ||||
| static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) { | ||||
|     switch (backend) { | ||||
|     case Settings::GPUAccuracy::Normal: | ||||
|         return "Normal"; | ||||
|     case Settings::GPUAccuracy::High: | ||||
|         return "High"; | ||||
|     case Settings::GPUAccuracy::Extreme: | ||||
|         return "Extreme"; | ||||
|     } | ||||
|     return "Unknown"; | ||||
| } | ||||
| 
 | ||||
| u64 GetTelemetryId() { | ||||
|     u64 telemetry_id{}; | ||||
|     const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + | ||||
| @ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { | ||||
|     AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); | ||||
|     AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); | ||||
|     AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); | ||||
|     AddField(field_type, "Renderer_UseAccurateGpuEmulation", | ||||
|              Settings::values.use_accurate_gpu_emulation); | ||||
|     AddField(field_type, "Renderer_GPUAccuracyLevel", | ||||
|              TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy)); | ||||
|     AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", | ||||
|              Settings::values.use_asynchronous_gpu_emulation); | ||||
|     AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); | ||||
|  | ||||
| @ -23,6 +23,7 @@ add_library(video_core STATIC | ||||
|     engines/shader_bytecode.h | ||||
|     engines/shader_header.h | ||||
|     engines/shader_type.h | ||||
|     fence_manager.h | ||||
|     gpu.cpp | ||||
|     gpu.h | ||||
|     gpu_asynch.cpp | ||||
| @ -51,6 +52,8 @@ add_library(video_core STATIC | ||||
|     renderer_opengl/gl_buffer_cache.h | ||||
|     renderer_opengl/gl_device.cpp | ||||
|     renderer_opengl/gl_device.h | ||||
|     renderer_opengl/gl_fence_manager.cpp | ||||
|     renderer_opengl/gl_fence_manager.h | ||||
|     renderer_opengl/gl_framebuffer_cache.cpp | ||||
|     renderer_opengl/gl_framebuffer_cache.h | ||||
|     renderer_opengl/gl_rasterizer.cpp | ||||
| @ -176,6 +179,8 @@ if (ENABLE_VULKAN) | ||||
|         renderer_vulkan/vk_descriptor_pool.h | ||||
|         renderer_vulkan/vk_device.cpp | ||||
|         renderer_vulkan/vk_device.h | ||||
|         renderer_vulkan/vk_fence_manager.cpp | ||||
|         renderer_vulkan/vk_fence_manager.h | ||||
|         renderer_vulkan/vk_graphics_pipeline.cpp | ||||
|         renderer_vulkan/vk_graphics_pipeline.h | ||||
|         renderer_vulkan/vk_image.cpp | ||||
|  | ||||
| @ -5,6 +5,7 @@ | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <list> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| #include <unordered_map> | ||||
| @ -18,8 +19,10 @@ | ||||
| 
 | ||||
| #include "common/alignment.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "core/core.h" | ||||
| #include "core/memory.h" | ||||
| #include "core/settings.h" | ||||
| #include "video_core/buffer_cache/buffer_block.h" | ||||
| #include "video_core/buffer_cache/map_interval.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| @ -79,6 +82,9 @@ public: | ||||
|         auto map = MapAddress(block, gpu_addr, cpu_addr, size); | ||||
|         if (is_written) { | ||||
|             map->MarkAsModified(true, GetModifiedTicks()); | ||||
|             if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | ||||
|                 MarkForAsyncFlush(map); | ||||
|             } | ||||
|             if (!map->IsWritten()) { | ||||
|                 map->MarkAsWritten(true); | ||||
|                 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | ||||
| @ -137,11 +143,22 @@ public: | ||||
|         }); | ||||
|         for (auto& object : objects) { | ||||
|             if (object->IsModified() && object->IsRegistered()) { | ||||
|                 mutex.unlock(); | ||||
|                 FlushMap(object); | ||||
|                 mutex.lock(); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     bool MustFlushRegion(VAddr addr, std::size_t size) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         const std::vector<MapInterval> objects = GetMapsInRange(addr, size); | ||||
|         return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { | ||||
|             return map->IsModified() && map->IsRegistered(); | ||||
|         }); | ||||
|     } | ||||
| 
 | ||||
|     /// Mark the specified region as being invalidated
 | ||||
|     void InvalidateRegion(VAddr addr, u64 size) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| @ -154,6 +171,77 @@ public: | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void OnCPUWrite(VAddr addr, std::size_t size) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         for (const auto& object : GetMapsInRange(addr, size)) { | ||||
|             if (object->IsMemoryMarked() && object->IsRegistered()) { | ||||
|                 UnmarkMemory(object); | ||||
|                 object->SetSyncPending(true); | ||||
|                 marked_for_unregister.emplace_back(object); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void SyncGuestHost() { | ||||
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         for (const auto& object : marked_for_unregister) { | ||||
|             if (object->IsRegistered()) { | ||||
|                 object->SetSyncPending(false); | ||||
|                 Unregister(object); | ||||
|             } | ||||
|         } | ||||
|         marked_for_unregister.clear(); | ||||
|     } | ||||
| 
 | ||||
|     void CommitAsyncFlushes() { | ||||
|         if (uncommitted_flushes) { | ||||
|             auto commit_list = std::make_shared<std::list<MapInterval>>(); | ||||
|             for (auto& map : *uncommitted_flushes) { | ||||
|                 if (map->IsRegistered() && map->IsModified()) { | ||||
|                     // TODO(Blinkhawk): Implement backend asynchronous flushing
 | ||||
|                     // AsyncFlushMap(map)
 | ||||
|                     commit_list->push_back(map); | ||||
|                 } | ||||
|             } | ||||
|             if (!commit_list->empty()) { | ||||
|                 committed_flushes.push_back(commit_list); | ||||
|             } else { | ||||
|                 committed_flushes.emplace_back(); | ||||
|             } | ||||
|         } else { | ||||
|             committed_flushes.emplace_back(); | ||||
|         } | ||||
|         uncommitted_flushes.reset(); | ||||
|     } | ||||
| 
 | ||||
|     bool ShouldWaitAsyncFlushes() const { | ||||
|         return !committed_flushes.empty() && committed_flushes.front() != nullptr; | ||||
|     } | ||||
| 
 | ||||
|     bool HasUncommittedFlushes() const { | ||||
|         return uncommitted_flushes != nullptr; | ||||
|     } | ||||
| 
 | ||||
|     void PopAsyncFlushes() { | ||||
|         if (committed_flushes.empty()) { | ||||
|             return; | ||||
|         } | ||||
|         auto& flush_list = committed_flushes.front(); | ||||
|         if (!flush_list) { | ||||
|             committed_flushes.pop_front(); | ||||
|             return; | ||||
|         } | ||||
|         for (MapInterval& map : *flush_list) { | ||||
|             if (map->IsRegistered()) { | ||||
|                 // TODO(Blinkhawk): Replace this for reading the asynchronous flush
 | ||||
|                 FlushMap(map); | ||||
|             } | ||||
|         } | ||||
|         committed_flushes.pop_front(); | ||||
|     } | ||||
| 
 | ||||
|     virtual BufferType GetEmptyBuffer(std::size_t size) = 0; | ||||
| 
 | ||||
| protected: | ||||
| @ -196,17 +284,30 @@ protected: | ||||
|         const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; | ||||
|         mapped_addresses.insert({interval, new_map}); | ||||
|         rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||||
|         new_map->SetMemoryMarked(true); | ||||
|         if (inherit_written) { | ||||
|             MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); | ||||
|             new_map->MarkAsWritten(true); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Unregisters an object from the cache
 | ||||
|     void Unregister(MapInterval& map) { | ||||
|     void UnmarkMemory(const MapInterval& map) { | ||||
|         if (!map->IsMemoryMarked()) { | ||||
|             return; | ||||
|         } | ||||
|         const std::size_t size = map->GetEnd() - map->GetStart(); | ||||
|         rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); | ||||
|         map->SetMemoryMarked(false); | ||||
|     } | ||||
| 
 | ||||
|     /// Unregisters an object from the cache
 | ||||
|     void Unregister(const MapInterval& map) { | ||||
|         UnmarkMemory(map); | ||||
|         map->MarkAsRegistered(false); | ||||
|         if (map->IsSyncPending()) { | ||||
|             marked_for_unregister.remove(map); | ||||
|             map->SetSyncPending(false); | ||||
|         } | ||||
|         if (map->IsWritten()) { | ||||
|             UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | ||||
|         } | ||||
| @ -264,6 +365,9 @@ private: | ||||
|         MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); | ||||
|         if (modified_inheritance) { | ||||
|             new_map->MarkAsModified(true, GetModifiedTicks()); | ||||
|             if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | ||||
|                 MarkForAsyncFlush(new_map); | ||||
|             } | ||||
|         } | ||||
|         Register(new_map, write_inheritance); | ||||
|         return new_map; | ||||
| @ -450,6 +554,13 @@ private: | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     void MarkForAsyncFlush(MapInterval& map) { | ||||
|         if (!uncommitted_flushes) { | ||||
|             uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>(); | ||||
|         } | ||||
|         uncommitted_flushes->insert(map); | ||||
|     } | ||||
| 
 | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     Core::System& system; | ||||
| 
 | ||||
| @ -479,6 +590,10 @@ private: | ||||
|     u64 modified_ticks = 0; | ||||
| 
 | ||||
|     std::vector<u8> staging_buffer; | ||||
|     std::list<MapInterval> marked_for_unregister; | ||||
| 
 | ||||
|     std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{}; | ||||
|     std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes; | ||||
| 
 | ||||
|     std::recursive_mutex mutex; | ||||
| }; | ||||
|  | ||||
| @ -46,6 +46,22 @@ public: | ||||
|         return is_registered; | ||||
|     } | ||||
| 
 | ||||
|     void SetMemoryMarked(bool is_memory_marked_) { | ||||
|         is_memory_marked = is_memory_marked_; | ||||
|     } | ||||
| 
 | ||||
|     bool IsMemoryMarked() const { | ||||
|         return is_memory_marked; | ||||
|     } | ||||
| 
 | ||||
|     void SetSyncPending(bool is_sync_pending_) { | ||||
|         is_sync_pending = is_sync_pending_; | ||||
|     } | ||||
| 
 | ||||
|     bool IsSyncPending() const { | ||||
|         return is_sync_pending; | ||||
|     } | ||||
| 
 | ||||
|     VAddr GetStart() const { | ||||
|         return start; | ||||
|     } | ||||
| @ -83,6 +99,8 @@ private: | ||||
|     bool is_written{}; | ||||
|     bool is_modified{}; | ||||
|     bool is_registered{}; | ||||
|     bool is_memory_marked{}; | ||||
|     bool is_sync_pending{}; | ||||
|     u64 ticks{}; | ||||
| }; | ||||
| 
 | ||||
|  | ||||
| @ -21,6 +21,7 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, | ||||
| void DmaPusher::DispatchCalls() { | ||||
|     MICROPROFILE_SCOPE(DispatchCalls); | ||||
| 
 | ||||
|     gpu.SyncGuestHost(); | ||||
|     // On entering GPU code, assume all memory may be touched by the ARM core.
 | ||||
|     gpu.Maxwell3D().OnMemoryWrite(); | ||||
| 
 | ||||
| @ -32,6 +33,8 @@ void DmaPusher::DispatchCalls() { | ||||
|         } | ||||
|     } | ||||
|     gpu.FlushCommands(); | ||||
|     gpu.SyncGuestHost(); | ||||
|     gpu.OnCommandListEnd(); | ||||
| } | ||||
| 
 | ||||
| bool DmaPusher::Step() { | ||||
|  | ||||
| @ -404,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() { | ||||
| 
 | ||||
|     switch (regs.query.query_get.operation) { | ||||
|     case Regs::QueryOperation::Release: | ||||
|         StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); | ||||
|         if (regs.query.query_get.fence == 1) { | ||||
|             rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); | ||||
|         } else { | ||||
|             StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); | ||||
|         } | ||||
|         break; | ||||
|     case Regs::QueryOperation::Acquire: | ||||
|         // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
 | ||||
| @ -483,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() { | ||||
|     const u32 increment = regs.sync_info.increment.Value(); | ||||
|     [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); | ||||
|     if (increment) { | ||||
|         system.GPU().IncrementSyncPoint(sync_point); | ||||
|         rasterizer.SignalSyncPoint(sync_point); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() { | ||||
|             write_buffer.resize(dst_size); | ||||
|         } | ||||
| 
 | ||||
|         memory_manager.ReadBlock(source, read_buffer.data(), src_size); | ||||
|         memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||||
|         if (Settings::IsGPULevelExtreme()) { | ||||
|             memory_manager.ReadBlock(source, read_buffer.data(), src_size); | ||||
|             memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||||
|         } else { | ||||
|             memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); | ||||
|             memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||||
|         } | ||||
| 
 | ||||
|         Texture::UnswizzleSubrect( | ||||
|             regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, | ||||
| @ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() { | ||||
|             write_buffer.resize(dst_size); | ||||
|         } | ||||
| 
 | ||||
|         if (Settings::values.use_accurate_gpu_emulation) { | ||||
|         if (Settings::IsGPULevelExtreme()) { | ||||
|             memory_manager.ReadBlock(source, read_buffer.data(), src_size); | ||||
|             memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||||
|         } else { | ||||
|  | ||||
							
								
								
									
										170
									
								
								src/video_core/fence_manager.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								src/video_core/fence_manager.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,170 @@ | ||||
| // Copyright 2020 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
| #include <memory> | ||||
| #include <queue> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "core/core.h" | ||||
| #include "core/memory.h" | ||||
| #include "core/settings.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| class FenceBase { | ||||
| public: | ||||
|     FenceBase(u32 payload, bool is_stubbed) | ||||
|         : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {} | ||||
| 
 | ||||
|     FenceBase(GPUVAddr address, u32 payload, bool is_stubbed) | ||||
|         : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {} | ||||
| 
 | ||||
|     GPUVAddr GetAddress() const { | ||||
|         return address; | ||||
|     } | ||||
| 
 | ||||
|     u32 GetPayload() const { | ||||
|         return payload; | ||||
|     } | ||||
| 
 | ||||
|     bool IsSemaphore() const { | ||||
|         return is_semaphore; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     GPUVAddr address; | ||||
|     u32 payload; | ||||
|     bool is_semaphore; | ||||
| 
 | ||||
| protected: | ||||
|     bool is_stubbed; | ||||
| }; | ||||
| 
 | ||||
| template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> | ||||
| class FenceManager { | ||||
| public: | ||||
|     void SignalSemaphore(GPUVAddr addr, u32 value) { | ||||
|         TryReleasePendingFences(); | ||||
|         const bool should_flush = ShouldFlush(); | ||||
|         CommitAsyncFlushes(); | ||||
|         TFence new_fence = CreateFence(addr, value, !should_flush); | ||||
|         fences.push(new_fence); | ||||
|         QueueFence(new_fence); | ||||
|         if (should_flush) { | ||||
|             rasterizer.FlushCommands(); | ||||
|         } | ||||
|         rasterizer.SyncGuestHost(); | ||||
|     } | ||||
| 
 | ||||
|     void SignalSyncPoint(u32 value) { | ||||
|         TryReleasePendingFences(); | ||||
|         const bool should_flush = ShouldFlush(); | ||||
|         CommitAsyncFlushes(); | ||||
|         TFence new_fence = CreateFence(value, !should_flush); | ||||
|         fences.push(new_fence); | ||||
|         QueueFence(new_fence); | ||||
|         if (should_flush) { | ||||
|             rasterizer.FlushCommands(); | ||||
|         } | ||||
|         rasterizer.SyncGuestHost(); | ||||
|     } | ||||
| 
 | ||||
|     void WaitPendingFences() { | ||||
|         auto& gpu{system.GPU()}; | ||||
|         auto& memory_manager{gpu.MemoryManager()}; | ||||
|         while (!fences.empty()) { | ||||
|             TFence& current_fence = fences.front(); | ||||
|             if (ShouldWait()) { | ||||
|                 WaitFence(current_fence); | ||||
|             } | ||||
|             PopAsyncFlushes(); | ||||
|             if (current_fence->IsSemaphore()) { | ||||
|                 memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload()); | ||||
|             } else { | ||||
|                 gpu.IncrementSyncPoint(current_fence->GetPayload()); | ||||
|             } | ||||
|             fences.pop(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||
|                  TTextureCache& texture_cache, TTBufferCache& buffer_cache, | ||||
|                  TQueryCache& query_cache) | ||||
|         : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, | ||||
|           buffer_cache{buffer_cache}, query_cache{query_cache} {} | ||||
| 
 | ||||
|     virtual ~FenceManager() {} | ||||
| 
 | ||||
|     /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
 | ||||
|     /// true
 | ||||
|     virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; | ||||
|     /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
 | ||||
|     virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0; | ||||
|     /// Queues a fence into the backend if the fence isn't stubbed.
 | ||||
|     virtual void QueueFence(TFence& fence) = 0; | ||||
|     /// Notifies that the backend fence has been signaled/reached in host GPU.
 | ||||
|     virtual bool IsFenceSignaled(TFence& fence) const = 0; | ||||
|     /// Waits until a fence has been signalled by the host GPU.
 | ||||
|     virtual void WaitFence(TFence& fence) = 0; | ||||
| 
 | ||||
|     Core::System& system; | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     TTextureCache& texture_cache; | ||||
|     TTBufferCache& buffer_cache; | ||||
|     TQueryCache& query_cache; | ||||
| 
 | ||||
| private: | ||||
|     void TryReleasePendingFences() { | ||||
|         auto& gpu{system.GPU()}; | ||||
|         auto& memory_manager{gpu.MemoryManager()}; | ||||
|         while (!fences.empty()) { | ||||
|             TFence& current_fence = fences.front(); | ||||
|             if (ShouldWait() && !IsFenceSignaled(current_fence)) { | ||||
|                 return; | ||||
|             } | ||||
|             PopAsyncFlushes(); | ||||
|             if (current_fence->IsSemaphore()) { | ||||
|                 memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload()); | ||||
|             } else { | ||||
|                 gpu.IncrementSyncPoint(current_fence->GetPayload()); | ||||
|             } | ||||
|             fences.pop(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     bool ShouldWait() const { | ||||
|         return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || | ||||
|                query_cache.ShouldWaitAsyncFlushes(); | ||||
|     } | ||||
| 
 | ||||
|     bool ShouldFlush() const { | ||||
|         return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || | ||||
|                query_cache.HasUncommittedFlushes(); | ||||
|     } | ||||
| 
 | ||||
|     void PopAsyncFlushes() { | ||||
|         texture_cache.PopAsyncFlushes(); | ||||
|         buffer_cache.PopAsyncFlushes(); | ||||
|         query_cache.PopAsyncFlushes(); | ||||
|     } | ||||
| 
 | ||||
|     void CommitAsyncFlushes() { | ||||
|         texture_cache.CommitAsyncFlushes(); | ||||
|         buffer_cache.CommitAsyncFlushes(); | ||||
|         query_cache.CommitAsyncFlushes(); | ||||
|     } | ||||
| 
 | ||||
|     std::queue<TFence> fences; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
| @ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | ||||
|     std::unique_lock lck{flush_request_mutex}; | ||||
|     const u64 fence = ++last_flush_fence; | ||||
|     flush_requests.emplace_back(fence, addr, size); | ||||
|     return fence; | ||||
| } | ||||
| 
 | ||||
| void GPU::TickWork() { | ||||
|     std::unique_lock lck{flush_request_mutex}; | ||||
|     while (!flush_requests.empty()) { | ||||
|         auto& request = flush_requests.front(); | ||||
|         const u64 fence = request.fence; | ||||
|         const VAddr addr = request.addr; | ||||
|         const std::size_t size = request.size; | ||||
|         flush_requests.pop_front(); | ||||
|         flush_request_mutex.unlock(); | ||||
|         renderer->Rasterizer().FlushRegion(addr, size); | ||||
|         current_flush_fence.store(fence); | ||||
|         flush_request_mutex.lock(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| u64 GPU::GetTicks() const { | ||||
|     // These values were reverse engineered by fincs from NVN
 | ||||
|     // The gpu clock is reported in units of 385/625 nanoseconds
 | ||||
| @ -142,6 +164,13 @@ void GPU::FlushCommands() { | ||||
|     renderer->Rasterizer().FlushCommands(); | ||||
| } | ||||
| 
 | ||||
| void GPU::SyncGuestHost() { | ||||
|     renderer->Rasterizer().SyncGuestHost(); | ||||
| } | ||||
| 
 | ||||
| void GPU::OnCommandListEnd() { | ||||
|     renderer->Rasterizer().ReleaseFences(); | ||||
| } | ||||
| // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 | ||||
| // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
 | ||||
| // So the values you see in docs might be multiplied by 4.
 | ||||
|  | ||||
| @ -155,7 +155,23 @@ public: | ||||
|     /// Calls a GPU method.
 | ||||
|     void CallMethod(const MethodCall& method_call); | ||||
| 
 | ||||
|     /// Flush all current written commands into the host GPU for execution.
 | ||||
|     void FlushCommands(); | ||||
|     /// Synchronizes CPU writes with Host GPU memory.
 | ||||
|     void SyncGuestHost(); | ||||
|     /// Signal the ending of command list.
 | ||||
|     virtual void OnCommandListEnd(); | ||||
| 
 | ||||
|     /// Request a host GPU memory flush from the CPU.
 | ||||
|     u64 RequestFlush(VAddr addr, std::size_t size); | ||||
| 
 | ||||
|     /// Obtains current flush request fence id.
 | ||||
|     u64 CurrentFlushRequestFence() const { | ||||
|         return current_flush_fence.load(std::memory_order_relaxed); | ||||
|     } | ||||
| 
 | ||||
|     /// Tick pending requests within the GPU.
 | ||||
|     void TickWork(); | ||||
| 
 | ||||
|     /// Returns a reference to the Maxwell3D GPU engine.
 | ||||
|     Engines::Maxwell3D& Maxwell3D(); | ||||
| @ -325,6 +341,19 @@ private: | ||||
| 
 | ||||
|     std::condition_variable sync_cv; | ||||
| 
 | ||||
|     struct FlushRequest { | ||||
|         FlushRequest(u64 fence, VAddr addr, std::size_t size) | ||||
|             : fence{fence}, addr{addr}, size{size} {} | ||||
|         u64 fence; | ||||
|         VAddr addr; | ||||
|         std::size_t size; | ||||
|     }; | ||||
| 
 | ||||
|     std::list<FlushRequest> flush_requests; | ||||
|     std::atomic<u64> current_flush_fence{}; | ||||
|     u64 last_flush_fence{}; | ||||
|     std::mutex flush_request_mutex; | ||||
| 
 | ||||
|     const bool is_async; | ||||
| }; | ||||
| 
 | ||||
|  | ||||
| @ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const { | ||||
|     gpu_thread.WaitIdle(); | ||||
| } | ||||
| 
 | ||||
| void GPUAsynch::OnCommandListEnd() { | ||||
|     gpu_thread.OnCommandListEnd(); | ||||
| } | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
|  | ||||
| @ -32,6 +32,8 @@ public: | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void WaitIdle() const override; | ||||
| 
 | ||||
|     void OnCommandListEnd() override; | ||||
| 
 | ||||
| protected: | ||||
|     void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | ||||
| 
 | ||||
|  | ||||
| @ -6,6 +6,7 @@ | ||||
| #include "common/microprofile.h" | ||||
| #include "core/core.h" | ||||
| #include "core/frontend/emu_window.h" | ||||
| #include "core/settings.h" | ||||
| #include "video_core/dma_pusher.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/gpu_thread.h" | ||||
| @ -14,8 +15,9 @@ | ||||
| namespace VideoCommon::GPUThread { | ||||
| 
 | ||||
| /// Runs the GPU thread
 | ||||
| static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, | ||||
|                       Tegra::DmaPusher& dma_pusher, SynchState& state) { | ||||
| static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | ||||
|                       Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, | ||||
|                       SynchState& state) { | ||||
|     MicroProfileOnThreadCreate("GpuThread"); | ||||
| 
 | ||||
|     // Wait for first GPU command before acquiring the window context
 | ||||
| @ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic | ||||
|             dma_pusher.DispatchCalls(); | ||||
|         } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | ||||
|             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | ||||
|         } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { | ||||
|             renderer.Rasterizer().ReleaseFences(); | ||||
|         } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) { | ||||
|             system.GPU().TickWork(); | ||||
|         } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | ||||
|             renderer.Rasterizer().FlushRegion(data->addr, data->size); | ||||
|         } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||
|             renderer.Rasterizer().InvalidateRegion(data->addr, data->size); | ||||
|             renderer.Rasterizer().OnCPUWrite(data->addr, data->size); | ||||
|         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | ||||
|             return; | ||||
|         } else { | ||||
| @ -65,8 +71,8 @@ ThreadManager::~ThreadManager() { | ||||
| void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | ||||
|                                 Core::Frontend::GraphicsContext& context, | ||||
|                                 Tegra::DmaPusher& dma_pusher) { | ||||
|     thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), | ||||
|                          std::ref(state)}; | ||||
|     thread = std::thread{RunThread,         std::ref(system),     std::ref(renderer), | ||||
|                          std::ref(context), std::ref(dma_pusher), std::ref(state)}; | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | ||||
| @ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::FlushRegion(VAddr addr, u64 size) { | ||||
|     PushCommand(FlushRegionCommand(addr, size)); | ||||
|     if (!Settings::IsGPULevelHigh()) { | ||||
|         PushCommand(FlushRegionCommand(addr, size)); | ||||
|         return; | ||||
|     } | ||||
|     if (!Settings::IsGPULevelExtreme()) { | ||||
|         return; | ||||
|     } | ||||
|     if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { | ||||
|         auto& gpu = system.GPU(); | ||||
|         u64 fence = gpu.RequestFlush(addr, size); | ||||
|         PushCommand(GPUTickCommand()); | ||||
|         while (fence > gpu.CurrentFlushRequestFence()) { | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     system.Renderer().Rasterizer().InvalidateRegion(addr, size); | ||||
|     system.Renderer().Rasterizer().OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
 | ||||
|     InvalidateRegion(addr, size); | ||||
|     system.Renderer().Rasterizer().OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::WaitIdle() const { | ||||
| @ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::OnCommandListEnd() { | ||||
|     PushCommand(OnCommandListEndCommand()); | ||||
| } | ||||
| 
 | ||||
| u64 ThreadManager::PushCommand(CommandData&& command_data) { | ||||
|     const u64 fence{++state.last_fence}; | ||||
|     state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | ||||
|  | ||||
| @ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final { | ||||
|     u64 size; | ||||
| }; | ||||
| 
 | ||||
| /// Command called within the gpu, to schedule actions after a command list end
 | ||||
| struct OnCommandListEndCommand final {}; | ||||
| 
 | ||||
| /// Command to make the gpu look into pending requests
 | ||||
| struct GPUTickCommand final {}; | ||||
| 
 | ||||
| using CommandData = | ||||
|     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | ||||
|                  InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | ||||
|                  InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand, | ||||
|                  GPUTickCommand>; | ||||
| 
 | ||||
| struct CommandDataContainer { | ||||
|     CommandDataContainer() = default; | ||||
| @ -122,6 +129,8 @@ public: | ||||
|     // Wait until the gpu thread is idle.
 | ||||
|     void WaitIdle() const; | ||||
| 
 | ||||
|     void OnCommandListEnd(); | ||||
| 
 | ||||
| private: | ||||
|     /// Pushes a command to be executed by the GPU thread
 | ||||
|     u64 PushCommand(CommandData&& command_data); | ||||
|  | ||||
| @ -12,10 +12,12 @@ | ||||
| #include <mutex> | ||||
| #include <optional> | ||||
| #include <unordered_map> | ||||
| #include <unordered_set> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "core/core.h" | ||||
| #include "core/settings.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| @ -130,6 +132,9 @@ public: | ||||
|         } | ||||
| 
 | ||||
|         query->BindCounter(Stream(type).Current(), timestamp); | ||||
|         if (Settings::values.use_asynchronous_gpu_emulation) { | ||||
|             AsyncFlushQuery(cpu_addr); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
 | ||||
| @ -170,6 +175,37 @@ public: | ||||
|         return streams[static_cast<std::size_t>(type)]; | ||||
|     } | ||||
| 
 | ||||
|     void CommitAsyncFlushes() { | ||||
|         committed_flushes.push_back(uncommitted_flushes); | ||||
|         uncommitted_flushes.reset(); | ||||
|     } | ||||
| 
 | ||||
|     bool HasUncommittedFlushes() const { | ||||
|         return uncommitted_flushes != nullptr; | ||||
|     } | ||||
| 
 | ||||
|     bool ShouldWaitAsyncFlushes() const { | ||||
|         if (committed_flushes.empty()) { | ||||
|             return false; | ||||
|         } | ||||
|         return committed_flushes.front() != nullptr; | ||||
|     } | ||||
| 
 | ||||
|     void PopAsyncFlushes() { | ||||
|         if (committed_flushes.empty()) { | ||||
|             return; | ||||
|         } | ||||
|         auto& flush_list = committed_flushes.front(); | ||||
|         if (!flush_list) { | ||||
|             committed_flushes.pop_front(); | ||||
|             return; | ||||
|         } | ||||
|         for (VAddr query_address : *flush_list) { | ||||
|             FlushAndRemoveRegion(query_address, 4); | ||||
|         } | ||||
|         committed_flushes.pop_front(); | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | ||||
| 
 | ||||
| @ -224,6 +260,13 @@ private: | ||||
|         return found != std::end(contents) ? &*found : nullptr; | ||||
|     } | ||||
| 
 | ||||
|     void AsyncFlushQuery(VAddr addr) { | ||||
|         if (!uncommitted_flushes) { | ||||
|             uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>(); | ||||
|         } | ||||
|         uncommitted_flushes->insert(addr); | ||||
|     } | ||||
| 
 | ||||
|     static constexpr std::uintptr_t PAGE_SIZE = 4096; | ||||
|     static constexpr unsigned PAGE_SHIFT = 12; | ||||
| 
 | ||||
| @ -235,6 +278,9 @@ private: | ||||
|     std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | ||||
| 
 | ||||
|     std::array<CounterStream, VideoCore::NumQueryTypes> streams; | ||||
| 
 | ||||
|     std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{}; | ||||
|     std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes; | ||||
| }; | ||||
| 
 | ||||
| template <class QueryCache, class HostCounter> | ||||
|  | ||||
| @ -49,15 +49,33 @@ public: | ||||
|     /// Records a GPU query and caches it
 | ||||
|     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | ||||
| 
 | ||||
|     /// Signal a GPU based semaphore as a fence
 | ||||
|     virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; | ||||
| 
 | ||||
|     /// Signal a GPU based syncpoint as a fence
 | ||||
|     virtual void SignalSyncPoint(u32 value) = 0; | ||||
| 
 | ||||
|     /// Release all pending fences.
 | ||||
|     virtual void ReleaseFences() = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that all caches should be flushed to Switch memory
 | ||||
|     virtual void FlushAll() = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     virtual void FlushRegion(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Check if the specified memory area requires flushing to CPU Memory.
 | ||||
|     virtual bool MustFlushRegion(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||
|     virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region are desync with guest
 | ||||
|     virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Sync memory between guest and host.
 | ||||
|     virtual void SyncGuestHost() = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     /// and invalidated
 | ||||
|     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | ||||
|  | ||||
| @ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||||
| } | ||||
| 
 | ||||
| void OGLBufferCache::WriteBarrier() { | ||||
|     glMemoryBarrier(GL_ALL_BARRIER_BITS); | ||||
|     glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); | ||||
| } | ||||
| 
 | ||||
| GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { | ||||
| @ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s | ||||
| void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||||
|                                        u8* data) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | ||||
|     glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||||
|     glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), | ||||
|                             static_cast<GLsizeiptr>(size), data); | ||||
| } | ||||
|  | ||||
							
								
								
									
										72
									
								
								src/video_core/renderer_opengl/gl_fence_manager.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								src/video_core/renderer_opengl/gl_fence_manager.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,72 @@ | ||||
| // Copyright 2020 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| 
 | ||||
| #include "video_core/renderer_opengl/gl_fence_manager.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) | ||||
|     : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {} | ||||
| 
 | ||||
| GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) | ||||
|     : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {} | ||||
| 
 | ||||
| GLInnerFence::~GLInnerFence() = default; | ||||
| 
 | ||||
| void GLInnerFence::Queue() { | ||||
|     if (is_stubbed) { | ||||
|         return; | ||||
|     } | ||||
|     ASSERT(sync_object.handle == 0); | ||||
|     sync_object.Create(); | ||||
| } | ||||
| 
 | ||||
| bool GLInnerFence::IsSignaled() const { | ||||
|     if (is_stubbed) { | ||||
|         return true; | ||||
|     } | ||||
|     ASSERT(sync_object.handle != 0); | ||||
|     GLsizei length; | ||||
|     GLint sync_status; | ||||
|     glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status); | ||||
|     return sync_status == GL_SIGNALED; | ||||
| } | ||||
| 
 | ||||
| void GLInnerFence::Wait() { | ||||
|     if (is_stubbed) { | ||||
|         return; | ||||
|     } | ||||
|     ASSERT(sync_object.handle != 0); | ||||
|     glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); | ||||
| } | ||||
| 
 | ||||
| FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, | ||||
|                                        VideoCore::RasterizerInterface& rasterizer, | ||||
|                                        TextureCacheOpenGL& texture_cache, | ||||
|                                        OGLBufferCache& buffer_cache, QueryCache& query_cache) | ||||
|     : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {} | ||||
| 
 | ||||
| Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | ||||
|     return std::make_shared<GLInnerFence>(value, is_stubbed); | ||||
| } | ||||
| 
 | ||||
| Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { | ||||
|     return std::make_shared<GLInnerFence>(addr, value, is_stubbed); | ||||
| } | ||||
| 
 | ||||
| void FenceManagerOpenGL::QueueFence(Fence& fence) { | ||||
|     fence->Queue(); | ||||
| } | ||||
| 
 | ||||
| bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const { | ||||
|     return fence->IsSignaled(); | ||||
| } | ||||
| 
 | ||||
| void FenceManagerOpenGL::WaitFence(Fence& fence) { | ||||
|     fence->Wait(); | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
							
								
								
									
										53
									
								
								src/video_core/renderer_opengl/gl_fence_manager.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								src/video_core/renderer_opengl/gl_fence_manager.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,53 @@ | ||||
| // Copyright 2020 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <memory> | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/fence_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_query_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| class GLInnerFence : public VideoCommon::FenceBase { | ||||
| public: | ||||
|     GLInnerFence(u32 payload, bool is_stubbed); | ||||
|     GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed); | ||||
|     ~GLInnerFence(); | ||||
| 
 | ||||
|     void Queue(); | ||||
| 
 | ||||
|     bool IsSignaled() const; | ||||
| 
 | ||||
|     void Wait(); | ||||
| 
 | ||||
| private: | ||||
|     OGLSync sync_object; | ||||
| }; | ||||
| 
 | ||||
| using Fence = std::shared_ptr<GLInnerFence>; | ||||
| using GenericFenceManager = | ||||
|     VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; | ||||
| 
 | ||||
| class FenceManagerOpenGL final : public GenericFenceManager { | ||||
| public: | ||||
|     FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||
|                        TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, | ||||
|                        QueryCache& query_cache); | ||||
| 
 | ||||
| protected: | ||||
|     Fence CreateFence(u32 value, bool is_stubbed) override; | ||||
|     Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; | ||||
|     void QueueFence(Fence& fence) override; | ||||
|     bool IsFenceSignaled(Fence& fence) const override; | ||||
|     void WaitFence(Fence& fence) override; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
| @ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | ||||
|                                    ScreenInfo& info, GLShader::ProgramManager& program_manager, | ||||
|                                    StateTracker& state_tracker) | ||||
|     : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, | ||||
|       shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, | ||||
|       screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, | ||||
|       buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { | ||||
|       shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, | ||||
|       buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, | ||||
|       fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, | ||||
|       screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { | ||||
|     CheckExtensions(); | ||||
| } | ||||
| 
 | ||||
| @ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | ||||
|     EndTransformFeedback(); | ||||
| 
 | ||||
|     ++num_queued_commands; | ||||
| 
 | ||||
|     system.GPU().TickWork(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||||
| @ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | ||||
|     query_cache.FlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | ||||
|     if (!Settings::IsGPULevelHigh()) { | ||||
|         return buffer_cache.MustFlushRegion(addr, size); | ||||
|     } | ||||
|     return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     if (addr == 0 || size == 0) { | ||||
| @ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     query_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     texture_cache.OnCPUWrite(addr, size); | ||||
|     shader_cache.InvalidateRegion(addr, size); | ||||
|     buffer_cache.OnCPUWrite(addr, size); | ||||
|     query_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SyncGuestHost() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     texture_cache.SyncGuestHost(); | ||||
|     buffer_cache.SyncGuestHost(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | ||||
|     auto& gpu{system.GPU()}; | ||||
|     if (!gpu.IsAsync()) { | ||||
|         auto& memory_manager{gpu.MemoryManager()}; | ||||
|         memory_manager.Write<u32>(addr, value); | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.SignalSemaphore(addr, value); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SignalSyncPoint(u32 value) { | ||||
|     auto& gpu{system.GPU()}; | ||||
|     if (!gpu.IsAsync()) { | ||||
|         gpu.IncrementSyncPoint(value); | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.SignalSyncPoint(value); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::ReleaseFences() { | ||||
|     auto& gpu{system.GPU()}; | ||||
|     if (!gpu.IsAsync()) { | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.WaitPendingFences(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     if (Settings::values.use_accurate_gpu_emulation) { | ||||
|     if (Settings::IsGPULevelExtreme()) { | ||||
|         FlushRegion(addr, size); | ||||
|     } | ||||
|     InvalidateRegion(addr, size); | ||||
|  | ||||
| @ -23,6 +23,7 @@ | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_fence_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_query_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| @ -66,7 +67,13 @@ public: | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(VAddr addr, u64 size) override; | ||||
|     bool MustFlushRegion(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void SyncGuestHost() override; | ||||
|     void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void ReleaseFences() override; | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void FlushCommands() override; | ||||
|     void TickFrame() override; | ||||
| @ -222,6 +229,8 @@ private: | ||||
|     SamplerCacheOpenGL sampler_cache; | ||||
|     FramebufferCacheOpenGL framebuffer_cache; | ||||
|     QueryCache query_cache; | ||||
|     OGLBufferCache buffer_cache; | ||||
|     FenceManagerOpenGL fence_manager; | ||||
| 
 | ||||
|     Core::System& system; | ||||
|     ScreenInfo& screen_info; | ||||
| @ -229,7 +238,6 @@ private: | ||||
|     StateTracker& state_tracker; | ||||
| 
 | ||||
|     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||||
|     OGLBufferCache buffer_cache; | ||||
| 
 | ||||
|     GLint vertex_binding = 0; | ||||
| 
 | ||||
|  | ||||
| @ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | ||||
| 
 | ||||
|     // Look up shader in the cache based on address
 | ||||
|     const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; | ||||
|     Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; | ||||
|     Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; | ||||
|     if (shader) { | ||||
|         return last_shaders[static_cast<std::size_t>(program)] = shader; | ||||
|     } | ||||
| @ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | ||||
|         const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||||
|         shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||||
|     } | ||||
|     Register(shader); | ||||
| 
 | ||||
|     if (cpu_addr) { | ||||
|         Register(shader); | ||||
|     } else { | ||||
|         null_shader = shader; | ||||
|     } | ||||
| 
 | ||||
|     return last_shaders[static_cast<std::size_t>(program)] = shader; | ||||
| } | ||||
| @ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||||
|     auto& memory_manager{system.GPU().MemoryManager()}; | ||||
|     const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; | ||||
| 
 | ||||
|     auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||||
|     auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; | ||||
|     if (kernel) { | ||||
|         return kernel; | ||||
|     } | ||||
| @ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||||
|         kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||||
|     } | ||||
| 
 | ||||
|     Register(kernel); | ||||
|     if (cpu_addr) { | ||||
|         Register(kernel); | ||||
|     } else { | ||||
|         null_kernel = kernel; | ||||
|     } | ||||
|     return kernel; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -125,6 +125,9 @@ private: | ||||
|     ShaderDiskCacheOpenGL disk_cache; | ||||
|     std::unordered_map<u64, PrecompiledShader> runtime_cache; | ||||
| 
 | ||||
|     Shader null_shader{}; | ||||
|     Shader null_kernel{}; | ||||
| 
 | ||||
|     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | ||||
| }; | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										101
									
								
								src/video_core/renderer_vulkan/vk_fence_manager.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								src/video_core/renderer_vulkan/vk_fence_manager.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,101 @@ | ||||
| // Copyright 2020 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <memory> | ||||
| #include <thread> | ||||
| 
 | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_device.h" | ||||
| #include "video_core/renderer_vulkan/vk_fence_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||
| #include "video_core/renderer_vulkan/wrapper.h" | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed) | ||||
|     : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {} | ||||
| 
 | ||||
| InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, | ||||
|                        u32 payload, bool is_stubbed) | ||||
|     : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {} | ||||
| 
 | ||||
| InnerFence::~InnerFence() = default; | ||||
| 
 | ||||
| void InnerFence::Queue() { | ||||
|     if (is_stubbed) { | ||||
|         return; | ||||
|     } | ||||
|     ASSERT(!event); | ||||
| 
 | ||||
|     event = device.GetLogical().CreateEvent(); | ||||
|     ticks = scheduler.Ticks(); | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| bool InnerFence::IsSignaled() const { | ||||
|     if (is_stubbed) { | ||||
|         return true; | ||||
|     } | ||||
|     ASSERT(event); | ||||
|     return IsEventSignalled(); | ||||
| } | ||||
| 
 | ||||
| void InnerFence::Wait() { | ||||
|     if (is_stubbed) { | ||||
|         return; | ||||
|     } | ||||
|     ASSERT(event); | ||||
| 
 | ||||
|     if (ticks >= scheduler.Ticks()) { | ||||
|         scheduler.Flush(); | ||||
|     } | ||||
|     while (!IsEventSignalled()) { | ||||
|         std::this_thread::yield(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| bool InnerFence::IsEventSignalled() const { | ||||
|     switch (const VkResult result = event.GetStatus()) { | ||||
|     case VK_EVENT_SET: | ||||
|         return true; | ||||
|     case VK_EVENT_RESET: | ||||
|         return false; | ||||
|     default: | ||||
|         throw vk::Exception(result); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||
|                                const VKDevice& device, VKScheduler& scheduler, | ||||
|                                VKTextureCache& texture_cache, VKBufferCache& buffer_cache, | ||||
|                                VKQueryCache& query_cache) | ||||
|     : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache), | ||||
|       device{device}, scheduler{scheduler} {} | ||||
| 
 | ||||
| Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { | ||||
|     return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed); | ||||
| } | ||||
| 
 | ||||
| Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { | ||||
|     return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed); | ||||
| } | ||||
| 
 | ||||
| void VKFenceManager::QueueFence(Fence& fence) { | ||||
|     fence->Queue(); | ||||
| } | ||||
| 
 | ||||
| bool VKFenceManager::IsFenceSignaled(Fence& fence) const { | ||||
|     return fence->IsSignaled(); | ||||
| } | ||||
| 
 | ||||
| void VKFenceManager::WaitFence(Fence& fence) { | ||||
|     fence->Wait(); | ||||
| } | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
							
								
								
									
										74
									
								
								src/video_core/renderer_vulkan/vk_fence_manager.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								src/video_core/renderer_vulkan/vk_fence_manager.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,74 @@ | ||||
| // Copyright 2020 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <memory> | ||||
| 
 | ||||
| #include "video_core/fence_manager.h" | ||||
| #include "video_core/renderer_vulkan/wrapper.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
| } | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| class RasterizerInterface; | ||||
| } | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| class VKBufferCache; | ||||
| class VKDevice; | ||||
| class VKQueryCache; | ||||
| class VKScheduler; | ||||
| class VKTextureCache; | ||||
| 
 | ||||
| class InnerFence : public VideoCommon::FenceBase { | ||||
| public: | ||||
|     explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, | ||||
|                         bool is_stubbed); | ||||
|     explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, | ||||
|                         u32 payload, bool is_stubbed); | ||||
|     ~InnerFence(); | ||||
| 
 | ||||
|     void Queue(); | ||||
| 
 | ||||
|     bool IsSignaled() const; | ||||
| 
 | ||||
|     void Wait(); | ||||
| 
 | ||||
| private: | ||||
|     bool IsEventSignalled() const; | ||||
| 
 | ||||
|     const VKDevice& device; | ||||
|     VKScheduler& scheduler; | ||||
|     vk::Event event; | ||||
|     u64 ticks = 0; | ||||
| }; | ||||
| using Fence = std::shared_ptr<InnerFence>; | ||||
| 
 | ||||
| using GenericFenceManager = | ||||
|     VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; | ||||
| 
 | ||||
| class VKFenceManager final : public GenericFenceManager { | ||||
| public: | ||||
|     explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||
|                             const VKDevice& device, VKScheduler& scheduler, | ||||
|                             VKTextureCache& texture_cache, VKBufferCache& buffer_cache, | ||||
|                             VKQueryCache& query_cache); | ||||
| 
 | ||||
| protected: | ||||
|     Fence CreateFence(u32 value, bool is_stubbed) override; | ||||
|     Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; | ||||
|     void QueueFence(Fence& fence) override; | ||||
|     bool IsFenceSignaled(Fence& fence) const override; | ||||
|     void WaitFence(Fence& fence) override; | ||||
| 
 | ||||
| private: | ||||
|     const VKDevice& device; | ||||
|     VKScheduler& scheduler; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
| @ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | ||||
|         const GPUVAddr program_addr{GetShaderAddress(system, program)}; | ||||
|         const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||||
|         ASSERT(cpu_addr); | ||||
|         auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||||
|         auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; | ||||
|         if (!shader) { | ||||
|             const auto host_ptr{memory_manager.GetPointer(program_addr)}; | ||||
| 
 | ||||
| @ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | ||||
| 
 | ||||
|             shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | ||||
|                                                     std::move(code), stage_offset); | ||||
|             Register(shader); | ||||
|             if (cpu_addr) { | ||||
|                 Register(shader); | ||||
|             } else { | ||||
|                 null_shader = shader; | ||||
|             } | ||||
|         } | ||||
|         shaders[index] = std::move(shader); | ||||
|     } | ||||
| @ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | ||||
|     const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||||
|     ASSERT(cpu_addr); | ||||
| 
 | ||||
|     auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||||
|     auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; | ||||
|     if (!shader) { | ||||
|         // No shader found - create a new one
 | ||||
|         const auto host_ptr = memory_manager.GetPointer(program_addr); | ||||
| @ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | ||||
|         shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | ||||
|                                                 program_addr, *cpu_addr, std::move(code), | ||||
|                                                 kernel_main_offset); | ||||
|         Register(shader); | ||||
|         if (cpu_addr) { | ||||
|             Register(shader); | ||||
|         } else { | ||||
|             null_kernel = shader; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     Specialization specialization; | ||||
|  | ||||
| @ -182,6 +182,9 @@ private: | ||||
|     VKUpdateDescriptorQueue& update_descriptor_queue; | ||||
|     VKRenderPassCache& renderpass_cache; | ||||
| 
 | ||||
|     Shader null_shader{}; | ||||
|     Shader null_kernel{}; | ||||
| 
 | ||||
|     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | ||||
| 
 | ||||
|     GraphicsPipelineCacheKey last_graphics_key; | ||||
|  | ||||
| @ -17,6 +17,7 @@ | ||||
| #include "common/microprofile.h" | ||||
| #include "core/core.h" | ||||
| #include "core/memory.h" | ||||
| #include "core/settings.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||||
| @ -299,7 +300,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | ||||
|       pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, | ||||
|                      renderpass_cache), | ||||
|       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | ||||
|       sampler_cache(device), query_cache(system, *this, device, scheduler) { | ||||
|       sampler_cache(device), | ||||
|       fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), | ||||
|       query_cache(system, *this, device, scheduler) { | ||||
|     scheduler.SetQueryCache(query_cache); | ||||
| } | ||||
| 
 | ||||
| @ -360,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | ||||
|     }); | ||||
| 
 | ||||
|     EndTransformFeedback(); | ||||
| 
 | ||||
|     system.GPU().TickWork(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::Clear() { | ||||
| @ -504,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | ||||
|     query_cache.FlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | ||||
    // Forwards the query for the region (addr, size) to the caches' MustFlushRegion.
    // When the GPU accuracy level is neither High nor Extreme only the buffer cache is
    // consulted; otherwise the texture cache is consulted first and the buffer cache second.
|     if (!Settings::IsGPULevelHigh()) { | ||||
|         return buffer_cache.MustFlushRegion(addr, size); | ||||
|     } | ||||
|     return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
| @ -514,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     query_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||||
    // Notifies the caches about a CPU write to the region (addr, size).
    // A zero address or a zero size is ignored.
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
    // The texture and buffer caches receive OnCPUWrite, while the pipeline and query
    // caches have the region invalidated directly.
|     texture_cache.OnCPUWrite(addr, size); | ||||
|     pipeline_cache.InvalidateRegion(addr, size); | ||||
|     buffer_cache.OnCPUWrite(addr, size); | ||||
|     query_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SyncGuestHost() { | ||||
    // Forwards the request to the texture cache and then to the buffer cache.
|     texture_cache.SyncGuestHost(); | ||||
|     buffer_cache.SyncGuestHost(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | ||||
    // Signals a semaphore by storing `value` at GPU address `addr`.
|     auto& gpu{system.GPU()}; | ||||
    // When the GPU is not asynchronous the value is written immediately through the GPU
    // memory manager; otherwise the request is handed to the fence manager.
|     if (!gpu.IsAsync()) { | ||||
|         gpu.MemoryManager().Write<u32>(addr, value); | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.SignalSemaphore(addr, value); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SignalSyncPoint(u32 value) { | ||||
    // Signals the sync point identified by `value`.
|     auto& gpu{system.GPU()}; | ||||
    // When the GPU is not asynchronous the sync point is incremented immediately;
    // otherwise the request is handed to the fence manager.
|     if (!gpu.IsAsync()) { | ||||
|         gpu.IncrementSyncPoint(value); | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.SignalSyncPoint(value); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::ReleaseFences() { | ||||
    // Asks the fence manager to wait for its pending fences.
    // Does nothing when the GPU is not asynchronous.
|     auto& gpu{system.GPU()}; | ||||
|     if (!gpu.IsAsync()) { | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.WaitPendingFences(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     FlushRegion(addr, size); | ||||
|     InvalidateRegion(addr, size); | ||||
|  | ||||
| @ -21,6 +21,7 @@ | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_fence_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||
| @ -118,7 +119,13 @@ public: | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(VAddr addr, u64 size) override; | ||||
|     bool MustFlushRegion(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void SyncGuestHost() override; | ||||
|     void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void ReleaseFences() override; | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void FlushCommands() override; | ||||
|     void TickFrame() override; | ||||
| @ -261,6 +268,7 @@ private: | ||||
|     VKPipelineCache pipeline_cache; | ||||
|     VKBufferCache buffer_cache; | ||||
|     VKSamplerCache sampler_cache; | ||||
|     VKFenceManager fence_manager; | ||||
|     VKQueryCache query_cache; | ||||
| 
 | ||||
|     std::array<View, Maxwell::NumRenderTargets> color_attachments; | ||||
|  | ||||
| @ -63,6 +63,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | ||||
|     X(vkCmdSetBlendConstants); | ||||
|     X(vkCmdSetDepthBias); | ||||
|     X(vkCmdSetDepthBounds); | ||||
|     X(vkCmdSetEvent); | ||||
|     X(vkCmdSetScissor); | ||||
|     X(vkCmdSetStencilCompareMask); | ||||
|     X(vkCmdSetStencilReference); | ||||
| @ -75,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | ||||
|     X(vkCreateDescriptorPool); | ||||
|     X(vkCreateDescriptorSetLayout); | ||||
|     X(vkCreateDescriptorUpdateTemplateKHR); | ||||
|     X(vkCreateEvent); | ||||
|     X(vkCreateFence); | ||||
|     X(vkCreateFramebuffer); | ||||
|     X(vkCreateGraphicsPipelines); | ||||
| @ -93,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | ||||
|     X(vkDestroyDescriptorPool); | ||||
|     X(vkDestroyDescriptorSetLayout); | ||||
|     X(vkDestroyDescriptorUpdateTemplateKHR); | ||||
|     X(vkDestroyEvent); | ||||
|     X(vkDestroyFence); | ||||
|     X(vkDestroyFramebuffer); | ||||
|     X(vkDestroyImage); | ||||
| @ -112,6 +115,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | ||||
|     X(vkFreeMemory); | ||||
|     X(vkGetBufferMemoryRequirements); | ||||
|     X(vkGetDeviceQueue); | ||||
|     X(vkGetEventStatus); | ||||
|     X(vkGetFenceStatus); | ||||
|     X(vkGetImageMemoryRequirements); | ||||
|     X(vkGetQueryPoolResults); | ||||
| @ -269,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) | ||||
|     dld.vkFreeMemory(device, handle, nullptr); | ||||
| } | ||||
| 
 | ||||
| void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept { | ||||
    // Destroys the event `handle` owned by `device` through the dispatch table's
    // vkDestroyEvent entry, passing nullptr as the last (allocator) argument.
|     dld.vkDestroyEvent(device, handle, nullptr); | ||||
| } | ||||
| 
 | ||||
| void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { | ||||
|     dld.vkDestroyFence(device, handle, nullptr); | ||||
| } | ||||
| @ -599,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons | ||||
|     return ShaderModule(object, handle, *dld); | ||||
| } | ||||
| 
 | ||||
| Event Device::CreateEvent() const { | ||||
    // Creates a VkEvent on this device and returns it wrapped in an Event handle.
    // The create info carries no pNext chain and no flags.
|     VkEventCreateInfo ci; | ||||
|     ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; | ||||
|     ci.pNext = nullptr; | ||||
|     ci.flags = 0; | ||||
|     VkEvent object; | ||||
    // NOTE(review): Check() receives the VkResult; presumably it reports failures -
    // confirm against its definition, since `object` is used unconditionally below.
|     Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); | ||||
|     return Event(object, handle, *dld); | ||||
| } | ||||
| 
 | ||||
| SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { | ||||
|     VkSwapchainKHR object; | ||||
|     Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); | ||||
|  | ||||
| @ -199,6 +199,7 @@ struct DeviceDispatch : public InstanceDispatch { | ||||
|     PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; | ||||
|     PFN_vkCmdSetDepthBias vkCmdSetDepthBias; | ||||
|     PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; | ||||
|     PFN_vkCmdSetEvent vkCmdSetEvent; | ||||
|     PFN_vkCmdSetScissor vkCmdSetScissor; | ||||
|     PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; | ||||
|     PFN_vkCmdSetStencilReference vkCmdSetStencilReference; | ||||
| @ -211,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch { | ||||
|     PFN_vkCreateDescriptorPool vkCreateDescriptorPool; | ||||
|     PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; | ||||
|     PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; | ||||
|     PFN_vkCreateEvent vkCreateEvent; | ||||
|     PFN_vkCreateFence vkCreateFence; | ||||
|     PFN_vkCreateFramebuffer vkCreateFramebuffer; | ||||
|     PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; | ||||
| @ -229,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch { | ||||
|     PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; | ||||
|     PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; | ||||
|     PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; | ||||
|     PFN_vkDestroyEvent vkDestroyEvent; | ||||
|     PFN_vkDestroyFence vkDestroyFence; | ||||
|     PFN_vkDestroyFramebuffer vkDestroyFramebuffer; | ||||
|     PFN_vkDestroyImage vkDestroyImage; | ||||
| @ -248,6 +251,7 @@ struct DeviceDispatch : public InstanceDispatch { | ||||
|     PFN_vkFreeMemory vkFreeMemory; | ||||
|     PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; | ||||
|     PFN_vkGetDeviceQueue vkGetDeviceQueue; | ||||
|     PFN_vkGetEventStatus vkGetEventStatus; | ||||
|     PFN_vkGetFenceStatus vkGetFenceStatus; | ||||
|     PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; | ||||
|     PFN_vkGetQueryPoolResults vkGetQueryPoolResults; | ||||
| @ -279,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept; | ||||
| void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; | ||||
| void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; | ||||
| void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; | ||||
| void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept; | ||||
| void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; | ||||
| void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; | ||||
| void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; | ||||
| @ -648,6 +653,15 @@ public: | ||||
|     std::vector<VkImage> GetImages() const; | ||||
| }; | ||||
| 
 | ||||
| class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> { | ||||
    // Handle wrapper for a VkEvent whose owner is a VkDevice.
    // Constructors are inherited from the Handle base.
|     using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; | ||||
| 
 | ||||
| public: | ||||
      // Returns the raw VkResult of vkGetEventStatus for this event, untranslated.
|     VkResult GetStatus() const noexcept { | ||||
|         return dld->vkGetEventStatus(owner, handle); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { | ||||
|     using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; | ||||
| 
 | ||||
| @ -695,6 +709,8 @@ public: | ||||
| 
 | ||||
|     ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; | ||||
| 
 | ||||
|     Event CreateEvent() const; | ||||
| 
 | ||||
|     SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; | ||||
| 
 | ||||
|     DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; | ||||
| @ -938,6 +954,10 @@ public: | ||||
|         dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); | ||||
|     } | ||||
| 
 | ||||
|     void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept { | ||||
          // Forwards to vkCmdSetEvent on this object's handle with the given event and
          // pipeline stage mask.
|         dld->vkCmdSetEvent(handle, event, stage_flags); | ||||
|     } | ||||
| 
 | ||||
|     void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, | ||||
|                                          const VkDeviceSize* offsets, | ||||
|                                          const VkDeviceSize* sizes) const noexcept { | ||||
|  | ||||
| @ -192,6 +192,22 @@ public: | ||||
|         index = index_; | ||||
|     } | ||||
| 
 | ||||
|     void SetMemoryMarked(bool is_memory_marked_) { | ||||
          // Stores the flag reported by IsMemoryMarked().
|         is_memory_marked = is_memory_marked_; | ||||
|     } | ||||
| 
 | ||||
|     bool IsMemoryMarked() const { | ||||
          // Returns the flag last stored by SetMemoryMarked() (false by default).
|         return is_memory_marked; | ||||
|     } | ||||
| 
 | ||||
|     void SetSyncPending(bool is_sync_pending_) { | ||||
          // Stores the flag reported by IsSyncPending().
|         is_sync_pending = is_sync_pending_; | ||||
|     } | ||||
| 
 | ||||
|     bool IsSyncPending() const { | ||||
          // Returns the flag last stored by SetSyncPending() (false by default).
|         return is_sync_pending; | ||||
|     } | ||||
| 
 | ||||
|     void MarkAsPicked(bool is_picked_) { | ||||
|         is_picked = is_picked_; | ||||
|     } | ||||
| @ -303,6 +319,8 @@ private: | ||||
|     bool is_target{}; | ||||
|     bool is_registered{}; | ||||
|     bool is_picked{}; | ||||
|     bool is_memory_marked{}; | ||||
|     bool is_sync_pending{}; | ||||
|     u32 index{NO_RT}; | ||||
|     u64 modification_tick{}; | ||||
| }; | ||||
|  | ||||
| @ -6,6 +6,7 @@ | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
| #include <list> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| #include <set> | ||||
| @ -62,6 +63,30 @@ public: | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void OnCPUWrite(VAddr addr, std::size_t size) { | ||||
          // Handles a CPU write to the region (addr, size) under the cache mutex.
          // Every surface in the region whose memory is still marked gets unmarked, is
          // flagged as sync-pending and is queued in marked_for_unregister. Nothing is
          // unregistered here; SyncGuestHost() processes the queue later.
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         for (const auto& surface : GetSurfacesInRegion(addr, size)) { | ||||
|             if (surface->IsMemoryMarked()) { | ||||
|                 UnmarkMemory(surface); | ||||
|                 surface->SetSyncPending(true); | ||||
|                 marked_for_unregister.emplace_back(surface); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     void SyncGuestHost() { | ||||
          // Processes the surfaces queued in marked_for_unregister under the cache mutex:
          // each one that is still registered is unregistered, then the queue is emptied.
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         for (const auto& surface : marked_for_unregister) { | ||||
|             if (surface->IsRegistered()) { | ||||
                  // The sync-pending flag is cleared before calling Unregister(): Unregister()
                  // removes sync-pending surfaces from marked_for_unregister, which would
                  // modify the list while this loop is iterating over it.
|                 surface->SetSyncPending(false); | ||||
|                 Unregister(surface); | ||||
|             } | ||||
|         } | ||||
|         marked_for_unregister.clear(); | ||||
|     } | ||||
| 
 | ||||
|     /**
 | ||||
|      * Guarantees that render targets don't unregister themselves if they | ||||
|      * collide. Protection is currently only done on 3D slices. | ||||
| @ -85,10 +110,20 @@ public: | ||||
|             return a->GetModificationTick() < b->GetModificationTick(); | ||||
|         }); | ||||
|         for (const auto& surface : surfaces) { | ||||
|             mutex.unlock(); | ||||
|             FlushSurface(surface); | ||||
|             mutex.lock(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     bool MustFlushRegion(VAddr addr, std::size_t size) { | ||||
          // Returns true when at least one surface in the region (addr, size) reports
          // IsModified(); the lookup runs under the cache mutex.
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         const auto surfaces = GetSurfacesInRegion(addr, size); | ||||
|         return std::any_of(surfaces.cbegin(), surfaces.cend(), | ||||
|                            [](const TSurface& surface) { return surface->IsModified(); }); | ||||
|     } | ||||
| 
 | ||||
|     TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, | ||||
|                             const VideoCommon::Shader::Sampler& entry) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| @ -206,8 +241,14 @@ public: | ||||
| 
 | ||||
|         auto surface_view = GetSurface(gpu_addr, *cpu_addr, | ||||
|                                        SurfaceParams::CreateForFramebuffer(system, index), true); | ||||
|         if (render_targets[index].target) | ||||
|             render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||||
|         if (render_targets[index].target) { | ||||
|             auto& surface = render_targets[index].target; | ||||
|             surface->MarkAsRenderTarget(false, NO_RT); | ||||
|             const auto& cr_params = surface->GetSurfaceParams(); | ||||
|             if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) { | ||||
|                 AsyncFlushSurface(surface); | ||||
|             } | ||||
|         } | ||||
|         render_targets[index].target = surface_view.first; | ||||
|         render_targets[index].view = surface_view.second; | ||||
|         if (render_targets[index].target) | ||||
| @ -284,6 +325,34 @@ public: | ||||
|         return ++ticks; | ||||
|     } | ||||
| 
 | ||||
|     void CommitAsyncFlushes() { | ||||
          // Appends the current uncommitted flush list to the committed queue and starts a
          // new, empty batch. A null pointer is appended when nothing was queued, so every
          // commit adds exactly one queue entry.
|         committed_flushes.push_back(uncommitted_flushes); | ||||
|         uncommitted_flushes.reset(); | ||||
|     } | ||||
| 
 | ||||
|     bool HasUncommittedFlushes() const { | ||||
          // True when an uncommitted flush list has been allocated since the last commit.
|         return uncommitted_flushes != nullptr; | ||||
|     } | ||||
| 
 | ||||
|     bool ShouldWaitAsyncFlushes() const { | ||||
          // True when the oldest committed entry exists and holds a (non-null) flush list.
|         return !committed_flushes.empty() && committed_flushes.front() != nullptr; | ||||
|     } | ||||
| 
 | ||||
|     void PopAsyncFlushes() { | ||||
          // Removes the oldest committed entry from the queue. When that entry holds a
          // flush list, every surface in it is flushed first. Does nothing on an empty queue.
|         if (committed_flushes.empty()) { | ||||
|             return; | ||||
|         } | ||||
|         auto& flush_list = committed_flushes.front(); | ||||
          // A null entry is a commit that had nothing queued: just drop it.
|         if (!flush_list) { | ||||
|             committed_flushes.pop_front(); | ||||
|             return; | ||||
|         } | ||||
|         for (TSurface& surface : *flush_list) { | ||||
|             FlushSurface(surface); | ||||
|         } | ||||
|         committed_flushes.pop_front(); | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||
|                           bool is_astc_supported) | ||||
| @ -345,9 +414,20 @@ protected: | ||||
|         surface->SetCpuAddr(*cpu_addr); | ||||
|         RegisterInnerCache(surface); | ||||
|         surface->MarkAsRegistered(true); | ||||
|         surface->SetMemoryMarked(true); | ||||
|         rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | ||||
|     } | ||||
| 
 | ||||
|     void UnmarkMemory(TSurface surface) { | ||||
          // Decrements the rasterizer's cached-page count for the surface's CPU range and
          // clears the surface's memory-marked flag. The early return makes repeated calls
          // harmless: the count is decremented at most once per marking.
|         if (!surface->IsMemoryMarked()) { | ||||
|             return; | ||||
|         } | ||||
|         const std::size_t size = surface->GetSizeInBytes(); | ||||
|         const VAddr cpu_addr = surface->GetCpuAddr(); | ||||
|         rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||||
|         surface->SetMemoryMarked(false); | ||||
|     } | ||||
| 
 | ||||
|     void Unregister(TSurface surface) { | ||||
|         if (guard_render_targets && surface->IsProtected()) { | ||||
|             return; | ||||
| @ -355,9 +435,11 @@ protected: | ||||
|         if (!guard_render_targets && surface->IsRenderTarget()) { | ||||
|             ManageRenderTargetUnregister(surface); | ||||
|         } | ||||
|         const std::size_t size = surface->GetSizeInBytes(); | ||||
|         const VAddr cpu_addr = surface->GetCpuAddr(); | ||||
|         rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||||
|         UnmarkMemory(surface); | ||||
|         if (surface->IsSyncPending()) { | ||||
|             marked_for_unregister.remove(surface); | ||||
|             surface->SetSyncPending(false); | ||||
|         } | ||||
|         UnregisterInnerCache(surface); | ||||
|         surface->MarkAsRegistered(false); | ||||
|         ReserveSurface(surface->GetSurfaceParams(), surface); | ||||
| @ -417,7 +499,7 @@ private: | ||||
|      **/ | ||||
|     RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | ||||
|                                  const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | ||||
|         if (Settings::values.use_accurate_gpu_emulation) { | ||||
|         if (Settings::IsGPULevelExtreme()) { | ||||
|             return RecycleStrategy::Flush; | ||||
|         } | ||||
|         // 3D Textures decision
 | ||||
| @ -461,7 +543,7 @@ private: | ||||
|         } | ||||
|         switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { | ||||
|         case RecycleStrategy::Ignore: { | ||||
|             return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation); | ||||
|             return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme()); | ||||
|         } | ||||
|         case RecycleStrategy::Flush: { | ||||
|             std::sort(overlaps.begin(), overlaps.end(), | ||||
| @ -509,7 +591,7 @@ private: | ||||
|         } | ||||
|         const auto& final_params = new_surface->GetSurfaceParams(); | ||||
|         if (cr_params.type != final_params.type) { | ||||
|             if (Settings::values.use_accurate_gpu_emulation) { | ||||
|             if (Settings::IsGPULevelExtreme()) { | ||||
|                 BufferCopy(current_surface, new_surface); | ||||
|             } | ||||
|         } else { | ||||
| @ -598,7 +680,7 @@ private: | ||||
|         if (passed_tests == 0) { | ||||
|             return {}; | ||||
|             // At the Extreme GPU accuracy level all tests should pass, else we recycle
 | ||||
|         } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { | ||||
|         } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { | ||||
|             return {}; | ||||
|         } | ||||
|         for (const auto& surface : overlaps) { | ||||
| @ -668,7 +750,7 @@ private: | ||||
|             for (const auto& surface : overlaps) { | ||||
|                 if (!surface->MatchTarget(params.target)) { | ||||
|                     if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | ||||
|                         if (Settings::values.use_accurate_gpu_emulation) { | ||||
|                         if (Settings::IsGPULevelExtreme()) { | ||||
|                             return std::nullopt; | ||||
|                         } | ||||
|                         Unregister(surface); | ||||
| @ -1106,6 +1188,13 @@ private: | ||||
|         TView view; | ||||
|     }; | ||||
| 
 | ||||
|     void AsyncFlushSurface(TSurface& surface) { | ||||
          // Queues `surface` in the uncommitted flush list, allocating the list on first use.
          // The surface is not flushed here; the list is handed over by CommitAsyncFlushes().
|         if (!uncommitted_flushes) { | ||||
|             uncommitted_flushes = std::make_shared<std::list<TSurface>>(); | ||||
|         } | ||||
|         uncommitted_flushes->push_back(surface); | ||||
|     } | ||||
| 
 | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
| 
 | ||||
|     FormatLookupTable format_lookup_table; | ||||
| @ -1150,6 +1239,11 @@ private: | ||||
|     std::unordered_map<u32, TSurface> invalid_cache; | ||||
|     std::vector<u8> invalid_memory; | ||||
| 
 | ||||
|     std::list<TSurface> marked_for_unregister; | ||||
| 
 | ||||
|     std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; | ||||
|     std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; | ||||
| 
 | ||||
|     StagingCache staging_cache; | ||||
|     std::recursive_mutex mutex; | ||||
| }; | ||||
|  | ||||
| @ -639,8 +639,8 @@ void Config::ReadRendererValues() { | ||||
|     Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); | ||||
|     Settings::values.use_disk_shader_cache = | ||||
|         ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); | ||||
|     Settings::values.use_accurate_gpu_emulation = | ||||
|         ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); | ||||
|     const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); | ||||
|     Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); | ||||
|     Settings::values.use_asynchronous_gpu_emulation = | ||||
|         ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); | ||||
|     Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); | ||||
| @ -1080,8 +1080,8 @@ void Config::SaveRendererValues() { | ||||
|     WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); | ||||
|     WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, | ||||
|                  true); | ||||
|     WriteSetting(QStringLiteral("use_accurate_gpu_emulation"), | ||||
|                  Settings::values.use_accurate_gpu_emulation, false); | ||||
|     WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy), | ||||
|                  0); | ||||
|     WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), | ||||
|                  Settings::values.use_asynchronous_gpu_emulation, false); | ||||
|     WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | ||||
|  | ||||
| @ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; | ||||
| 
 | ||||
| void ConfigureGraphicsAdvanced::SetConfiguration() { | ||||
|     const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); | ||||
|     ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | ||||
|     ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); | ||||
|     ui->use_vsync->setEnabled(runtime_lock); | ||||
|     ui->use_vsync->setChecked(Settings::values.use_vsync); | ||||
|     ui->force_30fps_mode->setEnabled(runtime_lock); | ||||
| @ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | ||||
| } | ||||
| 
 | ||||
| void ConfigureGraphicsAdvanced::ApplyConfiguration() { | ||||
|     Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | ||||
|     auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); | ||||
|     Settings::values.gpu_accuracy = gpu_accuracy; | ||||
|     Settings::values.use_vsync = ui->use_vsync->isChecked(); | ||||
|     Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); | ||||
|     Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); | ||||
|  | ||||
| @ -23,11 +23,34 @@ | ||||
|        </property> | ||||
|        <layout class="QVBoxLayout" name="verticalLayout_3"> | ||||
|         <item> | ||||
|          <widget class="QCheckBox" name="use_accurate_gpu_emulation"> | ||||
|           <property name="text"> | ||||
|            <string>Use accurate GPU emulation (slow)</string> | ||||
|           </property> | ||||
|          </widget> | ||||
|          <layout class="QHBoxLayout" name="horizontalLayout_2"> | ||||
|           <item> | ||||
|            <widget class="QLabel" name="label_gpu_accuracy"> | ||||
|             <property name="text"> | ||||
|              <string>Accuracy Level:</string> | ||||
|             </property> | ||||
|            </widget> | ||||
|           </item> | ||||
|           <item> | ||||
|            <widget class="QComboBox" name="gpu_accuracy"> | ||||
|             <item> | ||||
|              <property name="text"> | ||||
|               <string notr="true">Normal</string> | ||||
|              </property> | ||||
|             </item> | ||||
|             <item> | ||||
|              <property name="text"> | ||||
|               <string notr="true">High</string> | ||||
|              </property> | ||||
|             </item> | ||||
|             <item> | ||||
|              <property name="text"> | ||||
|               <string notr="true">Extreme(very slow)</string> | ||||
|              </property> | ||||
|             </item> | ||||
|            </widget> | ||||
|           </item> | ||||
|          </layout> | ||||
|         </item> | ||||
|         <item> | ||||
|          <widget class="QCheckBox" name="use_vsync"> | ||||
|  | ||||
| @ -388,8 +388,8 @@ void Config::ReadValues() { | ||||
|         static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); | ||||
|     Settings::values.use_disk_shader_cache = | ||||
|         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); | ||||
|     Settings::values.use_accurate_gpu_emulation = | ||||
|         sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | ||||
|     const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); | ||||
|     Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); | ||||
|     Settings::values.use_asynchronous_gpu_emulation = | ||||
|         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | ||||
|     Settings::values.use_vsync = | ||||
|  | ||||
| @ -146,9 +146,9 @@ frame_limit = | ||||
| # 0 (default): Off, 1 : On | ||||
| use_disk_shader_cache = | ||||
| 
 | ||||
| # Whether to use accurate GPU emulation | ||||
| # 0 (default): Off (fast), 1 : On (slow) | ||||
| use_accurate_gpu_emulation = | ||||
| # Which GPU accuracy level to use | ||||
| # 0 (default): Normal, 1 : High, 2 : Extreme | ||||
| gpu_accuracy = | ||||
| 
 | ||||
| # Whether to use asynchronous GPU emulation | ||||
| # 0 : Off (slow), 1 (default): On (fast) | ||||
|  | ||||
| @ -126,8 +126,8 @@ void Config::ReadValues() { | ||||
|     Settings::values.frame_limit = 100; | ||||
|     Settings::values.use_disk_shader_cache = | ||||
|         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); | ||||
|     Settings::values.use_accurate_gpu_emulation = | ||||
|         sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | ||||
|     const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); | ||||
|     Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); | ||||
|     Settings::values.use_asynchronous_gpu_emulation = | ||||
|         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 bunnei
						bunnei