Merge pull request #2912 from FernandoS27/async-fixes
General fixes to Async GPU
This commit is contained in:
		
						commit
						ef9b31783d
					
				| @ -256,6 +256,8 @@ struct System::Impl { | ||||
|         is_powered_on = false; | ||||
|         exit_lock = false; | ||||
| 
 | ||||
|         gpu_core->WaitIdle(); | ||||
| 
 | ||||
|         // Shutdown emulation session
 | ||||
|         renderer.reset(); | ||||
|         GDBStub::Shutdown(); | ||||
|  | ||||
| @ -5,6 +5,7 @@ | ||||
| #include "common/assert.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "core/core.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" | ||||
| #include "core/hle/service/nvdrv/devices/nvmap.h" | ||||
| #include "core/perf_stats.h" | ||||
| @ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | ||||
|         transform, crop_rect}; | ||||
| 
 | ||||
|     system.GetPerfStats().EndGameFrame(); | ||||
|     system.GetPerfStats().EndSystemFrame(); | ||||
|     system.GPU().SwapBuffers(&framebuffer); | ||||
|     system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); | ||||
|     system.GetPerfStats().BeginSystemFrame(); | ||||
| } | ||||
| 
 | ||||
| } // namespace Service::Nvidia::Devices
 | ||||
|  | ||||
| @ -63,16 +63,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | ||||
|         return NvResult::BadParameter; | ||||
|     } | ||||
| 
 | ||||
|     u32 event_id = params.value & 0x00FF; | ||||
| 
 | ||||
|     if (event_id >= MaxNvEvents) { | ||||
|         std::memcpy(output.data(), &params, sizeof(params)); | ||||
|         return NvResult::BadParameter; | ||||
|     } | ||||
| 
 | ||||
|     auto event = events_interface.events[event_id]; | ||||
|     auto& gpu = system.GPU(); | ||||
|     // This is mostly to take into account unimplemented features, as a synced
 | ||||
|     // GPU is always in sync.
 | ||||
|     if (!gpu.IsAsync()) { | ||||
|         event.writable->Signal(); | ||||
|         return NvResult::Success; | ||||
|     } | ||||
|     auto lock = gpu.LockSync(); | ||||
|     const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); | ||||
|     const s32 diff = current_syncpoint_value - params.threshold; | ||||
|     if (diff >= 0) { | ||||
|         event.writable->Signal(); | ||||
|         params.value = current_syncpoint_value; | ||||
|         std::memcpy(output.data(), &params, sizeof(params)); | ||||
|         return NvResult::Success; | ||||
| @ -88,27 +98,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | ||||
|         return NvResult::Timeout; | ||||
|     } | ||||
| 
 | ||||
|     u32 event_id; | ||||
|     if (is_async) { | ||||
|         event_id = params.value & 0x00FF; | ||||
|         if (event_id >= MaxNvEvents) { | ||||
|             std::memcpy(output.data(), &params, sizeof(params)); | ||||
|             return NvResult::BadParameter; | ||||
|         } | ||||
|     } else { | ||||
|         if (ctrl.fresh_call) { | ||||
|             const auto result = events_interface.GetFreeEvent(); | ||||
|             if (result) { | ||||
|                 event_id = *result; | ||||
|             } else { | ||||
|                 LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); | ||||
|                 event_id = params.value & 0x00FF; | ||||
|             } | ||||
|         } else { | ||||
|             event_id = ctrl.event_id; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     EventState status = events_interface.status[event_id]; | ||||
|     if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { | ||||
|         events_interface.SetEventStatus(event_id, EventState::Waiting); | ||||
| @ -120,7 +109,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | ||||
|             params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; | ||||
|         } | ||||
|         params.value |= event_id; | ||||
|         events_interface.events[event_id].writable->Clear(); | ||||
|         event.writable->Clear(); | ||||
|         gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | ||||
|         if (!is_async && ctrl.fresh_call) { | ||||
|             ctrl.must_delay = true; | ||||
|  | ||||
| @ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { | ||||
|     IPC::ResponseBuilder rb{ctx, 3, 1}; | ||||
|     rb.Push(RESULT_SUCCESS); | ||||
|     if (event_id < MaxNvEvents) { | ||||
|         rb.PushCopyObjects(nvdrv->GetEvent(event_id)); | ||||
|         auto event = nvdrv->GetEvent(event_id); | ||||
|         event->Clear(); | ||||
|         rb.PushCopyObjects(event); | ||||
|         rb.Push<u32>(NvResult::Success); | ||||
|     } else { | ||||
|         rb.Push<u32>(0); | ||||
|  | ||||
| @ -40,8 +40,8 @@ Module::Module(Core::System& system) { | ||||
|     auto& kernel = system.Kernel(); | ||||
|     for (u32 i = 0; i < MaxNvEvents; i++) { | ||||
|         std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); | ||||
|         events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( | ||||
|             kernel, Kernel::ResetType::Automatic, event_label); | ||||
|         events_interface.events[i] = | ||||
|             Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label); | ||||
|         events_interface.status[i] = EventState::Free; | ||||
|         events_interface.registered[i] = false; | ||||
|     } | ||||
|  | ||||
| @ -187,14 +187,18 @@ void NVFlinger::Compose() { | ||||
|         MicroProfileFlip(); | ||||
| 
 | ||||
|         if (!buffer) { | ||||
|             // There was no queued buffer to draw, render previous frame
 | ||||
|             system.GetPerfStats().EndGameFrame(); | ||||
|             system.GPU().SwapBuffers({}); | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         const auto& igbp_buffer = buffer->get().igbp_buffer; | ||||
| 
 | ||||
|         const auto& gpu = system.GPU(); | ||||
|         const auto& multi_fence = buffer->get().multi_fence; | ||||
|         for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { | ||||
|             const auto& fence = multi_fence.fences[fence_id]; | ||||
|             gpu.WaitFence(fence.id, fence.value); | ||||
|         } | ||||
| 
 | ||||
|         // Now send the buffer to the GPU for drawing.
 | ||||
|         // TODO(Subv): Support more than just disp0. The display device selection is probably based
 | ||||
|         // on which display we're drawing (Default, Internal, External, etc)
 | ||||
|  | ||||
| @ -3,6 +3,7 @@ | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "core/core.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/memory.h" | ||||
| @ -17,6 +18,8 @@ | ||||
| 
 | ||||
| namespace Tegra { | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||||
| 
 | ||||
| GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) | ||||
|     : system{system}, renderer{renderer}, is_async{is_async} { | ||||
|     auto& rasterizer{renderer.Rasterizer()}; | ||||
| @ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { | ||||
|     return *dma_pusher; | ||||
| } | ||||
| 
 | ||||
| void GPU::WaitFence(u32 syncpoint_id, u32 value) const { | ||||
|     // A synced GPU is always in sync
 | ||||
|     if (!is_async) { | ||||
|         return; | ||||
|     } | ||||
|     MICROPROFILE_SCOPE(GPU_wait); | ||||
|     while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | ||||
|     syncpoints[syncpoint_id]++; | ||||
|     std::lock_guard lock{sync_mutex}; | ||||
|  | ||||
| @ -177,6 +177,12 @@ public: | ||||
|     /// Returns a reference to the GPU DMA pusher.
 | ||||
|     Tegra::DmaPusher& DmaPusher(); | ||||
| 
 | ||||
|     // Waits for the GPU to finish working
 | ||||
|     virtual void WaitIdle() const = 0; | ||||
| 
 | ||||
|     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
 | ||||
|     void WaitFence(u32 syncpoint_id, u32 value) const; | ||||
| 
 | ||||
|     void IncrementSyncPoint(u32 syncpoint_id); | ||||
| 
 | ||||
|     u32 GetSyncpointValue(u32 syncpoint_id) const; | ||||
|  | ||||
| @ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con | ||||
|     interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||||
| } | ||||
| 
 | ||||
| void GPUAsynch::WaitIdle() const { | ||||
|     gpu_thread.WaitIdle(); | ||||
| } | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
|  | ||||
| @ -25,6 +25,7 @@ public: | ||||
|     void FlushRegion(CacheAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|     void WaitIdle() const override; | ||||
| 
 | ||||
| protected: | ||||
|     void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | ||||
|  | ||||
| @ -24,6 +24,7 @@ public: | ||||
|     void FlushRegion(CacheAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|     void WaitIdle() const override {} | ||||
| 
 | ||||
| protected: | ||||
|     void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, | ||||
|  | ||||
| @ -5,8 +5,6 @@ | ||||
| #include "common/assert.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "core/core.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/core_timing_util.h" | ||||
| #include "core/frontend/scope_acquire_window_context.h" | ||||
| #include "video_core/dma_pusher.h" | ||||
| #include "video_core/gpu.h" | ||||
| @ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { | ||||
| 
 | ||||
| void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { | ||||
|     thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; | ||||
|     synchronization_event = system.CoreTiming().RegisterEvent( | ||||
|         "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | ||||
|     const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; | ||||
|     const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; | ||||
|     system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); | ||||
|     PushCommand(SubmitListCommand(std::move(entries))); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||||
| @ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||||
|     InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::WaitIdle() const { | ||||
|     while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| u64 ThreadManager::PushCommand(CommandData&& command_data) { | ||||
|     const u64 fence{++state.last_fence}; | ||||
|     state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | ||||
|     return fence; | ||||
| } | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||||
| void SynchState::WaitForSynchronization(u64 fence) { | ||||
|     while (signaled_fence.load() < fence) | ||||
|         ; | ||||
| } | ||||
| 
 | ||||
| } // namespace VideoCommon::GPUThread
 | ||||
|  | ||||
| @ -21,9 +21,6 @@ class DmaPusher; | ||||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
| namespace Timing { | ||||
| struct EventType; | ||||
| } // namespace Timing
 | ||||
| } // namespace Core
 | ||||
| 
 | ||||
| namespace VideoCommon::GPUThread { | ||||
| @ -89,8 +86,6 @@ struct CommandDataContainer { | ||||
| struct SynchState final { | ||||
|     std::atomic_bool is_running{true}; | ||||
| 
 | ||||
|     void WaitForSynchronization(u64 fence); | ||||
| 
 | ||||
|     using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | ||||
|     CommandQueue queue; | ||||
|     u64 last_fence{}; | ||||
| @ -121,6 +116,9 @@ public: | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||
|     void FlushAndInvalidateRegion(CacheAddr addr, u64 size); | ||||
| 
 | ||||
|     // Wait until the gpu thread is idle.
 | ||||
|     void WaitIdle() const; | ||||
| 
 | ||||
| private: | ||||
|     /// Pushes a command to be executed by the GPU thread
 | ||||
|     u64 PushCommand(CommandData&& command_data); | ||||
| @ -128,7 +126,6 @@ private: | ||||
| private: | ||||
|     SynchState state; | ||||
|     Core::System& system; | ||||
|     Core::Timing::EventType* synchronization_event{}; | ||||
|     std::thread thread; | ||||
|     std::thread::id thread_id; | ||||
| }; | ||||
|  | ||||
| @ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||||
|     std::lock_guard lock{pages_mutex}; | ||||
|     const u64 page_start{addr >> Memory::PAGE_BITS}; | ||||
|     const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; | ||||
| 
 | ||||
|  | ||||
| @ -9,6 +9,7 @@ | ||||
| #include <cstddef> | ||||
| #include <map> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| #include <optional> | ||||
| #include <tuple> | ||||
| #include <utility> | ||||
| @ -230,6 +231,8 @@ private: | ||||
| 
 | ||||
|     using CachedPageMap = boost::icl::interval_map<u64, int>; | ||||
|     CachedPageMap cached_pages; | ||||
| 
 | ||||
|     std::mutex pages_mutex; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  | ||||
| @ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | ||||
| RendererOpenGL::~RendererOpenGL() = default; | ||||
| 
 | ||||
| void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||||
|     system.GetPerfStats().EndSystemFrame(); | ||||
| 
 | ||||
|     // Maintain the rasterizer's state as a priority
 | ||||
|     OpenGLState prev_state = OpenGLState::GetCurState(); | ||||
|     state.AllDirty(); | ||||
| @ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||||
| 
 | ||||
|     render_window.PollEvents(); | ||||
| 
 | ||||
|     system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); | ||||
|     system.GetPerfStats().BeginSystemFrame(); | ||||
| 
 | ||||
|     // Restore the rasterizer state
 | ||||
|     prev_state.AllDirty(); | ||||
|     prev_state.Apply(); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 bunnei
						bunnei