From 57aa18f52ea35ca74cd1a6c406a4abf04049b44e Mon Sep 17 00:00:00 2001
From: Ben
Date: Tue, 12 May 2020 22:48:30 +0200
Subject: [PATCH] Improve core timing accuracy (#5257)

* Improve core timing accuracy

* remove wrong global_ticks, use biggest ticks over all cores for GetGlobalTicks

* merge max slice length change
---
 src/core/core.cpp              | 27 +++++++++++++++++++--------
 src/core/core_timing.cpp       | 21 +++++++++++++++------
 src/core/core_timing.h         | 22 +++++++++++++---------
 src/core/hle/kernel/thread.cpp |  4 ++--
 src/tests/core/core_timing.cpp | 16 +++++++++++++---
 5 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 47fafebe68..b22c288c14 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -148,7 +148,11 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
     for (auto& cpu_core : cpu_cores) {
         if (cpu_core->GetTimer().GetTicks() < global_ticks) {
             s64 delay = global_ticks - cpu_core->GetTimer().GetTicks();
-            cpu_core->GetTimer().Advance(delay);
+            kernel->SetRunningCPU(cpu_core.get());
+            cpu_core->GetTimer().Advance();
+            cpu_core->PrepareReschedule();
+            kernel->GetThreadManager(cpu_core->GetID()).Reschedule();
+            cpu_core->GetTimer().SetNextSlice(delay);
             if (max_delay < delay) {
                 max_delay = delay;
                 current_core_to_execute = cpu_core.get();
@@ -156,10 +160,14 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
         }
     }
 
-    if (max_delay > 0) {
+    // The JIT sometimes overshoots by a few ticks, which might lead to a minimal desync between
+    // the cores. This small difference shouldn't make it necessary to sync the cores and would
+    // only cost performance. Thus we don't sync delays below min_delay.
+    static constexpr s64 min_delay = 100;
+    if (max_delay > min_delay) {
         LOG_TRACE(Core_ARM11, "Core {} running (delayed) for {} ticks",
-                current_core_to_execute->GetID(),
-                current_core_to_execute->GetTimer().GetDowncount());
+                  current_core_to_execute->GetID(),
+                  current_core_to_execute->GetTimer().GetDowncount());
         if (running_core != current_core_to_execute) {
             running_core = current_core_to_execute;
             kernel->SetRunningCPU(running_core);
@@ -181,12 +189,15 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
         // TODO: Make special check for idle since we can easily revert the time of idle cores
         s64 max_slice = Timing::MAX_SLICE_LENGTH;
         for (const auto& cpu_core : cpu_cores) {
+            kernel->SetRunningCPU(cpu_core.get());
+            cpu_core->GetTimer().Advance();
+            cpu_core->PrepareReschedule();
+            kernel->GetThreadManager(cpu_core->GetID()).Reschedule();
             max_slice = std::min(max_slice, cpu_core->GetTimer().GetMaxSliceLength());
         }
         for (auto& cpu_core : cpu_cores) {
-            cpu_core->GetTimer().Advance(max_slice);
-        }
-        for (auto& cpu_core : cpu_cores) {
+            cpu_core->GetTimer().SetNextSlice(max_slice);
+            auto start_ticks = cpu_core->GetTimer().GetTicks();
             LOG_TRACE(Core_ARM11, "Core {} running for {} ticks", cpu_core->GetID(),
                       cpu_core->GetTimer().GetDowncount());
             running_core = cpu_core.get();
@@ -204,8 +215,8 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
                     cpu_core->Step();
                 }
             }
+            max_slice = cpu_core->GetTimer().GetTicks() - start_ticks;
         }
-        timing->AddToGlobalTicks(max_slice);
     }
 
     if (GDBStub::IsServerEnabled()) {
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 875412e83f..3d77932b6d 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -111,11 +111,15 @@ s64 Timing::GetTicks() const {
 }
 
 s64 Timing::GetGlobalTicks() const {
-    return global_timer;
+    const auto& timer =
+        std::max_element(timers.cbegin(), timers.cend(), [](const auto& a, const auto& b) {
+            return a->GetTicks() < b->GetTicks();
+        });
+    return (*timer)->GetTicks();
 }
 
 std::chrono::microseconds Timing::GetGlobalTimeUs() const {
-    return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE_ARM11};
+    return std::chrono::microseconds{GetGlobalTicks() * 1000000 / BASE_CLOCK_RATE_ARM11};
 }
 
 std::shared_ptr<Timing::Timer> Timing::GetTimer(std::size_t cpu_id) {
@@ -161,21 +165,22 @@ void Timing::Timer::MoveEvents() {
 }
 
 s64 Timing::Timer::GetMaxSliceLength() const {
-    auto next_event = std::find_if(event_queue.begin(), event_queue.end(),
-                                   [&](const Event& e) { return e.time - executed_ticks > 0; });
+    const auto& next_event = event_queue.begin();
     if (next_event != event_queue.end()) {
+        ASSERT(next_event->time - executed_ticks > 0);
         return next_event->time - executed_ticks;
     }
     return MAX_SLICE_LENGTH;
 }
 
-void Timing::Timer::Advance(s64 max_slice_length) {
+void Timing::Timer::Advance() {
     MoveEvents();
 
     s64 cycles_executed = slice_length - downcount;
     idled_cycles = 0;
     executed_ticks += cycles_executed;
-    slice_length = max_slice_length;
+    slice_length = 0;
+    downcount = 0;
 
     is_timer_sane = true;
 
@@ -191,6 +196,10 @@
     }
 
     is_timer_sane = false;
+}
+
+void Timing::Timer::SetNextSlice(s64 max_slice_length) {
+    slice_length = max_slice_length;
 
     // Still events left (scheduled in the future)
     if (!event_queue.empty()) {
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 73b226cde5..dadbe16e39 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -171,7 +171,13 @@ public:
         BOOST_SERIALIZATION_SPLIT_MEMBER()
     };
 
-    static constexpr int MAX_SLICE_LENGTH = 20000;
+    // Currently, Service::HID::pad_update_ticks is the smallest interval for an event that is
+    // always scheduled, so we use it as the reference for MAX_SLICE_LENGTH.
+    // Bigger slice lengths are desirable for performance, though they let the cores desync more.
+    // We never want to schedule events into the current slice, because then cores might have to
+    // run small slices to sync up again. This is especially important for events that are always
+    // scheduled and repeated.
+    static constexpr int MAX_SLICE_LENGTH = BASE_CLOCK_RATE_ARM11 / 234;
 
     class Timer {
     public:
@@ -180,7 +186,9 @@ public:
 
         s64 GetMaxSliceLength() const;
 
-        void Advance(s64 max_slice_length = MAX_SLICE_LENGTH);
+        void Advance();
+
+        void SetNextSlice(s64 max_slice_length = MAX_SLICE_LENGTH);
 
         void Idle();
 
@@ -227,6 +235,9 @@ public:
        void serialize(Archive& ar, const unsigned int) {
            MoveEvents();
            // NOTE: ts_queue should be empty now
+           // TODO(SaveState): Remove the next two lines when we break compatibility
+           s64 x;
+           ar& x; // to keep compatibility with old save states that stored global_timer
            ar& event_queue;
            ar& event_fifo_id;
            ar& slice_length;
@@ -260,10 +271,6 @@ public:
 
     s64 GetGlobalTicks() const;
 
-    void AddToGlobalTicks(s64 ticks) {
-        global_timer += ticks;
-    }
-
     /**
      * Updates the value of the cpu clock scaling to the new percentage.
      */
@@ -274,8 +281,6 @@ public:
     std::shared_ptr<Timer> GetTimer(std::size_t cpu_id);
 
 private:
-    s64 global_timer = 0;
-
     // unordered_map stores each element separately as a linked list node so pointers to
     // elements remain stable regardless of rehashes/resizing.
     std::unordered_map<std::string, TimingEventType> event_types = {};
@@ -290,7 +295,6 @@ private:
     template <class Archive>
     void serialize(Archive& ar, const unsigned int file_version) {
         // event_types set during initialization of other things
-        ar& global_timer;
         ar& timers;
         if (file_version == 0) {
             std::shared_ptr<Timer> x;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 60d2026315..80111c24c2 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -111,7 +111,7 @@ void ThreadManager::SwitchContext(Thread* new_thread) {
     // Save context for previous thread
     if (previous_thread) {
         previous_process = previous_thread->owner_process;
-        previous_thread->last_running_ticks = timing.GetGlobalTicks();
+        previous_thread->last_running_ticks = cpu->GetTimer().GetTicks();
         cpu->SaveContext(previous_thread->context);
 
         if (previous_thread->status == ThreadStatus::Running) {
@@ -344,7 +344,7 @@ ResultVal<std::shared_ptr<Thread>> KernelSystem::CreateThread(
     thread->entry_point = entry_point;
     thread->stack_top = stack_top;
     thread->nominal_priority = thread->current_priority = priority;
-    thread->last_running_ticks = timing.GetGlobalTicks();
+    thread->last_running_ticks = timing.GetTimer(processor_id)->GetTicks();
     thread->processor_id = processor_id;
     thread->wait_objects.clear();
     thread->wait_address = 0;
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 8b34ba9d79..b3247aed84 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -13,7 +13,8 @@
 // Numbers are chosen randomly to make sure the correct one is given.
 static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
-static constexpr int MAX_SLICE_LENGTH = 20000; // Copied from CoreTiming internals
+static constexpr int MAX_SLICE_LENGTH =
+    BASE_CLOCK_RATE_ARM11 / 234; // Copied from CoreTiming internals
 static std::bitset<CB_IDS.size()> callbacks_ran_flags;
 static u64 expected_callback = 0;
 
@@ -36,7 +37,9 @@ static void AdvanceAndCheck(Core::Timing& timing, u32 idx, int downcount, int ex
     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount() - cpu_downcount);
     // Pretend we executed X cycles of instructions.
+    timing.GetTimer(0)->Advance();
+    timing.GetTimer(0)->SetNextSlice();
 
     REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
     REQUIRE(downcount == timing.GetTimer(0)->GetDowncount());
@@ -53,6 +56,7 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
 
     // Enter slice 0
     timing.GetTimer(0)->Advance();
+    timing.GetTimer(0)->SetNextSlice();
 
     // D -> B -> C -> A -> E
     timing.ScheduleEvent(1000, cb_a, CB_IDS[0], 0);
@@ -106,6 +110,7 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
 
     // Enter slice 0
     timing.GetTimer(0)->Advance();
+    timing.GetTimer(0)->SetNextSlice();
 
     REQUIRE(1000 == timing.GetTimer(0)->GetDowncount());
 
     callbacks_ran_flags = 0;
@@ -113,6 +118,7 @@
     lateness = 0;
     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
     timing.GetTimer(0)->Advance();
+    timing.GetTimer(0)->SetNextSlice();
     REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount());
     REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
 }
@@ -125,6 +131,7 @@ TEST_CASE("CoreTiming[PredictableLateness]", "[core]") {
 
     // Enter slice 0
     timing.GetTimer(0)->Advance();
+    timing.GetTimer(0)->SetNextSlice();
 
     timing.ScheduleEvent(100, cb_a, CB_IDS[0], 0);
     timing.ScheduleEvent(200, cb_b, CB_IDS[1], 0);
@@ -161,6 +168,7 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
 
     // Enter slice 0
     timing.GetTimer(0)->Advance();
+    timing.GetTimer(0)->SetNextSlice();
 
     timing.ScheduleEvent(800, cb_a, CB_IDS[0], 0);
     timing.ScheduleEvent(1000, cb_b, CB_IDS[1], 0);
@@ -174,14 +182,16 @@
     REQUIRE(2 == reschedules);
 
     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
-    timing.GetTimer(0)->Advance(); // cb_rs
+    timing.GetTimer(0)->Advance();
+    timing.GetTimer(0)->SetNextSlice(); // cb_rs
     REQUIRE(1 == reschedules);
     REQUIRE(200 == timing.GetTimer(0)->GetDowncount());
 
     AdvanceAndCheck(timing, 2, 800); // cb_c
 
     timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
-    timing.GetTimer(0)->Advance(); // cb_rs
+    timing.GetTimer(0)->Advance();
+    timing.GetTimer(0)->SetNextSlice(); // cb_rs
    REQUIRE(0 == reschedules);
    REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount());
 }
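
Reviewer note: to make the new RunLoop flow easier to follow, here is a minimal, self-contained sketch of the lockstep idea the patch converges on. This is not Citra code; the sketch::Timer type, the RunSyncedSlice helper, and the tick values are illustrative assumptions that only mirror the shape of the change: advance every core to a common point, then run all cores for one shared slice bounded by the earliest pending event on any core (and by MAX_SLICE_LENGTH).

// Minimal sketch (not Citra code): deriving a shared slice from the earliest
// pending event across all per-core timers.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

namespace sketch {

constexpr std::int64_t MAX_SLICE_LENGTH = 1000; // stand-in value, not BASE_CLOCK_RATE_ARM11 / 234

struct Timer {
    std::int64_t executed_ticks = 0; // ticks this core has retired so far
    std::int64_t next_event = 0;     // absolute tick of the earliest scheduled event

    // Account for the cycles the core ran in the previous slice.
    void Advance(std::int64_t cycles) {
        executed_ticks += cycles;
    }

    // Never hand out a slice that runs past the next scheduled event.
    std::int64_t GetMaxSliceLength() const {
        return std::min(MAX_SLICE_LENGTH, next_event - executed_ticks);
    }
};

// One iteration of the "all cores in sync" branch: every core gets an identical
// slice whose length is capped by the earliest event on any core.
std::int64_t RunSyncedSlice(std::vector<Timer>& timers) {
    std::int64_t slice = MAX_SLICE_LENGTH;
    for (const Timer& timer : timers) {
        slice = std::min(slice, timer.GetMaxSliceLength());
    }
    for (Timer& timer : timers) {
        timer.Advance(slice); // the real JIT may overshoot by a few ticks, hence min_delay above
    }
    return slice;
}

} // namespace sketch

int main() {
    std::vector<sketch::Timer> timers(2);
    timers[0].next_event = 300;  // e.g. a frequently repeated event such as a pad update
    timers[1].next_event = 5000; // a far-away event on the second core
    std::cout << "shared slice: " << sketch::RunSyncedSlice(timers) << " ticks\n"; // prints 300
}

Built with any C++17 compiler, the example prints a shared slice of 300 ticks: both cores stop exactly at the earliest pending event instead of running past it, which is the property the GetMaxSliceLength()/SetNextSlice() split in the patch is meant to preserve.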