From a9877c8f65b3cefe12d8ec5da6d6acf4ea27923b Mon Sep 17 00:00:00 2001
From: Subv <>
Date: Sun, 12 Aug 2018 20:38:37 -0500
Subject: [PATCH 1/2] Kernel/SVC: Don't reschedule the current core when
 creating a new thread.

The current core may have nothing to do with the core where the new thread was scheduled to run. In case it's the same core, then the following PrepareReschedule call will take care of that.
 src/core/hle/kernel/svc.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 5db2db6872..4ca4815139 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -532,7 +532,6 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
     CASCADE_RESULT(thread->guest_handle, g_handle_table.Create(thread));
     *out_handle = thread->guest_handle;
-    Core::System::GetInstance().PrepareReschedule();

From d9237660429adaa9f549d10e79252a713f1da874 Mon Sep 17 00:00:00 2001
From: Subv <>
Date: Sun, 12 Aug 2018 20:41:28 -0500
Subject: [PATCH 2/2] CPU/Timing: Use an approximated amortized amount of ticks
 when advancing timing.

We divide the number of ticks to add by the number of cores (4) to obtain a more or less rough estimate of the actual number of ticks added. This assumes that all 4 cores are doing similar work. Previously we were adding ~4 times the number of ticks, thus making the games think that time was going way too fast.

This lets us bypass certain hangs in some games like Breath of the Wild.

We should modify our CoreTiming to support multiple cores (both running in a single thread, and in multiple host threads).
 src/core/arm/dynarmic/arm_dynarmic.cpp | 11 ++++++++++-
 src/core/core_cpu.cpp                  |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index ceb3f76835..0996f129c5 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -86,7 +86,16 @@ public:
     void AddTicks(u64 ticks) override {
-        CoreTiming::AddTicks(ticks - num_interpreted_instructions);
+        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
+        // rough approximation of the amount of executed ticks in the system, it may be thrown off
+        // if not all cores are doing a similar amount of work. Instead of doing this, we should
+        // devise a way so that timing is consistent across all cores without increasing the ticks 4
+        // times.
+        u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES;
+        // Always execute at least one tick.
+        amortized_ticks = std::max<u64>(amortized_ticks, 1);
+        CoreTiming::AddTicks(amortized_ticks);
         num_interpreted_instructions = 0;
     u64 GetTicksRemaining() override {
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 46a522fcd4..3f1c706244 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -90,6 +90,7 @@ void Cpu::RunLoop(bool tight_loop) {
         LOG_TRACE(Core, "Core-{} idling", core_index);
         if (IsMainCore()) {
+            // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.