SingleCore: Use Cycle Timing instead of Host Timing.

2020-03-28 15:23:28 -04:00 · 2020-03-28 15:23:28 -04:00 · f5e32935ca
commit f5e32935ca
parent 9bde28d7b1
15 changed files with 152 additions and 80 deletions
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@ -26,8 +26,9 @@ using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CO
 /// Generic ARMv8 CPU interface
 class ARM_Interface : NonCopyable {
 public:
-    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers)
-        : system{system_}, interrupt_handlers{interrupt_handlers} {}
+    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock)
+        : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{
+                                                                       uses_wall_clock} {}
    virtual ~ARM_Interface() = default;

    struct ThreadContext32 {
@ -186,6 +187,7 @@ protected:
    /// System context that this ARM interface is running under.
    System& system;
    CPUInterrupts& interrupt_handlers;
+    bool uses_wall_clock;
 };

 } // namespace Core
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@ -72,23 +72,35 @@ public:
    }

    void AddTicks(u64 ticks) override {
-        this->ticks -= ticks;
+        if (parent.uses_wall_clock) {
+            return;
+        }
+        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
+        // rough approximation of the amount of executed ticks in the system, it may be thrown off
+        // if not all cores are doing a similar amount of work. Instead of doing this, we should
+        // devise a way so that timing is consistent across all cores without increasing the ticks 4
+        // times.
+        u64 amortized_ticks =
+            (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
+        // Always execute at least one tick.
+        amortized_ticks = std::max<u64>(amortized_ticks, 1);
+
+        parent.system.CoreTiming().AddTicks(amortized_ticks);
+        num_interpreted_instructions = 0;
    }

    u64 GetTicksRemaining() override {
-        if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-            return std::max<s64>(ticks, 0);
+        if (parent.uses_wall_clock) {
+            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
+                return std::max<s64>(1000U, 0);
+            }
+            return 0ULL;
        }
-        return 0ULL;
-    }
-
-    void ResetTicks() {
-        ticks = 1000LL;
+        return std::max(parent.system.CoreTiming().GetDowncount(), 0LL);
    }

    ARM_Dynarmic_32& parent;
    std::size_t num_interpreted_instructions{};
-    s64 ticks{};
 };

 std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@ -103,7 +115,6 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
 }

 void ARM_Dynarmic_32::Run() {
-    cb->ResetTicks();
    jit->Run();
 }

@ -112,8 +123,10 @@ void ARM_Dynarmic_32::Step() {
 }

 ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
-                                 ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : ARM_Interface{system, interrupt_handlers}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
+                                 bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
+                                 std::size_t core_index)
+    : ARM_Interface{system, interrupt_handlers, uses_wall_clock},
+      cb(std::make_unique<DynarmicCallbacks32>(*this)),
      cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}

--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@ -29,7 +29,7 @@ class System;

 class ARM_Dynarmic_32 final : public ARM_Interface {
 public:
-    ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
+    ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
                    ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
    ~ARM_Dynarmic_32() override;

--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@ -124,29 +124,41 @@ public:
    }

    void AddTicks(u64 ticks) override {
-        this->ticks -= ticks;
+        if (parent.uses_wall_clock) {
+            return;
+        }
+        // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
+        // rough approximation of the amount of executed ticks in the system, it may be thrown off
+        // if not all cores are doing a similar amount of work. Instead of doing this, we should
+        // devise a way so that timing is consistent across all cores without increasing the ticks 4
+        // times.
+        u64 amortized_ticks =
+            (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
+        // Always execute at least one tick.
+        amortized_ticks = std::max<u64>(amortized_ticks, 1);
+
+        parent.system.CoreTiming().AddTicks(amortized_ticks);
+        num_interpreted_instructions = 0;
    }

    u64 GetTicksRemaining() override {
-        if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-            return std::max<s64>(ticks, 0);
+        if (parent.uses_wall_clock) {
+            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
+                return std::max<s64>(1000U, 0);
+            }
+            return 0ULL;
        }
-        return 0ULL;
+        return std::max(parent.system.CoreTiming().GetDowncount(), 0LL);
    }

    u64 GetCNTPCT() override {
        return parent.system.CoreTiming().GetClockTicks();
    }

-    void ResetTicks() {
-        ticks = 1000LL;
-    }
-
    ARM_Dynarmic_64& parent;
    std::size_t num_interpreted_instructions = 0;
    u64 tpidrro_el0 = 0;
    u64 tpidr_el0 = 0;
-    s64 ticks{};
 };

 std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table,
@ -185,13 +197,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
    }

    // CNTPCT uses wall clock.
-    config.wall_clock_cntpct = true;
+    config.wall_clock_cntpct = uses_wall_clock;

    return std::make_shared<Dynarmic::A64::Jit>(config);
 }

 void ARM_Dynarmic_64::Run() {
-    cb->ResetTicks();
    jit->Run();
 }

@ -200,9 +211,11 @@ void ARM_Dynarmic_64::Step() {
 }

 ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
-                                 ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
-    : ARM_Interface{system, interrupt_handler},
+                                 bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
+                                 std::size_t core_index)
+    : ARM_Interface{system, interrupt_handler, uses_wall_clock},
      cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handler,
+                                                                      uses_wall_clock,
                                                                      ARM_Unicorn::Arch::AArch64,
                                                                      core_index},
      core_index{core_index}, exclusive_monitor{
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@ -28,7 +28,7 @@ class System;

 class ARM_Dynarmic_64 final : public ARM_Interface {
 public:
-    ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
+    ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
                    ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
    ~ARM_Dynarmic_64() override;

--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@ -63,9 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
    return false;
 }

-ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture,
-                         std::size_t core_index)
-    : ARM_Interface{system, interrupt_handler}, core_index{core_index} {
+ARM_Unicorn::ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler,
+                         bool uses_wall_clock, Arch architecture, std::size_t core_index)
+    : ARM_Interface{system, interrupt_handler, uses_wall_clock}, core_index{core_index} {
    const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
    CHECKED(uc_open(arch, UC_MODE_ARM, &uc));

--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@ -20,8 +20,8 @@ public:
        AArch64, // 64-bit ARM
    };

-    explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler, Arch architecture,
-                         std::size_t core_index);
+    explicit ARM_Unicorn(System& system, CPUInterruptHandler& interrupt_handler,
+                         bool uses_wall_clock, Arch architecture, std::size_t core_index);
    ~ARM_Unicorn() override;

    void SetPC(u64 pc) override;
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@ -14,6 +14,8 @@

 namespace Core::Timing {

+constexpr u64 MAX_SLICE_LENGTH = 4000;
+
 std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
    return std::make_shared<EventType>(std::move(callback), std::move(name));
 }
@ -53,6 +55,7 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) {
 void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
    on_thread_init = std::move(on_thread_init_);
    event_fifo_id = 0;
+    ticks = 0;
    const auto empty_timed_callback = [](u64, s64) {};
    ev_lost = CreateEvent("_lost_event", empty_timed_callback);
    if (is_multicore) {
@ -126,20 +129,36 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
    basic_lock.unlock();
 }

-void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) {
-    ticks_count[core_index] += ticks;
+void CoreTiming::AddTicks(u64 ticks) {
+    this->ticks += ticks;
+    downcount -= ticks;
 }

-void CoreTiming::ResetTicks(std::size_t core_index) {
-    ticks_count[core_index] = 0;
+void CoreTiming::Idle() {
+    if (!event_queue.empty()) {
+        u64 next_event_time = event_queue.front().time;
+        ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
+        return;
+    }
+    ticks += 1000U;
+}
+
+void CoreTiming::ResetTicks() {
+    downcount = MAX_SLICE_LENGTH;
 }

 u64 CoreTiming::GetCPUTicks() const {
-    return clock->GetCPUCycles();
+    if (is_multicore) {
+        return clock->GetCPUCycles();
+    }
+    return ticks;
 }

 u64 CoreTiming::GetClockTicks() const {
-    return clock->GetClockCycles();
+    if (is_multicore) {
+        return clock->GetClockCycles();
+    }
+    return CpuCyclesToClockCycles(ticks);
 }

 void CoreTiming::ClearPendingEvents() {
@ -217,11 +236,17 @@ void CoreTiming::ThreadLoop() {
 }

 std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
-    return clock->GetTimeNS();
+    if (is_multicore) {
+        return clock->GetTimeNS();
+    }
+    return CyclesToNs(ticks);
 }

 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    return clock->GetTimeUS();
+    if (is_multicore) {
+        return clock->GetTimeUS();
+    }
+    return CyclesToUs(ticks);
 }

 } // namespace Core::Timing
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@ -98,9 +98,15 @@ public:
    /// We only permit one event of each type in the queue at a time.
    void RemoveEvent(const std::shared_ptr<EventType>& event_type);

-    void AddTicks(std::size_t core_index, u64 ticks);
+    void AddTicks(u64 ticks);

-    void ResetTicks(std::size_t core_index);
+    void ResetTicks();
+
+    void Idle();
+
+    s64 GetDowncount() const {
+        return downcount;
+    }

    /// Returns current time in emulated CPU cycles
    u64 GetCPUTicks() const;
@ -154,7 +160,9 @@ private:

    bool is_multicore{};

-    std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
+    /// Cycle timing
+    u64 ticks{};
+    s64 downcount{};
 };

 /// Creates a core timing event with the given name and callback.
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@ -38,15 +38,8 @@ s64 usToCycles(std::chrono::microseconds us) {
 }

 s64 nsToCycles(std::chrono::nanoseconds ns) {
-    if (static_cast<u64>(ns.count() / 1000000000) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
-        return std::numeric_limits<s64>::max();
-    }
-    if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000);
-    }
-    return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
+    const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
+    return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
 }

 u64 msToClockCycles(std::chrono::milliseconds ns) {
@ -69,4 +62,22 @@ u64 CpuCyclesToClockCycles(u64 ticks) {
    return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
 }

+std::chrono::milliseconds CyclesToMs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000);
+    u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::milliseconds(ms);
+}
+
+std::chrono::nanoseconds CyclesToNs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000000000);
+    u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::nanoseconds(ns);
+}
+
+std::chrono::microseconds CyclesToUs(s64 cycles) {
+    const u128 temporal = Common::Multiply64Into128(cycles, 1000000);
+    u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
+    return std::chrono::microseconds(us);
+}
+
 } // namespace Core::Timing
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@ -16,18 +16,9 @@ s64 nsToCycles(std::chrono::nanoseconds ns);
 u64 msToClockCycles(std::chrono::milliseconds ns);
 u64 usToClockCycles(std::chrono::microseconds ns);
 u64 nsToClockCycles(std::chrono::nanoseconds ns);
-
-inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
-    return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
-}
-
-inline std::chrono::nanoseconds CyclesToNs(s64 cycles) {
-    return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE);
-}
-
-inline std::chrono::microseconds CyclesToUs(s64 cycles) {
-    return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE);
-}
+std::chrono::milliseconds CyclesToMs(s64 cycles);
+std::chrono::nanoseconds CyclesToNs(s64 cycles);
+std::chrono::microseconds CyclesToUs(s64 cycles);

 u64 CpuCyclesToClockCycles(u64 ticks);

--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@ -232,13 +232,10 @@ void CpuManager::SingleCoreRunGuestLoop() {
        auto* physical_core = &kernel.CurrentPhysicalCore();
        auto& arm_interface = thread->ArmInterface();
        system.EnterDynarmicProfile();
-        while (!physical_core->IsInterrupted()) {
+        if (!physical_core->IsInterrupted()) {
+            system.CoreTiming().ResetTicks();
            arm_interface.Run();
            physical_core = &kernel.CurrentPhysicalCore();
-            preemption_count++;
-            if (preemption_count % max_cycle_runs == 0) {
-                break;
-            }
        }
        system.ExitDynarmicProfile();
        thread->SetPhantomMode(true);
@ -255,7 +252,7 @@ void CpuManager::SingleCoreRunIdleThread() {
    auto& kernel = system.Kernel();
    while (true) {
        auto& physical_core = kernel.CurrentPhysicalCore();
-        PreemptSingleCore();
+        PreemptSingleCore(false);
        idle_count++;
        auto& scheduler = physical_core.Scheduler();
        scheduler.TryDoContextSwitch();
@ -279,12 +276,15 @@ void CpuManager::SingleCoreRunSuspendThread() {
    }
 }

-void CpuManager::PreemptSingleCore() {
-    preemption_count = 0;
+void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
    std::size_t old_core = current_core;
    auto& scheduler = system.Kernel().Scheduler(old_core);
    Kernel::Thread* current_thread = scheduler.GetCurrentThread();
-    if (idle_count >= 4) {
+    if (idle_count >= 4 || from_running_enviroment) {
+        if (!from_running_enviroment) {
+            system.CoreTiming().Idle();
+            idle_count = 0;
+        }
        current_thread->SetPhantomMode(true);
        system.CoreTiming().Advance();
        current_thread->SetPhantomMode(false);
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@ -55,7 +55,7 @@ public:
    std::function<void(void*)> GetSuspendThreadStartFunc();
    void* GetStartFuncParamater();

-    void PreemptSingleCore();
+    void PreemptSingleCore(bool from_running_enviroment = true);

    std::size_t CurrentCore() const {
        return current_core.load();
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@ -1534,6 +1534,7 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {

    if (is_redundant && !system.Kernel().IsMulticore()) {
        system.Kernel().ExitSVCProfile();
+        system.CoreTiming().AddTicks(1000U);
        system.GetCpuManager().PreemptSingleCore();
        system.Kernel().EnterSVCProfile();
    }
@ -1762,6 +1763,10 @@ static u64 GetSystemTick(Core::System& system) {
    // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick)
    const u64 result{system.CoreTiming().GetClockTicks()};

+    if (!system.Kernel().IsMulticore()) {
+        core_timing.AddTicks(400U);
+    }
+
    return result;
 }

--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@ -246,19 +246,23 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadTy
 #ifdef ARCHITECTURE_x86_64
        if (owner_process && !owner_process->Is64BitProcess()) {
            thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
-                system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id);
+                system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
+                processor_id);
        } else {
            thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
-                system, kernel.Interrupts(), kernel.GetExclusiveMonitor(), processor_id);
+                system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
+                processor_id);
        }

 #else
        if (owner_process && !owner_process->Is64BitProcess()) {
            thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
-                system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch32, processor_id);
+                system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch32,
+                processor_id);
        } else {
            thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
-                system, kernel.Interrupts(), ARM_Unicorn::Arch::AArch64, processor_id);
+                system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch64,
+                processor_id);
        }
        LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif