From 208ed712f42cfd277405a22663197dc1c5e84cfe Mon Sep 17 00:00:00 2001
From: Liam <byteslice@airmail.cc>
Date: Mon, 6 Jun 2022 12:56:01 -0400
Subject: [PATCH] core/debugger: memory breakpoint support

---
 externals/dynarmic                        |   2 +-
 src/common/page_table.h                   |   3 +
 src/core/arm/arm_interface.cpp            |  41 +++++-
 src/core/arm/arm_interface.h              |  13 +-
 src/core/arm/dynarmic/arm_dynarmic_32.cpp |  69 ++++++++--
 src/core/arm/dynarmic/arm_dynarmic_32.h   |   6 +
 src/core/arm/dynarmic/arm_dynarmic_64.cpp |  79 +++++++++--
 src/core/arm/dynarmic/arm_dynarmic_64.h   |   6 +
 src/core/debugger/debugger.cpp            |  19 ++-
 src/core/debugger/debugger.h              |   8 +-
 src/core/debugger/debugger_interface.h    |   8 +-
 src/core/debugger/gdbstub.cpp             | 157 +++++++++++++++++-----
 src/core/debugger/gdbstub.h               |   3 +
 src/core/hardware_properties.h            |   3 +
 src/core/hle/kernel/k_process.cpp         |  46 +++++++
 src/core/hle/kernel/k_process.h           |  30 +++++
 src/core/hle/kernel/k_scheduler.cpp       |   1 +
 src/core/memory.cpp                       |  79 ++++++++++-
 src/core/memory.h                         |  11 ++
 19 files changed, 520 insertions(+), 64 deletions(-)

diff --git a/externals/dynarmic b/externals/dynarmic
index 57af72a567..5ad1d02351 160000
--- a/externals/dynarmic
+++ b/externals/dynarmic
@@ -1 +1 @@
-Subproject commit 57af72a567454b93c757e087b4510a24b81911b1
+Subproject commit 5ad1d02351bf4fee681a3d701d210b419f41a505
diff --git a/src/common/page_table.h b/src/common/page_table.h
index fcbd12a438..1ad3a9f8b4 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -15,6 +15,9 @@ enum class PageType : u8 {
     Unmapped,
     /// Page is mapped to regular memory. This is the only type you can get pointers to.
     Memory,
+    /// Page is mapped to regular memory, but inaccessible from CPU fastmem and must use
+    /// the callbacks.
+    DebugMemory,
     /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
     /// invalidation
     RasterizerCachedMemory,
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index 9a285dfc60..6425e131f8 100644
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -121,8 +121,15 @@ void ARM_Interface::Run() {
 
         // Notify the debugger and go to sleep if a breakpoint was hit.
         if (Has(hr, breakpoint)) {
+            RewindBreakpointInstruction();
             system.GetDebugger().NotifyThreadStopped(current_thread);
-            current_thread->RequestSuspend(Kernel::SuspendType::Debug);
+            current_thread->RequestSuspend(SuspendType::Debug);
+            break;
+        }
+        if (Has(hr, watchpoint)) {
+            RewindBreakpointInstruction();
+            system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint());
+            current_thread->RequestSuspend(SuspendType::Debug);
             break;
         }
 
@@ -136,4 +143,36 @@ void ARM_Interface::Run() {
     }
 }
 
+void ARM_Interface::LoadWatchpointArray(const WatchpointArray& wp) {
+    watchpoints = &wp; // Non-owning; MatchingWatchpoint dereferences it later.
+}
+
+const Kernel::DebugWatchpoint* ARM_Interface::MatchingWatchpoint(
+    VAddr addr, u64 size, Kernel::DebugWatchpointType access_type) const {
+    // Returns the first entry of the loaded watchpoint array whose range overlaps the access
+    // [addr, addr + size) and whose type ANDed with access_type is not None; nullptr otherwise.
+    if (watchpoints == nullptr) {
+        return nullptr;
+    }
+
+    const VAddr access_begin{addr};
+    const VAddr access_end{addr + size};
+
+    for (const auto& candidate : *watchpoints) {
+        // Both ranges are half-open, so ranges that merely touch do not overlap.
+        const bool overlaps{access_begin < candidate.end_address &&
+                            candidate.start_address < access_end};
+        if (!overlaps) {
+            continue;
+        }
+
+        // Skip entries that are not armed for any of the requested access kinds.
+        if ((access_type & candidate.type) != Kernel::DebugWatchpointType::None) {
+            return &candidate;
+        }
+    }
+
+    return nullptr;
+}
+
 } // namespace Core
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 66f6107e9f..4e431e27a0 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <span>
 #include <vector>
 
 #include <dynarmic/interface/halt_reason.h>
@@ -19,13 +20,16 @@ struct PageTable;
 
 namespace Kernel {
 enum class VMAPermission : u8;
-}
+enum class DebugWatchpointType : u8;
+struct DebugWatchpoint;
+} // namespace Kernel
 
 namespace Core {
 class System;
 class CPUInterruptHandler;
 
 using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>;
+using WatchpointArray = std::array<Kernel::DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS>;
 
 /// Generic ARMv8 CPU interface
 class ARM_Interface {
@@ -170,6 +174,7 @@ public:
     virtual void SaveContext(ThreadContext64& ctx) = 0;
     virtual void LoadContext(const ThreadContext32& ctx) = 0;
     virtual void LoadContext(const ThreadContext64& ctx) = 0;
+    void LoadWatchpointArray(const WatchpointArray& wp);
 
     /// Clears the exclusive monitor's state.
     virtual void ClearExclusiveState() = 0;
@@ -198,18 +203,24 @@ public:
     static constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
     static constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
     static constexpr Dynarmic::HaltReason breakpoint = Dynarmic::HaltReason::UserDefined4;
+    static constexpr Dynarmic::HaltReason watchpoint = Dynarmic::HaltReason::UserDefined5;
 
 protected:
     /// System context that this ARM interface is running under.
     System& system;
     CPUInterrupts& interrupt_handlers;
+    const WatchpointArray* watchpoints{}; // Null until LoadWatchpointArray() is called.
     bool uses_wall_clock;
 
     static void SymbolicateBacktrace(Core::System& system, std::vector<BacktraceEntry>& out);
+    const Kernel::DebugWatchpoint* MatchingWatchpoint(
+        VAddr addr, u64 size, Kernel::DebugWatchpointType access_type) const;
 
     virtual Dynarmic::HaltReason RunJit() = 0;
     virtual Dynarmic::HaltReason StepJit() = 0;
     virtual u32 GetSvcNumber() const = 0;
+    virtual const Kernel::DebugWatchpoint* HaltedWatchpoint() const = 0;
+    virtual void RewindBreakpointInstruction() = 0;
 };
 
 } // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 7c82d0b96e..8c90c8be00 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -29,45 +29,62 @@ using namespace Common::Literals;
 class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
 public:
     explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
-        : parent{parent_}, memory(parent.system.Memory()) {}
+        : parent{parent_},
+          memory(parent.system.Memory()), debugger_enabled{parent.system.DebuggerEnabled()} {}
 
     u8 MemoryRead8(u32 vaddr) override {
+        CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read);
         return memory.Read8(vaddr);
     }
     u16 MemoryRead16(u32 vaddr) override {
+        CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read);
         return memory.Read16(vaddr);
     }
     u32 MemoryRead32(u32 vaddr) override {
+        CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read);
         return memory.Read32(vaddr);
     }
     u64 MemoryRead64(u32 vaddr) override {
+        CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read);
         return memory.Read64(vaddr);
     }
 
     void MemoryWrite8(u32 vaddr, u8 value) override {
-        memory.Write8(vaddr, value);
+        if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) {
+            memory.Write8(vaddr, value);
+        }
     }
     void MemoryWrite16(u32 vaddr, u16 value) override {
-        memory.Write16(vaddr, value);
+        if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) {
+            memory.Write16(vaddr, value);
+        }
     }
     void MemoryWrite32(u32 vaddr, u32 value) override {
-        memory.Write32(vaddr, value);
+        if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) {
+            memory.Write32(vaddr, value);
+        }
     }
     void MemoryWrite64(u32 vaddr, u64 value) override {
-        memory.Write64(vaddr, value);
+        if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) {
+            memory.Write64(vaddr, value);
+        }
     }
 
     bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override {
-        return memory.WriteExclusive8(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive8(vaddr, value, expected);
     }
     bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override {
-        return memory.WriteExclusive16(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive16(vaddr, value, expected);
     }
     bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override {
-        return memory.WriteExclusive32(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive32(vaddr, value, expected);
     }
     bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override {
-        return memory.WriteExclusive64(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive64(vaddr, value, expected);
     }
 
     void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
@@ -77,8 +94,8 @@ public:
     }
 
     void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
-        if (parent.system.DebuggerEnabled()) {
-            parent.jit.load()->Regs()[15] = pc;
+        if (debugger_enabled) {
+            parent.SaveContext(parent.breakpoint_context);
             parent.jit.load()->HaltExecution(ARM_Interface::breakpoint);
             return;
         }
@@ -117,9 +134,26 @@ public:
         return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
     }
 
+    bool CheckMemoryAccess(VAddr addr, u64 size, Kernel::DebugWatchpointType type) {
+        // Returns true when the access may proceed; false when it hit a watchpoint and the JIT
+        // has been asked to halt. Nothing is checked unless the debugger is enabled.
+        if (debugger_enabled) {
+            if (const auto* hit = parent.MatchingWatchpoint(addr, size, type)) {
+                // Snapshot the registers so RewindBreakpointInstruction() can restore them.
+                parent.SaveContext(parent.breakpoint_context);
+                parent.jit.load()->HaltExecution(ARM_Interface::watchpoint);
+                parent.halted_watchpoint = hit;
+                return false;
+            }
+        }
+
+        return true;
+    }
+
     ARM_Dynarmic_32& parent;
     Core::Memory::Memory& memory;
     std::size_t num_interpreted_instructions{};
+    bool debugger_enabled{};
     static constexpr u64 minimum_run_cycles = 1000U;
 };
 
@@ -154,6 +188,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
     config.code_cache_size = 512_MiB;
     config.far_code_offset = 400_MiB;
 
+    // Allow memory fault handling to work
+    if (system.DebuggerEnabled()) {
+        config.check_halt_on_memory_access = true;
+    }
+
     // null_jit
     if (!page_table) {
         // Don't waste too much memory on null_jit
@@ -248,6 +287,14 @@ u32 ARM_Dynarmic_32::GetSvcNumber() const {
     return svc_swi;
 }
 
+const Kernel::DebugWatchpoint* ARM_Dynarmic_32::HaltedWatchpoint() const {
+    return halted_watchpoint; // Last match recorded by CheckMemoryAccess when it halted the JIT.
+}
+
+void ARM_Dynarmic_32::RewindBreakpointInstruction() {
+    LoadContext(breakpoint_context); // Restore the registers captured when the halt was requested.
+}
+
 ARM_Dynarmic_32::ARM_Dynarmic_32(System& system_, CPUInterrupts& interrupt_handlers_,
                                  bool uses_wall_clock_, ExclusiveMonitor& exclusive_monitor_,
                                  std::size_t core_index_)
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 5b1d60005d..fcbe24f0c3 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -72,6 +72,8 @@ protected:
     Dynarmic::HaltReason RunJit() override;
     Dynarmic::HaltReason StepJit() override;
     u32 GetSvcNumber() const override;
+    const Kernel::DebugWatchpoint* HaltedWatchpoint() const override;
+    void RewindBreakpointInstruction() override;
 
 private:
     std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable* page_table) const;
@@ -98,6 +100,10 @@ private:
 
     // SVC callback
     u32 svc_swi{};
+
+    // Watchpoint info: both members are written by the callbacks when they request a debug halt
+    const Kernel::DebugWatchpoint* halted_watchpoint{};
+    ThreadContext32 breakpoint_context;
 };
 
 } // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index d4c67eafdd..4370ca2945 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -29,55 +29,76 @@ using namespace Common::Literals;
 class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
 public:
     explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_)
-        : parent{parent_}, memory(parent.system.Memory()) {}
+        : parent{parent_},
+          memory(parent.system.Memory()), debugger_enabled{parent.system.DebuggerEnabled()} {}
 
     u8 MemoryRead8(u64 vaddr) override {
+        CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read);
         return memory.Read8(vaddr);
     }
     u16 MemoryRead16(u64 vaddr) override {
+        CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read);
         return memory.Read16(vaddr);
     }
     u32 MemoryRead32(u64 vaddr) override {
+        CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read);
         return memory.Read32(vaddr);
     }
     u64 MemoryRead64(u64 vaddr) override {
+        CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read);
         return memory.Read64(vaddr);
     }
     Vector MemoryRead128(u64 vaddr) override {
+        CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Read);
         return {memory.Read64(vaddr), memory.Read64(vaddr + 8)};
     }
 
     void MemoryWrite8(u64 vaddr, u8 value) override {
-        memory.Write8(vaddr, value);
+        if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) {
+            memory.Write8(vaddr, value);
+        }
     }
     void MemoryWrite16(u64 vaddr, u16 value) override {
-        memory.Write16(vaddr, value);
+        if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) {
+            memory.Write16(vaddr, value);
+        }
     }
     void MemoryWrite32(u64 vaddr, u32 value) override {
-        memory.Write32(vaddr, value);
+        if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) {
+            memory.Write32(vaddr, value);
+        }
     }
     void MemoryWrite64(u64 vaddr, u64 value) override {
-        memory.Write64(vaddr, value);
+        if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) {
+            memory.Write64(vaddr, value);
+        }
     }
     void MemoryWrite128(u64 vaddr, Vector value) override {
-        memory.Write64(vaddr, value[0]);
-        memory.Write64(vaddr + 8, value[1]);
+        if (CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write)) {
+            memory.Write64(vaddr, value[0]);
+            memory.Write64(vaddr + 8, value[1]);
+        }
     }
 
     bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override {
-        return memory.WriteExclusive8(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive8(vaddr, value, expected);
     }
     bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override {
-        return memory.WriteExclusive16(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive16(vaddr, value, expected);
     }
     bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override {
-        return memory.WriteExclusive32(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive32(vaddr, value, expected);
     }
     bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override {
-        return memory.WriteExclusive64(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive64(vaddr, value, expected);
     }
     bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override {
-        return memory.WriteExclusive128(vaddr, value, expected);
+        return CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write) &&
+               memory.WriteExclusive128(vaddr, value, expected);
     }
 
     void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
@@ -118,8 +139,8 @@ public:
         case Dynarmic::A64::Exception::Yield:
             return;
         default:
-            if (parent.system.DebuggerEnabled()) {
-                parent.jit.load()->SetPC(pc);
+            if (debugger_enabled) {
+                parent.SaveContext(parent.breakpoint_context);
                 parent.jit.load()->HaltExecution(ARM_Interface::breakpoint);
                 return;
             }
@@ -160,10 +181,27 @@ public:
         return parent.system.CoreTiming().GetClockTicks();
     }
 
+    bool CheckMemoryAccess(VAddr addr, u64 size, Kernel::DebugWatchpointType type) {
+        // Returns true when the access may proceed; false when it hit a watchpoint and the JIT
+        // has been asked to halt. Nothing is checked unless the debugger is enabled.
+        if (debugger_enabled) {
+            if (const auto* hit = parent.MatchingWatchpoint(addr, size, type)) {
+                // Snapshot the registers so RewindBreakpointInstruction() can restore them.
+                parent.SaveContext(parent.breakpoint_context);
+                parent.jit.load()->HaltExecution(ARM_Interface::watchpoint);
+                parent.halted_watchpoint = hit;
+                return false;
+            }
+        }
+
+        return true;
+    }
+
     ARM_Dynarmic_64& parent;
     Core::Memory::Memory& memory;
     u64 tpidrro_el0 = 0;
     u64 tpidr_el0 = 0;
+    bool debugger_enabled{};
     static constexpr u64 minimum_run_cycles = 1000U;
 };
 
@@ -214,6 +252,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
     config.code_cache_size = 512_MiB;
     config.far_code_offset = 400_MiB;
 
+    // Allow memory fault handling to work
+    if (system.DebuggerEnabled()) {
+        config.check_halt_on_memory_access = true;
+    }
+
     // null_jit
     if (!page_table) {
         // Don't waste too much memory on null_jit
@@ -308,6 +351,14 @@ u32 ARM_Dynarmic_64::GetSvcNumber() const {
     return svc_swi;
 }
 
+const Kernel::DebugWatchpoint* ARM_Dynarmic_64::HaltedWatchpoint() const {
+    return halted_watchpoint; // Last match recorded by CheckMemoryAccess when it halted the JIT.
+}
+
+void ARM_Dynarmic_64::RewindBreakpointInstruction() {
+    LoadContext(breakpoint_context); // Restore the registers captured when the halt was requested.
+}
+
 ARM_Dynarmic_64::ARM_Dynarmic_64(System& system_, CPUInterrupts& interrupt_handlers_,
                                  bool uses_wall_clock_, ExclusiveMonitor& exclusive_monitor_,
                                  std::size_t core_index_)
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index abfbc3c3f3..71dbaac5e1 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -66,6 +66,8 @@ protected:
     Dynarmic::HaltReason RunJit() override;
     Dynarmic::HaltReason StepJit() override;
     u32 GetSvcNumber() const override;
+    const Kernel::DebugWatchpoint* HaltedWatchpoint() const override;
+    void RewindBreakpointInstruction() override;
 
 private:
     std::shared_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable* page_table,
@@ -91,6 +93,10 @@ private:
 
     // SVC callback
     u32 svc_swi{};
+
+    // Watchpoint info: both members are written by the callbacks when they request a debug halt
+    const Kernel::DebugWatchpoint* halted_watchpoint{};
+    ThreadContext64 breakpoint_context;
 };
 
 } // namespace Core
diff --git a/src/core/debugger/debugger.cpp b/src/core/debugger/debugger.cpp
index ab39409223..ac64d2f9d3 100644
--- a/src/core/debugger/debugger.cpp
+++ b/src/core/debugger/debugger.cpp
@@ -44,12 +44,14 @@ static std::span<const u8> ReceiveInto(Readable& r, Buffer& buffer) {
 
 enum class SignalType {
     Stopped,
+    Watchpoint,
     ShuttingDown,
 };
 
 struct SignalInfo {
     SignalType type;
     Kernel::KThread* thread;
+    const Kernel::DebugWatchpoint* watchpoint;
 };
 
 namespace Core {
@@ -157,13 +159,19 @@ private:
     void PipeData(std::span<const u8> data) {
         switch (info.type) {
         case SignalType::Stopped:
+        case SignalType::Watchpoint:
             // Stop emulation.
             PauseEmulation();
 
             // Notify the client.
             active_thread = info.thread;
             UpdateActiveThread();
-            frontend->Stopped(active_thread);
+
+            if (info.type == SignalType::Watchpoint) {
+                frontend->Watchpoint(active_thread, *info.watchpoint);
+            } else {
+                frontend->Stopped(active_thread);
+            }
 
             break;
         case SignalType::ShuttingDown:
@@ -290,12 +298,17 @@ Debugger::Debugger(Core::System& system, u16 port) {
 Debugger::~Debugger() = default;
 
 bool Debugger::NotifyThreadStopped(Kernel::KThread* thread) {
-    return impl && impl->SignalDebugger(SignalInfo{SignalType::Stopped, thread});
+    return impl && impl->SignalDebugger(SignalInfo{SignalType::Stopped, thread, nullptr});
+}
+
+bool Debugger::NotifyThreadWatchpoint(Kernel::KThread* thread,
+                                      const Kernel::DebugWatchpoint& watch) {
+    return impl ? impl->SignalDebugger(SignalInfo{SignalType::Watchpoint, thread, &watch}) : false;
+}
 
 void Debugger::NotifyShutdown() {
     if (impl) {
-        impl->SignalDebugger(SignalInfo{SignalType::ShuttingDown, nullptr});
+        impl->SignalDebugger(SignalInfo{SignalType::ShuttingDown, nullptr, nullptr});
     }
 }
 
diff --git a/src/core/debugger/debugger.h b/src/core/debugger/debugger.h
index f9738ca3db..b2f5033767 100644
--- a/src/core/debugger/debugger.h
+++ b/src/core/debugger/debugger.h
@@ -9,7 +9,8 @@
 
 namespace Kernel {
 class KThread;
-}
+struct DebugWatchpoint;
+} // namespace Kernel
 
 namespace Core {
 class System;
@@ -40,6 +41,11 @@ public:
      */
     void NotifyShutdown();
 
+    /*
+     * Notify the debugger that the given thread has stopped due to hitting a watchpoint.
+     */
+    bool NotifyThreadWatchpoint(Kernel::KThread* thread, const Kernel::DebugWatchpoint& watch);
+
 private:
     std::unique_ptr<DebuggerImpl> impl;
 };
diff --git a/src/core/debugger/debugger_interface.h b/src/core/debugger/debugger_interface.h
index c0bb4ecafe..5b31edc430 100644
--- a/src/core/debugger/debugger_interface.h
+++ b/src/core/debugger/debugger_interface.h
@@ -11,7 +11,8 @@
 
 namespace Kernel {
 class KThread;
-}
+struct DebugWatchpoint;
+} // namespace Kernel
 
 namespace Core {
 
@@ -71,6 +72,11 @@ public:
      */
     virtual void ShuttingDown() = 0;
 
+    /**
+     * Called when emulation has stopped on a watchpoint.
+     */
+    virtual void Watchpoint(Kernel::KThread* thread, const Kernel::DebugWatchpoint& watch) = 0;
+
     /**
      * Called when new data is asynchronously received on the client socket.
      * A list of actions to perform is returned.
diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp
index 52e76f6590..f5e9a303dd 100644
--- a/src/core/debugger/gdbstub.cpp
+++ b/src/core/debugger/gdbstub.cpp
@@ -112,6 +112,23 @@ void GDBStub::Stopped(Kernel::KThread* thread) {
     SendReply(arch->ThreadStatus(thread, GDB_STUB_SIGTRAP));
 }
 
+void GDBStub::Watchpoint(Kernel::KThread* thread, const Kernel::DebugWatchpoint& watch) {
+    // Sends the SIGTRAP thread status for `thread`, suffixed with the kind of watchpoint that
+    // fired and its start address. Types other than Read or Write are reported as "awatch".
+    const auto thread_status{arch->ThreadStatus(thread, GDB_STUB_SIGTRAP)};
+    const auto watch_start{watch.start_address};
+
+    if (watch.type == Kernel::DebugWatchpointType::Read) {
+        SendReply(fmt::format("{}rwatch:{:x};", thread_status, watch_start));
+        return;
+    }
+    if (watch.type == Kernel::DebugWatchpointType::Write) {
+        SendReply(fmt::format("{}watch:{:x};", thread_status, watch_start));
+        return;
+    }
+    SendReply(fmt::format("{}awatch:{:x};", thread_status, watch_start));
+}
+
 std::vector<DebuggerAction> GDBStub::ClientData(std::span<const u8> data) {
     std::vector<DebuggerAction> actions;
     current_command.insert(current_command.end(), data.begin(), data.end());
@@ -278,44 +295,124 @@ void GDBStub::ExecuteCommand(std::string_view packet, std::vector<DebuggerAction
     case 'c':
         actions.push_back(DebuggerAction::Continue);
         break;
-    case 'Z': {
-        const auto addr_sep{std::find(command.begin(), command.end(), ',') - command.begin() + 1};
-        const size_t addr{static_cast<size_t>(strtoll(command.data() + addr_sep, nullptr, 16))};
-
-        if (system.Memory().IsValidVirtualAddress(addr)) {
-            replaced_instructions[addr] = system.Memory().Read32(addr);
-            system.Memory().Write32(addr, arch->BreakpointInstruction());
-            system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
-
-            SendReply(GDB_STUB_REPLY_OK);
-        } else {
-            SendReply(GDB_STUB_REPLY_ERR);
-        }
+    case 'Z':
+        HandleBreakpointInsert(command);
         break;
-    }
-    case 'z': {
-        const auto addr_sep{std::find(command.begin(), command.end(), ',') - command.begin() + 1};
-        const size_t addr{static_cast<size_t>(strtoll(command.data() + addr_sep, nullptr, 16))};
-
-        const auto orig_insn{replaced_instructions.find(addr)};
-        if (system.Memory().IsValidVirtualAddress(addr) &&
-            orig_insn != replaced_instructions.end()) {
-            system.Memory().Write32(addr, orig_insn->second);
-            system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
-            replaced_instructions.erase(addr);
-
-            SendReply(GDB_STUB_REPLY_OK);
-        } else {
-            SendReply(GDB_STUB_REPLY_ERR);
-        }
+    case 'z':
+        HandleBreakpointRemove(command);
         break;
-    }
     default:
         SendReply(GDB_STUB_REPLY_EMPTY);
         break;
     }
 }
 
+enum class BreakpointType {
+    Software = 0,    // Handled by patching in arch->BreakpointInstruction()
+    Hardware = 1,    // Not implemented: answered with GDB_STUB_REPLY_EMPTY
+    WriteWatch = 2,  // Watchpoint of type DebugWatchpointType::Write
+    ReadWatch = 3,   // Watchpoint of type DebugWatchpointType::Read
+    AccessWatch = 4, // Watchpoint of type DebugWatchpointType::ReadOrWrite
+};
+
+void GDBStub::HandleBreakpointInsert(std::string_view command) {
+    // Handles the body of a 'Z' packet, "<type>,<addr>,<size>", parsing every field as hex.
+    // NOTE(review): assumes both ',' separators exist; a packet without them is not rejected.
+    const auto type{static_cast<BreakpointType>(strtoll(command.data(), nullptr, 16))};
+    const auto addr_sep{std::find(command.begin(), command.end(), ',') - command.begin() + 1};
+    const auto size_sep{std::find(command.begin() + addr_sep, command.end(), ',') -
+                        command.begin() + 1};
+    const size_t addr{static_cast<size_t>(strtoll(command.data() + addr_sep, nullptr, 16))};
+    const size_t size{static_cast<size_t>(strtoll(command.data() + size_sep, nullptr, 16))};
+    if (!system.Memory().IsValidVirtualAddressRange(addr, size)) {
+        SendReply(GDB_STUB_REPLY_ERR);
+        return;
+    }
+
+    bool success{};
+    switch (type) {
+    case BreakpointType::Software:
+        // emplace() keeps an existing entry: a repeated insert must not lose the original opcode.
+        replaced_instructions.emplace(addr, system.Memory().Read32(addr));
+        system.Memory().Write32(addr, arch->BreakpointInstruction());
+        system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
+        success = true;
+        break;
+    case BreakpointType::WriteWatch:
+        success = system.CurrentProcess()->InsertWatchpoint(system, addr, size,
+                                                            Kernel::DebugWatchpointType::Write);
+        break;
+    case BreakpointType::ReadWatch:
+        success = system.CurrentProcess()->InsertWatchpoint(system, addr, size,
+                                                            Kernel::DebugWatchpointType::Read);
+        break;
+    case BreakpointType::AccessWatch:
+        success = system.CurrentProcess()->InsertWatchpoint(
+            system, addr, size, Kernel::DebugWatchpointType::ReadOrWrite);
+        break;
+    case BreakpointType::Hardware:
+    default:
+        SendReply(GDB_STUB_REPLY_EMPTY);
+        return;
+    }
+    if (success) {
+        SendReply(GDB_STUB_REPLY_OK);
+    } else {
+        SendReply(GDB_STUB_REPLY_ERR);
+    }
+}
+
+void GDBStub::HandleBreakpointRemove(std::string_view command) {
+    // Handles the body of a 'z' packet, "<type>,<addr>,<size>", parsing every field as hex.
+    // NOTE(review): assumes both ',' separators exist; a packet without them is not rejected.
+    const auto type{static_cast<BreakpointType>(strtoll(command.data(), nullptr, 16))};
+    const auto addr_sep{std::find(command.begin(), command.end(), ',') - command.begin() + 1};
+    const auto size_sep{std::find(command.begin() + addr_sep, command.end(), ',') -
+                        command.begin() + 1};
+    const size_t addr{static_cast<size_t>(strtoll(command.data() + addr_sep, nullptr, 16))};
+    const size_t size{static_cast<size_t>(strtoll(command.data() + size_sep, nullptr, 16))};
+    if (!system.Memory().IsValidVirtualAddressRange(addr, size)) {
+        SendReply(GDB_STUB_REPLY_ERR);
+        return;
+    }
+
+    bool success{};
+    switch (type) {
+    case BreakpointType::Software: {
+        // Restore only when an instruction was saved for addr; otherwise success stays false.
+        const auto orig_insn{replaced_instructions.find(addr)};
+        if (orig_insn != replaced_instructions.end()) {
+            system.Memory().Write32(addr, orig_insn->second);
+            system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
+            replaced_instructions.erase(addr);
+            success = true;
+        }
+        break;
+    }
+    case BreakpointType::WriteWatch:
+        success = system.CurrentProcess()->RemoveWatchpoint(system, addr, size,
+                                                            Kernel::DebugWatchpointType::Write);
+        break;
+    case BreakpointType::ReadWatch:
+        success = system.CurrentProcess()->RemoveWatchpoint(system, addr, size,
+                                                            Kernel::DebugWatchpointType::Read);
+        break;
+    case BreakpointType::AccessWatch:
+        success = system.CurrentProcess()->RemoveWatchpoint(
+            system, addr, size, Kernel::DebugWatchpointType::ReadOrWrite);
+        break;
+    case BreakpointType::Hardware:
+    default:
+        SendReply(GDB_STUB_REPLY_EMPTY);
+        return;
+    }
+    if (success) {
+        SendReply(GDB_STUB_REPLY_OK);
+    } else {
+        SendReply(GDB_STUB_REPLY_ERR);
+    }
+}
+
 // Structure offsets are from Atmosphere
 // See osdbg_thread_local_region.os.horizon.hpp and osdbg_thread_type.os.horizon.hpp
 
diff --git a/src/core/debugger/gdbstub.h b/src/core/debugger/gdbstub.h
index ec934c77e7..0b0f56e4bf 100644
--- a/src/core/debugger/gdbstub.h
+++ b/src/core/debugger/gdbstub.h
@@ -24,6 +24,7 @@ public:
     void Connected() override;
     void Stopped(Kernel::KThread* thread) override;
     void ShuttingDown() override;
+    void Watchpoint(Kernel::KThread* thread, const Kernel::DebugWatchpoint& watch) override;
     std::vector<DebuggerAction> ClientData(std::span<const u8> data) override;
 
 private:
@@ -31,6 +32,8 @@ private:
     void ExecuteCommand(std::string_view packet, std::vector<DebuggerAction>& actions);
     void HandleVCont(std::string_view command, std::vector<DebuggerAction>& actions);
     void HandleQuery(std::string_view command);
+    void HandleBreakpointInsert(std::string_view command);
+    void HandleBreakpointRemove(std::string_view command);
     std::vector<char>::const_iterator CommandEnd() const;
     std::optional<std::string> DetachCommand();
     Kernel::KThread* GetThreadByID(u64 thread_id);
diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h
index aac362c51e..13cbdb734c 100644
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -25,6 +25,9 @@ constexpr std::array<s32, Common::BitSize<u64>()> VirtualToPhysicalCoreMap{
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
 };
 
+// Cortex-A57 supports 4 memory watchpoints; this bounds the per-process watchpoint slot array.
+constexpr u64 NUM_WATCHPOINTS = 4;
+
 } // namespace Hardware
 
 } // namespace Core
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index cd863e7156..6a73f67837 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -584,6 +584,52 @@ ResultCode KProcess::DeleteThreadLocalRegion(VAddr addr) {
     return ResultSuccess;
 }
 
+bool KProcess::InsertWatchpoint(Core::System& system, VAddr addr, u64 size,
+                                DebugWatchpointType type) {
+    const auto is_free = [](const auto& wp) { return wp.type == DebugWatchpointType::None; };
+    const auto slot = std::find_if(watchpoints.begin(), watchpoints.end(), is_free);
+    if (slot == watchpoints.end()) {
+        return false;
+    }
+
+    const VAddr range_end = addr + size;
+    slot->start_address = addr;
+    slot->end_address = range_end;
+    slot->type = type;
+
+    // Every page the range touches gains a reference and is flagged as debug memory.
+    for (VAddr page = Common::AlignDown(addr, PageSize); page < range_end; page += PageSize) {
+        ++debug_page_refcounts[page];
+        system.Memory().MarkRegionDebug(page, PageSize, true);
+    }
+
+    return true;
+}
+
+bool KProcess::RemoveWatchpoint(Core::System& system, VAddr addr, u64 size,
+                                DebugWatchpointType type) {
+    const auto watch{std::find_if(watchpoints.begin(), watchpoints.end(), [&](const auto& wp) {
+        return wp.start_address == addr && wp.end_address == addr + size && wp.type == type;
+    })};
+    if (watch == watchpoints.end()) {
+        return false;
+    }
+
+    *watch = DebugWatchpoint{}; // Zeroed addresses and type None mark the slot as free again.
+
+    // Drop one reference per touched page. A page whose count reaches zero leaves debug mode
+    // and its entry is erased, so the refcount map does not keep stale zero entries around.
+    for (VAddr page = Common::AlignDown(addr, PageSize); page < addr + size; page += PageSize) {
+        const auto refs{debug_page_refcounts.find(page)};
+        if (refs != debug_page_refcounts.end() && --refs->second == 0) {
+            debug_page_refcounts.erase(refs);
+            system.Memory().MarkRegionDebug(page, PageSize, false);
+        }
+    }
+
+    return true;
+}
+
 void KProcess::LoadModule(CodeSet code_set, VAddr base_addr) {
     const auto ReprotectSegment = [&](const CodeSet::Segment& segment,
                                       Svc::MemoryPermission permission) {
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index e562a79b8c..c2086e5baa 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <cstddef>
 #include <list>
+#include <map>
 #include <string>
 #include "common/common_types.h"
 #include "core/hle/kernel/k_address_arbiter.h"
@@ -68,6 +69,20 @@ enum class ProcessActivity : u32 {
     Paused,
 };
 
+enum class DebugWatchpointType : u8 {
+    None = 0,                   // No access kind; KProcess treats a slot of this type as unused
+    Read = 1 << 0,              // Bit 0; used for the GDB stub's ReadWatch breakpoints
+    Write = 1 << 1,             // Bit 1; presumably the write-only counterpart - TODO confirm
+    ReadOrWrite = Read | Write, // Both bits; used for the GDB stub's AccessWatch breakpoints
+};
+DECLARE_ENUM_FLAG_OPERATORS(DebugWatchpointType);
+
+struct DebugWatchpoint {
+    VAddr start_address;      // First watched address
+    VAddr end_address;        // One past the last watched address (start_address + size)
+    DebugWatchpointType type; // Access kind; None while the slot is unused
+};
+
 class KProcess final : public KAutoObjectWithSlabHeapAndContainer<KProcess, KWorkerTask> {
     KERNEL_AUTOOBJECT_TRAITS(KProcess, KSynchronizationObject);
 
@@ -374,6 +389,19 @@ public:
     // Frees a used TLS slot identified by the given address
     ResultCode DeleteThreadLocalRegion(VAddr addr);
 
+    ///////////////////////////////////////////////////////////////////////////////////////////////
+    // Debug watchpoint management
+
+    // Inserts a watchpoint on [addr, addr + size) into a free slot. Returns false if none is free.
+    bool InsertWatchpoint(Core::System& system, VAddr addr, u64 size, DebugWatchpointType type);
+
+    // Removes the watchpoint matching addr, size and type exactly. Returns false if none matches.
+    bool RemoveWatchpoint(Core::System& system, VAddr addr, u64 size, DebugWatchpointType type);
+
+    const std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS>& GetWatchpoints() const {
+        return watchpoints; // Every slot, in use or not; unused ones have type None.
+    }
+
 private:
     void PinThread(s32 core_id, KThread* thread) {
         ASSERT(0 <= core_id && core_id < static_cast<s32>(Core::Hardware::NUM_CPU_CORES));
@@ -478,6 +506,8 @@ private:
     std::array<KThread*, Core::Hardware::NUM_CPU_CORES> running_threads{};
     std::array<u64, Core::Hardware::NUM_CPU_CORES> running_thread_idle_counts{};
     std::array<KThread*, Core::Hardware::NUM_CPU_CORES> pinned_threads{};
+    std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS> watchpoints{};
+    std::map<VAddr, u64> debug_page_refcounts;
 
     KThread* exception_thread{};
 
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
index 2d4e8637b3..edd0e4eae7 100644
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -710,6 +710,7 @@ void KScheduler::Reload(KThread* thread) {
     Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
     cpu_core.LoadContext(thread->GetContext32());
     cpu_core.LoadContext(thread->GetContext64());
+    cpu_core.LoadWatchpointArray(thread->GetOwnerProcess()->GetWatchpoints());
     cpu_core.SetTlsAddress(thread->GetTLSAddress());
     cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
     cpu_core.ClearExclusiveState();
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 7534de01eb..584808d50d 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -67,6 +67,16 @@ struct Memory::Impl {
         return system.DeviceMemory().GetPointer(paddr) + vaddr;
     }
 
+    // Resolves vaddr through the page table's backing address entry rather than the page
+    // pointer. Yields nullptr when the entry for that page is zero.
+    [[nodiscard]] u8* GetPointerFromDebugMemory(VAddr vaddr) const {
+        const PAddr backing{current_page_table->backing_addr[vaddr >> PAGE_BITS]};
+        if (backing != 0) {
+            return system.DeviceMemory().GetPointer(backing) + vaddr;
+        }
+        return nullptr;
+    }
+
     u8 Read8(const VAddr addr) {
         return Read<u8>(addr);
     }
@@ -187,6 +197,12 @@ struct Memory::Impl {
                 on_memory(copy_amount, mem_ptr);
                 break;
             }
+            case Common::PageType::DebugMemory: {
+                DEBUG_ASSERT(pointer);
+                u8* const mem_ptr{GetPointerFromDebugMemory(current_vaddr)};
+                on_memory(copy_amount, mem_ptr);
+                break;
+            }
             case Common::PageType::RasterizerCachedMemory: {
                 u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
                 on_rasterizer(current_vaddr, copy_amount, host_ptr);
@@ -316,6 +332,58 @@ struct Memory::Impl {
             });
     }
 
+    /**
+     * Marks or unmarks the CPU pages overlapped by a virtual address range as debug memory.
+     *
+     * @param vaddr Start of the range. Nothing happens when this is zero.
+     * @param size  Length of the range in bytes.
+     * @param debug True to turn regular pages into debug pages, false to turn them back.
+     */
+    void MarkRegionDebug(VAddr vaddr, u64 size, bool debug) {
+        if (vaddr == 0) {
+            return;
+        }
+
+        // The range is processed at CPU page granularity, one page table entry per iteration.
+        // Entries are read once and rewritten only when the requested state differs.
+        const u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
+        for (u64 i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
+            const u64 page_index = vaddr >> PAGE_BITS;
+            switch (current_page_table->pointers[page_index].Type()) {
+            case Common::PageType::Unmapped:
+                if (debug) {
+                    ASSERT_MSG(false, "Attempted to mark unmapped pages as debug");
+                } else {
+                    ASSERT_MSG(false, "Attempted to mark unmapped pages as non-debug");
+                }
+                break;
+            case Common::PageType::RasterizerCachedMemory:
+                // Rasterizer-cached pages are left untouched in both directions.
+                break;
+            case Common::PageType::Memory:
+                // Regular pages only change when marking: the direct pointer is dropped and
+                // the entry is retyped as DebugMemory.
+                if (debug) {
+                    current_page_table->pointers[page_index].Store(
+                        nullptr, Common::PageType::DebugMemory);
+                }
+                break;
+            case Common::PageType::DebugMemory:
+                // Debug pages only change when unmarking: the entry is retyped as Memory with
+                // a pointer rebuilt from the page's backing address.
+                if (!debug) {
+                    const VAddr page_base = vaddr & ~PAGE_MASK;
+                    u8* const pointer{GetPointerFromDebugMemory(page_base)};
+                    current_page_table->pointers[page_index].Store(
+                        pointer - page_base, Common::PageType::Memory);
+                }
+                break;
+            default:
+                UNREACHABLE();
+            }
+        }
+    }
+
     void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
         if (vaddr == 0) {
             return;
@@ -342,6 +410,7 @@ struct Memory::Impl {
                     // It is not necessary for a process to have this region mapped into its address
                     // space, for example, a system module need not have a VRAM mapping.
                     break;
+                case Common::PageType::DebugMemory:
                 case Common::PageType::Memory:
                     current_page_table->pointers[vaddr >> PAGE_BITS].Store(
                         nullptr, Common::PageType::RasterizerCachedMemory);
@@ -360,6 +429,7 @@ struct Memory::Impl {
                     // It is not necessary for a process to have this region mapped into its address
                     // space, for example, a system module need not have a VRAM mapping.
                     break;
+                case Common::PageType::DebugMemory:
                 case Common::PageType::Memory:
                     // There can be more than one GPU region mapped per CPU region, so it's common
                     // that this area is already unmarked as cached.
@@ -460,6 +530,8 @@ struct Memory::Impl {
         case Common::PageType::Memory:
             ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr);
             return nullptr;
+        case Common::PageType::DebugMemory:
+            return GetPointerFromDebugMemory(vaddr);
         case Common::PageType::RasterizerCachedMemory: {
             u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
             on_rasterizer();
@@ -591,7 +663,8 @@ bool Memory::IsValidVirtualAddress(const VAddr vaddr) const {
         return false;
     }
     const auto [pointer, type] = page_table.pointers[page].PointerType();
-    return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
+    return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory ||
+           type == Common::PageType::DebugMemory;
 }
 
 bool Memory::IsValidVirtualAddressRange(VAddr base, u64 size) const {
@@ -707,4 +780,8 @@ void Memory::RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
     impl->RasterizerMarkRegionCached(vaddr, size, cached);
 }
 
+void Memory::MarkRegionDebug(VAddr vaddr, u64 size, bool debug) {
+    impl->MarkRegionDebug(vaddr, size, debug); // Forwards unchanged to Memory::Impl.
+}
+
 } // namespace Core::Memory
diff --git a/src/core/memory.h b/src/core/memory.h
index 58cc27b299..f22c0a2d87 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -446,6 +446,17 @@ public:
      */
     void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
 
+    /**
+     * Marks each page within the specified address range as debug or non-debug.
+     * Debug addresses are not accessible from fastmem pointers.
+     *
+     * @param vaddr The virtual address indicating the start of the address range.
+     * @param size  The size of the address range in bytes.
+     * @param debug Whether or not any pages within the address range should be
+     *              marked as debug or non-debug.
+     */
+    void MarkRegionDebug(VAddr vaddr, u64 size, bool debug);
+
 private:
     Core::System& system;