kernel: Implement host thread register methods without locking

Locks on GetCurrentHostThreadID were causing performance issues
according to Visual Studio's profiler: it was consuming twice as much
time as arm_interface.Run(). The cost was not in the function itself
but in the locking it required.

Reimplement these functions using atomics and static storage instead of
an unordered_map. Dropping the unordered_map is a side effect of the
real goals: avoiding the lock and avoiding pointer-chasing through the
map's bucket linked lists on reads.

Replace unordered_map with a linear search.
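
Below is a minimal, self-contained sketch of the pattern for reference. It is
an illustration under assumptions, not the code in this commit; the names
ThreadIdRegistry, Register, Lookup and Capacity are hypothetical, and the real
implementation lives in KernelCore::Impl (see the diff below).

    // Hypothetical sketch: fixed-size arrays of atomic keys/values plus an
    // atomic count replace unordered_map + mutex. Writers publish the value
    // before the key; readers scan the already-published prefix with no lock.
    #include <array>
    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <thread>

    class ThreadIdRegistry {
    public:
        static constexpr std::size_t Capacity = 64;

        // Called once per thread; appends an entry without locking.
        void Register(std::uint32_t value) {
            const std::size_t index = count.fetch_add(1);
            // A real implementation would assert index < Capacity here.
            values[index].store(value);
            keys[index].store(std::this_thread::get_id()); // key published last
        }

        // Linear search over the published prefix; no lock taken.
        std::uint32_t Lookup() const {
            const std::thread::id this_id = std::this_thread::get_id();
            const std::size_t size = count.load();
            for (std::size_t i = 0; i < size; ++i) {
                if (keys[i].load() == this_id) {
                    return values[i].load();
                }
            }
            return 0xFFFFFFFF; // stand-in for Core::INVALID_HOST_THREAD_ID
        }

    private:
        std::atomic<std::size_t> count{0};
        std::array<std::atomic<std::thread::id>, Capacity> keys{};
        std::array<std::atomic<std::uint32_t>, Capacity> values{};
    };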
ReinUsesLisp 2020-10-13 18:00:25 -03:00
parent d291fc1a51
commit b9a9b83bee

@@ -7,7 +7,6 @@
 #include <bitset>
 #include <functional>
 #include <memory>
-#include <mutex>
 #include <thread>
 #include <unordered_map>
 #include <utility>
@@ -107,7 +106,11 @@ struct KernelCore::Impl {
         cores.clear();
         exclusive_monitor.reset();
-        host_thread_ids.clear();
+        num_host_threads = 0;
+        std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(),
+                  std::thread::id{});
+        std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0);
     }
 
     void InitializePhysicalCores() {
@@ -177,54 +180,56 @@ struct KernelCore::Impl {
     void MakeCurrentProcess(Process* process) {
         current_process = process;
         if (process == nullptr) {
             return;
         }
-        u32 core_id = GetCurrentHostThreadID();
+        const u32 core_id = GetCurrentHostThreadID();
         if (core_id < Core::Hardware::NUM_CPU_CORES) {
             system.Memory().SetCurrentPageTable(*process, core_id);
         }
     }
 
     void RegisterCoreThread(std::size_t core_id) {
-        std::unique_lock lock{register_thread_mutex};
-        if (!is_multicore) {
-            single_core_thread_id = std::this_thread::get_id();
-        }
         const std::thread::id this_id = std::this_thread::get_id();
-        const auto it = host_thread_ids.find(this_id);
+        if (!is_multicore) {
+            single_core_thread_id = this_id;
+        }
+        const auto end = register_host_thread_keys.begin() + num_host_threads;
+        const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
         ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
-        ASSERT(it == host_thread_ids.end());
+        ASSERT(it == end);
         ASSERT(!registered_core_threads[core_id]);
-        host_thread_ids[this_id] = static_cast<u32>(core_id);
+        InsertHostThread(static_cast<u32>(core_id));
         registered_core_threads.set(core_id);
     }
 
     void RegisterHostThread() {
-        std::unique_lock lock{register_thread_mutex};
         const std::thread::id this_id = std::this_thread::get_id();
-        const auto it = host_thread_ids.find(this_id);
-        if (it != host_thread_ids.end()) {
-            return;
+        const auto end = register_host_thread_keys.begin() + num_host_threads;
+        const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
+        if (it == end) {
+            InsertHostThread(registered_thread_ids++);
         }
-        host_thread_ids[this_id] = registered_thread_ids++;
     }
 
-    u32 GetCurrentHostThreadID() const {
+    void InsertHostThread(u32 value) {
+        const size_t index = num_host_threads++;
+        ASSERT_MSG(index < NUM_REGISTRABLE_HOST_THREADS, "Too many host threads");
+        register_host_thread_values[index] = value;
+        register_host_thread_keys[index] = std::this_thread::get_id();
+    }
+
+    [[nodiscard]] u32 GetCurrentHostThreadID() const {
         const std::thread::id this_id = std::this_thread::get_id();
-        if (!is_multicore) {
-            if (single_core_thread_id == this_id) {
-                return static_cast<u32>(system.GetCpuManager().CurrentCore());
-            }
+        if (!is_multicore && single_core_thread_id == this_id) {
+            return static_cast<u32>(system.GetCpuManager().CurrentCore());
         }
-        std::unique_lock lock{register_thread_mutex};
-        const auto it = host_thread_ids.find(this_id);
-        if (it == host_thread_ids.end()) {
+        const auto end = register_host_thread_keys.begin() + num_host_threads;
+        const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
+        if (it == end) {
             return Core::INVALID_HOST_THREAD_ID;
         }
-        return it->second;
+        return register_host_thread_values[std::distance(register_host_thread_keys.begin(), it)];
     }
 
     Core::EmuThreadHandle GetCurrentEmuThreadID() const {
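
A note on the ordering in the hunk above (my reading of the diff, not text from
the commit): InsertHostThread bumps num_host_threads before the key and value
are written, so a concurrent GetCurrentHostThreadID can scan a slot that still
holds a default-constructed std::thread::id; that slot simply fails to match.
Because the value is stored before the key, any slot whose key does match
already has its value visible under std::atomic's default sequentially
consistent ordering. A tiny hypothetical illustration of that publish order:

    // Illustration only (hypothetical free-standing names): the writer stores
    // the value before the key, so a reader that matches the key also sees the
    // matching value under the default sequentially consistent ordering.
    #include <atomic>
    #include <cstdint>
    #include <thread>

    std::atomic<std::thread::id> slot_key{};
    std::atomic<std::uint32_t> slot_value{0};

    void Publish(std::uint32_t value) {
        slot_value.store(value);                    // value first
        slot_key.store(std::this_thread::get_id()); // key last
    }

    std::uint32_t Find() {
        if (slot_key.load() == std::this_thread::get_id()) {
            return slot_value.load(); // sees the published value
        }
        return 0xFFFFFFFF; // not registered yet
    }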
@@ -322,10 +327,15 @@ struct KernelCore::Impl {
     std::vector<Kernel::PhysicalCore> cores;
 
     // 0-3 IDs represent core threads, >3 represent others
-    std::unordered_map<std::thread::id, u32> host_thread_ids;
-    u32 registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
+    std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
     std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads;
-    mutable std::mutex register_thread_mutex;
+
+    // Number of host threads is a relatively high number to avoid overflowing
+    static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64;
+    std::atomic<size_t> num_host_threads{0};
+    std::array<std::atomic<std::thread::id>, NUM_REGISTRABLE_HOST_THREADS>
+        register_host_thread_keys{};
+    std::array<std::atomic<u32>, NUM_REGISTRABLE_HOST_THREADS> register_host_thread_values{};
 
     // Kernel memory management
     std::unique_ptr<Memory::MemoryManager> memory_manager;
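
One more detail worth calling out about the new members (an observation, not
part of the commit): std::atomic requires a trivially copyable type, and the
standard specifies std::thread::id as trivially copyable, so
std::atomic<std::thread::id> is well-formed. Whether it is lock-free is
implementation-defined, though it typically is on mainstream 64-bit platforms
where std::thread::id fits in a machine word. A small hypothetical
compile-time check:

    #include <atomic>
    #include <thread>
    #include <type_traits>

    // std::thread::id is specified to be trivially copyable, which is what
    // std::atomic<T> requires of T.
    static_assert(std::is_trivially_copyable_v<std::thread::id>);

    // Lock-freedom is implementation-defined; it usually holds on 64-bit
    // platforms but is not guaranteed, hence left commented out.
    // static_assert(std::atomic<std::thread::id>::is_always_lock_free);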