Jit: Use RangeSet for physical_addresses

This makes JitBaseBlockCache::ErasePhysicalRange around 50% faster and
PPCAnalyzer::Analyze around 40% faster. Rogue Squadron 2's notoriously
laggy action of switching to and from cockpit view is made something
like 20-30% faster by this, though this is a very rough measurement.
This commit is contained in:
JosJuice 2026-02-14 14:42:42 +01:00
parent 36f45dce44
commit c1a26808ce
8 changed files with 67 additions and 24 deletions

View File

@@ -24,6 +24,8 @@ public:
const T& to() const { return It->second; }
std::pair<T, T> operator*() { return {from(), to()}; }
const_iterator& operator++()
{
++It;
@@ -243,6 +245,18 @@ public:
return get_from(it) <= value && value < get_to(it);
}
// Checks whether any stored range intersects the half-open interval [from, to).
bool overlaps(T from, T to) const
{
  // An empty (or inverted) query interval cannot intersect anything.
  if (from >= to)
    return false;

  // Only the stored range with the largest start below 'to' can intersect
  // [from, to): ranges are disjoint, so every earlier range ends no later
  // than that candidate begins.
  auto it = Map.lower_bound(to);
  if (it != Map.begin())
  {
    --it;
    return get_from(it) < to && from < get_to(it);
  }
  return false;
}
std::size_t size() const { return Map.size(); }
bool empty() const { return Map.empty(); }

View File

@@ -69,6 +69,8 @@ public:
const T& to() const { return It->second.To; }
std::pair<T, T> operator*() { return {from(), to()}; }
const_iterator& operator++()
{
++It;
@@ -351,6 +353,18 @@ public:
return get_from(it) <= value && value < get_to(it);
}
// Returns true if the half-open interval [from, to) touches any stored range.
bool overlaps(T from, T to) const
{
  // Degenerate query interval: nothing to intersect.
  if (!(from < to))
    return false;

  // Find the first stored range starting at or past 'to'; the range just
  // before it (if any) is the only one that could reach into [from, to).
  const auto next = Map.lower_bound(to);
  if (next == Map.begin())
    return false;
  auto prev = next;
  --prev;
  return get_from(prev) < to && from < get_to(prev);
}
std::size_t size() const { return Map.size(); }
bool empty() const { return Map.empty(); }

View File

@@ -7,9 +7,8 @@
#include <map>
#include <optional>
#include <rangeset/rangesizeset.h>
#include "Common/Arm64Emitter.h"
#include "Common/RangeSizeSet.h"
#include "Core/PowerPC/JitArm64/JitArm64Cache.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
@@ -431,10 +430,10 @@ protected:
u8* m_near_code_end = nullptr;
bool m_near_code_write_failed = false;
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near_0;
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near_1;
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far_0;
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far_1;
Common::RangeSizeSet<u8*> m_free_ranges_near_0;
Common::RangeSizeSet<u8*> m_free_ranges_near_1;
Common::RangeSizeSet<u8*> m_free_ranges_far_0;
Common::RangeSizeSet<u8*> m_free_ranges_far_1;
std::unique_ptr<HostDisassembler> m_disassembler;
};

View File

@@ -31,8 +31,7 @@ using namespace Gen;
// Returns true if any instruction of this block occupies a physical address
// inside the half-open interval [address, address + length).
bool JitBlock::OverlapsPhysicalRange(u32 address, u32 length) const
{
  // Delegate to the RangeSet overlap query. (The superseded std::set-based
  // lower_bound comparison that preceded this call was unreachable dead code
  // and has been removed.)
  return physical_addresses.overlaps(address, address + length);
}
void JitBlock::ProfileData::BeginProfiling(ProfileData* data)
@@ -171,10 +170,13 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
original_buffer_transform_view.end());
}
for (u32 addr : block.physical_addresses)
for (auto [range_start, range_end] : block.physical_addresses)
{
valid_block.Set(addr / 32);
block_range_map[addr & BLOCK_RANGE_MAP_MASK].insert(&block);
for (u32 i = range_start & ~31; i < range_end; i += 32)
valid_block.Set(i / 32);
for (u32 i = range_start & BLOCK_RANGE_MAP_MASK; i < range_end; i += BLOCK_RANGE_SIZE)
block_range_map[i].insert(&block);
}
if (block_link)
@@ -362,9 +364,15 @@ void JitBaseBlockCache::ErasePhysicalRange(u32 address, u32 length)
{
// If the block overlaps, also remove all other occupied slots in the other macro blocks.
// This will leak empty macro blocks, but they may be reused or cleared later on.
for (u32 addr : block->physical_addresses)
if ((addr & BLOCK_RANGE_MAP_MASK) != start->first)
block_range_map[addr & BLOCK_RANGE_MAP_MASK].erase(block);
for (auto [range_start, range_end] : block->physical_addresses)
{
DEBUG_ASSERT(range_start != range_end);
for (u32 i = range_start & BLOCK_RANGE_MAP_MASK; i < range_end; i += BLOCK_RANGE_SIZE)
{
if (i != start->first)
block_range_map[i].erase(block);
}
}
// And remove the block.
DestroyBlock(*block);
@@ -404,8 +412,11 @@ void JitBaseBlockCache::EraseSingleBlock(const JitBlock& block)
JitBlock& mutable_block = block_map_iter->second;
for (const u32 addr : mutable_block.physical_addresses)
block_range_map[addr & BLOCK_RANGE_MAP_MASK].erase(&mutable_block);
for (auto [range_start, range_end] : mutable_block.physical_addresses)
{
for (u32 i = range_start & BLOCK_RANGE_MAP_MASK; i < range_end; i += BLOCK_RANGE_SIZE)
block_range_map[i].erase(&mutable_block);
}
DestroyBlock(mutable_block);
block_map.erase(block_map_iter); // The original JitBlock reference is now dangling.

View File

@@ -10,13 +10,13 @@
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "Common/CommonTypes.h"
#include "Common/RangeSet.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/PPCAnalyst.h"
@@ -101,7 +101,7 @@ struct JitBlock : public JitBlockData
std::vector<LinkData> linkData;
// This set stores all physical addresses of all occupied instructions.
std::set<u32> physical_addresses;
Common::RangeSet<u32> physical_addresses;
// This is only available when debugging is enabled. It is a trimmed-down copy of the
// PPCAnalyst::CodeBuffer used to recompile this block, including repeat instructions.
@@ -218,7 +218,8 @@ private:
// Range of overlapping code indexed by a masked physical address.
// This is used for invalidation of memory regions. The range is grouped
// in macro blocks of each 0x100 bytes.
static constexpr u32 BLOCK_RANGE_MAP_MASK = ~(0x100 - 1);
static constexpr u32 BLOCK_RANGE_SIZE = 0x100;
static constexpr u32 BLOCK_RANGE_MAP_MASK = ~(BLOCK_RANGE_SIZE - 1);
std::map<u32, std::unordered_set<JitBlock*>> block_range_map;
// This bitsets shows which cachelines overlap with any blocks.

View File

@@ -849,7 +849,8 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
code[i].inst = inst;
code[i].skip = false;
block->m_stats->numCycles += opinfo->num_cycles;
block->m_physical_addresses.insert(result.physical_address);
block->m_physical_addresses.insert(result.physical_address,
result.physical_address + sizeof(UGeckoInstruction));
SetInstructionStats(block, &code[i], opinfo);

View File

@@ -5,11 +5,11 @@
#include <algorithm>
#include <cstddef>
#include <set>
#include <vector>
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/RangeSet.h"
#include "Core/PowerPC/PPCTables.h"
class PPCSymbolDB;
@@ -129,7 +129,7 @@ struct CodeBlock
BitSet32 m_gpr_inputs;
// Which memory locations are occupied by this block.
std::set<u32> m_physical_addresses;
Common::RangeSet<u32> m_physical_addresses;
};
class PPCAnalyzer

View File

@@ -236,7 +236,8 @@ QVariant JitBlockTableModel::DisplayRoleData(const QModelIndex& index) const
case Column::CodeBufferSize:
return QString::number(jit_block.originalSize * sizeof(UGeckoInstruction));
case Column::RepeatInstructions:
return QString::number(jit_block.originalSize - jit_block.physical_addresses.size());
return QString::number(jit_block.originalSize - jit_block.physical_addresses.get_stats().first /
sizeof(UGeckoInstruction));
case Column::HostNearCodeSize:
return QString::number(jit_block.near_end - jit_block.near_begin);
case Column::HostFarCodeSize:
@@ -329,7 +330,9 @@ QVariant JitBlockTableModel::SortRoleData(const QModelIndex& index) const
case Column::CodeBufferSize:
return static_cast<qulonglong>(jit_block.originalSize);
case Column::RepeatInstructions:
return static_cast<qulonglong>(jit_block.originalSize - jit_block.physical_addresses.size());
return static_cast<qulonglong>(jit_block.originalSize -
jit_block.physical_addresses.get_stats().first /
sizeof(UGeckoInstruction));
case Column::HostNearCodeSize:
return static_cast<qulonglong>(jit_block.near_end - jit_block.near_begin);
case Column::HostFarCodeSize: