mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2026-03-18 19:32:41 +00:00
Jit: Use RangeSet for physical_addresses
This makes JitBaseBlockCache::ErasePhysicalRange around 50% faster and PPCAnalyzer::Analyze around 40% faster. Rogue Squadron 2's notoriously laggy action of switching to and from cockpit view is made something like 20-30% faster by this, though this is a very rough measurement.
This commit is contained in:
parent
36f45dce44
commit
c1a26808ce
@ -24,6 +24,8 @@ public:
|
||||
|
||||
// End bound of the range this iterator currently points at, i.e. the `second` member of the
// current entry of the underlying container.
const T& to() const
{
  return It->second;
}
|
||||
|
||||
std::pair<T, T> operator*() { return {from(), to()}; }
|
||||
|
||||
const_iterator& operator++()
|
||||
{
|
||||
++It;
|
||||
@ -243,6 +245,18 @@ public:
|
||||
return get_from(it) <= value && value < get_to(it);
|
||||
}
|
||||
|
||||
// Reports whether the half-open query range [from, to) shares at least one value with a range
// stored in Map. An empty or inverted query (from >= to) never overlaps anything.
bool overlaps(T from, T to) const
{
  if (from >= to)
    return false;

  // Find the first entry at or past `to`, then step back to the last entry that begins before
  // `to`. NOTE(review): assumes Map is ordered by range start and that stored ranges are
  // disjoint, so this predecessor is the only candidate that can reach into the query - confirm.
  auto candidate = Map.lower_bound(to);
  if (candidate == Map.begin())
    return false;  // No stored range begins before `to`.
  --candidate;

  if (!(get_from(candidate) < to))
    return false;
  return from < get_to(candidate);
}
|
||||
|
||||
// Number of entries currently held in Map.
std::size_t size() const
{
  return Map.size();
}
|
||||
|
||||
// True when Map holds no entries.
bool empty() const
{
  return Map.empty();
}
|
||||
|
||||
@ -69,6 +69,8 @@ public:
|
||||
|
||||
// End bound of the range this iterator currently points at, i.e. the `To` field of the current
// entry's mapped value.
const T& to() const
{
  return It->second.To;
}
|
||||
|
||||
std::pair<T, T> operator*() { return {from(), to()}; }
|
||||
|
||||
const_iterator& operator++()
|
||||
{
|
||||
++It;
|
||||
@ -351,6 +353,18 @@ public:
|
||||
return get_from(it) <= value && value < get_to(it);
|
||||
}
|
||||
|
||||
// Reports whether the half-open query range [from, to) shares at least one value with a range
// stored in Map. An empty or inverted query (from >= to) never overlaps anything.
bool overlaps(T from, T to) const
{
  if (from >= to)
    return false;

  // Find the first entry at or past `to`, then step back to the last entry that begins before
  // `to`. NOTE(review): assumes Map is ordered by range start and that stored ranges are
  // disjoint, so this predecessor is the only candidate that can reach into the query - confirm.
  auto candidate = Map.lower_bound(to);
  if (candidate == Map.begin())
    return false;  // No stored range begins before `to`.
  --candidate;

  if (!(get_from(candidate) < to))
    return false;
  return from < get_to(candidate);
}
|
||||
|
||||
// Number of entries currently held in Map.
std::size_t size() const
{
  return Map.size();
}
|
||||
|
||||
// True when Map holds no entries.
bool empty() const
{
  return Map.empty();
}
|
||||
|
||||
@ -7,9 +7,8 @@
|
||||
#include <map>
|
||||
#include <optional>
|
||||
|
||||
#include <rangeset/rangesizeset.h>
|
||||
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#include "Common/RangeSizeSet.h"
|
||||
|
||||
#include "Core/PowerPC/JitArm64/JitArm64Cache.h"
|
||||
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
|
||||
@ -431,10 +430,10 @@ protected:
|
||||
u8* m_near_code_end = nullptr;
|
||||
bool m_near_code_write_failed = false;
|
||||
|
||||
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near_0;
|
||||
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near_1;
|
||||
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far_0;
|
||||
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_far_1;
|
||||
Common::RangeSizeSet<u8*> m_free_ranges_near_0;
|
||||
Common::RangeSizeSet<u8*> m_free_ranges_near_1;
|
||||
Common::RangeSizeSet<u8*> m_free_ranges_far_0;
|
||||
Common::RangeSizeSet<u8*> m_free_ranges_far_1;
|
||||
|
||||
std::unique_ptr<HostDisassembler> m_disassembler;
|
||||
};
|
||||
|
||||
@ -31,8 +31,7 @@ using namespace Gen;
|
||||
|
||||
bool JitBlock::OverlapsPhysicalRange(u32 address, u32 length) const
|
||||
{
|
||||
return physical_addresses.lower_bound(address) !=
|
||||
physical_addresses.lower_bound(address + length);
|
||||
return physical_addresses.overlaps(address, address + length);
|
||||
}
|
||||
|
||||
void JitBlock::ProfileData::BeginProfiling(ProfileData* data)
|
||||
@ -171,10 +170,13 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
|
||||
original_buffer_transform_view.end());
|
||||
}
|
||||
|
||||
for (u32 addr : block.physical_addresses)
|
||||
for (auto [range_start, range_end] : block.physical_addresses)
|
||||
{
|
||||
valid_block.Set(addr / 32);
|
||||
block_range_map[addr & BLOCK_RANGE_MAP_MASK].insert(&block);
|
||||
for (u32 i = range_start & ~31; i < range_end; i += 32)
|
||||
valid_block.Set(i / 32);
|
||||
|
||||
for (u32 i = range_start & BLOCK_RANGE_MAP_MASK; i < range_end; i += BLOCK_RANGE_SIZE)
|
||||
block_range_map[i].insert(&block);
|
||||
}
|
||||
|
||||
if (block_link)
|
||||
@ -362,9 +364,15 @@ void JitBaseBlockCache::ErasePhysicalRange(u32 address, u32 length)
|
||||
{
|
||||
// If the block overlaps, also remove all other occupied slots in the other macro blocks.
|
||||
// This will leak empty macro blocks, but they may be reused or cleared later on.
|
||||
for (u32 addr : block->physical_addresses)
|
||||
if ((addr & BLOCK_RANGE_MAP_MASK) != start->first)
|
||||
block_range_map[addr & BLOCK_RANGE_MAP_MASK].erase(block);
|
||||
for (auto [range_start, range_end] : block->physical_addresses)
|
||||
{
|
||||
DEBUG_ASSERT(range_start != range_end);
|
||||
for (u32 i = range_start & BLOCK_RANGE_MAP_MASK; i < range_end; i += BLOCK_RANGE_SIZE)
|
||||
{
|
||||
if (i != start->first)
|
||||
block_range_map[i].erase(block);
|
||||
}
|
||||
}
|
||||
|
||||
// And remove the block.
|
||||
DestroyBlock(*block);
|
||||
@ -404,8 +412,11 @@ void JitBaseBlockCache::EraseSingleBlock(const JitBlock& block)
|
||||
|
||||
JitBlock& mutable_block = block_map_iter->second;
|
||||
|
||||
for (const u32 addr : mutable_block.physical_addresses)
|
||||
block_range_map[addr & BLOCK_RANGE_MAP_MASK].erase(&mutable_block);
|
||||
for (auto [range_start, range_end] : mutable_block.physical_addresses)
|
||||
{
|
||||
for (u32 i = range_start & BLOCK_RANGE_MAP_MASK; i < range_end; i += BLOCK_RANGE_SIZE)
|
||||
block_range_map[i].erase(&mutable_block);
|
||||
}
|
||||
|
||||
DestroyBlock(mutable_block);
|
||||
block_map.erase(block_map_iter); // The original JitBlock reference is now dangling.
|
||||
|
||||
@ -10,13 +10,13 @@
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/RangeSet.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
@ -101,7 +101,7 @@ struct JitBlock : public JitBlockData
|
||||
std::vector<LinkData> linkData;
|
||||
|
||||
// This set stores all physical addresses of all occupied instructions.
|
||||
std::set<u32> physical_addresses;
|
||||
Common::RangeSet<u32> physical_addresses;
|
||||
|
||||
// This is only available when debugging is enabled. It is a trimmed-down copy of the
|
||||
// PPCAnalyst::CodeBuffer used to recompile this block, including repeat instructions.
|
||||
@ -218,7 +218,8 @@ private:
|
||||
// Range of overlapping code indexed by a masked physical address.
|
||||
// This is used for invalidation of memory regions. The range is grouped
|
||||
// in macro blocks of 0x100 bytes each.
|
||||
static constexpr u32 BLOCK_RANGE_MAP_MASK = ~(0x100 - 1);
|
||||
static constexpr u32 BLOCK_RANGE_SIZE = 0x100;
|
||||
static constexpr u32 BLOCK_RANGE_MAP_MASK = ~(BLOCK_RANGE_SIZE - 1);
|
||||
std::map<u32, std::unordered_set<JitBlock*>> block_range_map;
|
||||
|
||||
// This bitset shows which cachelines overlap with any blocks.
|
||||
|
||||
@ -849,7 +849,8 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
|
||||
code[i].inst = inst;
|
||||
code[i].skip = false;
|
||||
block->m_stats->numCycles += opinfo->num_cycles;
|
||||
block->m_physical_addresses.insert(result.physical_address);
|
||||
block->m_physical_addresses.insert(result.physical_address,
|
||||
result.physical_address + sizeof(UGeckoInstruction));
|
||||
|
||||
SetInstructionStats(block, &code[i], opinfo);
|
||||
|
||||
|
||||
@ -5,11 +5,11 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/RangeSet.h"
|
||||
#include "Core/PowerPC/PPCTables.h"
|
||||
|
||||
class PPCSymbolDB;
|
||||
@ -129,7 +129,7 @@ struct CodeBlock
|
||||
BitSet32 m_gpr_inputs;
|
||||
|
||||
// Which memory locations are occupied by this block.
|
||||
std::set<u32> m_physical_addresses;
|
||||
Common::RangeSet<u32> m_physical_addresses;
|
||||
};
|
||||
|
||||
class PPCAnalyzer
|
||||
|
||||
@ -236,7 +236,8 @@ QVariant JitBlockTableModel::DisplayRoleData(const QModelIndex& index) const
|
||||
case Column::CodeBufferSize:
|
||||
return QString::number(jit_block.originalSize * sizeof(UGeckoInstruction));
|
||||
case Column::RepeatInstructions:
|
||||
return QString::number(jit_block.originalSize - jit_block.physical_addresses.size());
|
||||
return QString::number(jit_block.originalSize - jit_block.physical_addresses.get_stats().first /
|
||||
sizeof(UGeckoInstruction));
|
||||
case Column::HostNearCodeSize:
|
||||
return QString::number(jit_block.near_end - jit_block.near_begin);
|
||||
case Column::HostFarCodeSize:
|
||||
@ -329,7 +330,9 @@ QVariant JitBlockTableModel::SortRoleData(const QModelIndex& index) const
|
||||
case Column::CodeBufferSize:
|
||||
return static_cast<qulonglong>(jit_block.originalSize);
|
||||
case Column::RepeatInstructions:
|
||||
return static_cast<qulonglong>(jit_block.originalSize - jit_block.physical_addresses.size());
|
||||
return static_cast<qulonglong>(jit_block.originalSize -
|
||||
jit_block.physical_addresses.get_stats().first /
|
||||
sizeof(UGeckoInstruction));
|
||||
case Column::HostNearCodeSize:
|
||||
return static_cast<qulonglong>(jit_block.near_end - jit_block.near_begin);
|
||||
case Column::HostFarCodeSize:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user