diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index b865dcd6e0..36c6e886e3 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -116,6 +116,12 @@ struct Regs { inline u32 GetEndAddress() const { return DecodeAddressRegister(address_end); } + + inline std::string DebugName() const { + return fmt::format("from {:#X} to {:#X} with {}-bit value {:#X}", GetStartAddress(), + GetEndAddress(), fill_32bit ? "32" : (fill_24bit ? "24" : "16"), + value_32bit); + } } memory_fill_config[2]; ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10); @@ -176,6 +182,13 @@ struct Regs { return DecodeAddressRegister(output_address); } + inline std::string DebugName() const noexcept { + return fmt::format("from {:#x} to {:#x} with {} scaling and stride {}, width {}", + GetPhysicalInputAddress(), GetPhysicalOutputAddress(), + scaling == NoScale ? "no" : (scaling == ScaleX ? "X" : "XY"), + input_width, output_width); + } + union { u32 output_size; diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 357cf8d5a0..31771d5528 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -95,6 +95,9 @@ void RasterizerCache::TickFrame() { template bool RasterizerCache::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { + const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 1.f, 1.f}, + "RasterizerCache::AccelerateTextureCopy ({})", config.DebugName()}; + // Texture copy size is aligned to 16 byte units const u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); if (copy_size == 0) { @@ -190,6 +193,9 @@ bool RasterizerCache::AccelerateTextureCopy(const GPU::Regs::DisplayTransferC template bool RasterizerCache::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 1.f, 1.f}, + "RasterizerCache::AccelerateDisplayTransfer ({})", config.DebugName()}; + SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); src_params.width = config.output_width; @@ -250,6 +256,9 @@ bool RasterizerCache::AccelerateDisplayTransfer(const GPU::Regs::DisplayTrans template bool RasterizerCache::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + const DebugScope scope{runtime, Common::Vec4f{1.f, 0.f, 1.f, 1.f}, + "RasterizerCache::AccelerateFill ({})", config.DebugName()}; + SurfaceParams params; params.addr = config.GetStartAddress(); params.end = config.GetEndAddress(); @@ -924,11 +933,19 @@ void RasterizerCache::ValidateSurface(SurfaceId surface_id, PAddr addr, u32 s SurfaceRegions validate_regions = surface.invalid_regions & validate_interval; + if (validate_regions.empty()) { + return; + } + auto notify_validated = [&](SurfaceInterval interval) { surface.MarkValid(interval); validate_regions.erase(interval); }; + const DebugScope scope{runtime, Common::Vec4f{0.f, 1.f, 0.f, 1.f}, + "RasterizerCache::ValidateSurface (from {:#x} to {:#x})", addr, + addr + size}; + u32 level = surface.LevelOf(addr); SurfaceInterval level_interval = surface.LevelInterval(level); while (!validate_regions.empty()) { @@ -1213,19 +1230,34 @@ void RasterizerCache::FlushRegion(PAddr addr, u32 size, SurfaceId flush_surfa // Small sizes imply that this most likely comes from the cpu, flush the entire region // the point is to avoid thousands of small writes every frame if the cpu decides to // access that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? region : region & flush_interval; + auto interval = size <= 8 ? region : region & flush_interval; if (flush_surface_id && surface_id != flush_surface_id) { continue; } + const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 0.f, 1.f}, + "RasterizerCache::FlushRegion (from {:#x} to {:#x})", + interval.lower(), interval.upper()}; + // Sanity check, this surface is the last one that marked this region dirty Surface& surface = slot_surfaces[surface_id]; ASSERT(surface.IsRegionValid(interval)); if (surface.type == SurfaceType::Fill) { DownloadFillSurface(surface, interval); - } else { - DownloadSurface(surface, interval); + flushed_intervals += interval; + continue; + } + + // Download each requested level of the surface. + const u32 start_level = surface.LevelOf(interval.lower()); + const u32 end_level = surface.LevelOf(interval.upper()); + for (u32 level = start_level; level <= end_level; level++) { + const auto download_interval = interval & surface.LevelInterval(level); + if (boost::icl::is_empty(download_interval)) { + continue; + } + DownloadSurface(surface, download_interval); } flushed_intervals += interval; diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index eb638ae345..57c7473bcd 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -56,6 +56,7 @@ class RasterizerCache { using Sampler = typename T::Sampler; using Surface = typename T::Surface; using Framebuffer = typename T::Framebuffer; + using DebugScope = typename T::DebugScope; using SurfaceMap = boost::icl::interval_map + explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color, + fmt::format_string format, T... args) + : DebugScope{runtime, color, fmt::format(format, std::forward(args)...)} {} + explicit DebugScope(TextureRuntime& runtime, Common::Vec4f, std::string_view label); + ~DebugScope(); + +private: + inline static GLuint global_scope_depth = 0; + const GLuint local_scope_depth{}; +}; + struct Traits { using Runtime = OpenGL::TextureRuntime; using Sampler = OpenGL::Sampler; using Surface = OpenGL::Surface; using Framebuffer = OpenGL::Framebuffer; + using DebugScope = OpenGL::DebugScope; }; using RasterizerCache = VideoCore::RasterizerCache;