gl_buffer_cache: Rework to support internalized buffers
This commit is contained in:
		
							parent
							
								
									f8ba72d491
								
							
						
					
					
						commit
						8155b12d3d
					
				@ -7,90 +7,165 @@
 | 
			
		||||
#include <utility>
 | 
			
		||||
 | 
			
		||||
#include "common/alignment.h"
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "video_core/memory_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
			
		||||
 | 
			
		||||
namespace OpenGL {
 | 
			
		||||
 | 
			
		||||
/// Builds an entry from a fully specified description: guest CPU address, host pointer, size,
/// alignment and the OpenGL buffer handle/offset supplied by the caller. Every argument is
/// stored as given; the host pointer is forwarded to the RasterizerCacheObject base.
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size,
                                     std::size_t alignment, GLuint buffer, GLintptr offset)
    : RasterizerCacheObject{host_ptr},
      cpu_addr{cpu_addr},
      size{size},
      alignment{alignment},
      buffer{buffer},
      offset{offset} {}
 | 
			
		||||
namespace {
 | 
			
		||||
 | 
			
		||||
constexpr GLuint EmptyBuffer = 0;
 | 
			
		||||
constexpr GLintptr CachedBufferOffset = 0;
 | 
			
		||||
 | 
			
		||||
OGLBuffer CreateBuffer(std::size_t size, GLenum usage) {
 | 
			
		||||
    OGLBuffer buffer;
 | 
			
		||||
    buffer.Create();
 | 
			
		||||
    glNamedBufferData(buffer.handle, size, nullptr, usage);
 | 
			
		||||
    return buffer;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
/// Builds an entry that only records its guest CPU address and host pointer. All other members
/// keep their in-class default values until they are assigned through the setters.
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr)
    : RasterizerCacheObject{host_ptr},
      cpu_addr{cpu_addr} {}
 | 
			
		||||
 | 
			
		||||
/// Constructs the buffer cache.
/// @param rasterizer Rasterizer forwarded to the RasterizerCache base.
/// @param size       Size passed to the stream buffer constructor.
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
    : RasterizerCache{rasterizer},
      // NOTE(review): the meaning of the `true` flag is defined by OGLStreamBuffer - confirm.
      stream_buffer(size, true) {}
 | 
			
		||||
 | 
			
		||||
std::pair<GLuint, GLintptr> OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size,
 | 
			
		||||
                                                         std::size_t alignment, bool cache) {
 | 
			
		||||
OGLBufferCache::~OGLBufferCache() = default;
 | 
			
		||||
 | 
			
		||||
void OGLBufferCache::Unregister(const std::shared_ptr<CachedBufferEntry>& entry) {
 | 
			
		||||
    std::lock_guard lock{mutex};
 | 
			
		||||
 | 
			
		||||
    if (entry->IsInternalized()) {
 | 
			
		||||
        internalized_entries.erase(entry->GetCacheAddr());
 | 
			
		||||
    }
 | 
			
		||||
    ReserveBuffer(entry);
 | 
			
		||||
    RasterizerCache<std::shared_ptr<CachedBufferEntry>>::Unregister(entry);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Uploads `size` bytes of guest GPU memory starting at `gpu_addr`.
///
/// Small uploads that are neither requested as internalized nor already tracked as internalized
/// go through the stream buffer. Everything else is served from a cache entry that owns its own
/// OpenGL buffer; the entry is created on first use and grown when a larger size is requested.
///
/// @param gpu_addr    Guest GPU virtual address of the data.
/// @param size        Number of bytes to upload.
/// @param alignment   Alignment applied to the stream buffer offset (stream path only).
/// @param internalize When true, the address is always backed by a dedicated cache entry.
/// @return The OpenGL buffer handle and the offset of the data inside that buffer.
OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size,
                                                        std::size_t alignment, bool internalize) {
    std::lock_guard lock{mutex};

    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
    const auto host_ptr{memory_manager.GetPointer(gpu_addr)};
    const auto cache_addr{ToCacheAddr(host_ptr)};
    if (!host_ptr) {
        // Return a dummy buffer when host_ptr is invalid.
        return {EmptyBuffer, 0};
    }

    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
    if (!internalize && size < 0x800 &&
        internalized_entries.find(cache_addr) == internalized_entries.end()) {
        return StreamBufferUpload(host_ptr, size, alignment);
    }

    auto entry = TryGet(host_ptr);
    if (!entry) {
        // Nothing registered for this address yet: create (or reuse) an entry and fill it.
        return FixedBufferUpload(gpu_addr, host_ptr, size, internalize);
    }

    if (entry->GetSize() < size) {
        // The registered entry covers fewer bytes than requested.
        GrowBuffer(entry, size);
    }
    return {entry->GetBuffer(), CachedBufferOffset};
}
 | 
			
		||||
 | 
			
		||||
std::pair<GLuint, GLintptr> OGLBufferCache::UploadHostMemory(const void* raw_pointer,
 | 
			
		||||
                                                             std::size_t size,
 | 
			
		||||
                                                             std::size_t alignment) {
 | 
			
		||||
    std::lock_guard lock{mutex};
 | 
			
		||||
OGLBufferCache::BufferInfo OGLBufferCache::UploadHostMemory(const void* raw_pointer,
 | 
			
		||||
                                                            std::size_t size,
 | 
			
		||||
                                                            std::size_t alignment) {
 | 
			
		||||
    return StreamBufferUpload(raw_pointer, size, alignment);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool OGLBufferCache::Map(std::size_t max_size) {
 | 
			
		||||
    const auto max_size_ = static_cast<GLsizeiptr>(max_size);
 | 
			
		||||
    bool invalidate;
 | 
			
		||||
    std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer.Map(max_size_, 4);
 | 
			
		||||
    buffer_offset = buffer_offset_base;
 | 
			
		||||
    return invalidate;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Unmaps the stream buffer, passing the distance between the current write offset and the
/// offset recorded when the buffer was mapped.
void OGLBufferCache::Unmap() {
    const auto offset_delta = buffer_offset - buffer_offset_base;
    stream_buffer.Unmap(offset_delta);
}
 | 
			
		||||
 | 
			
		||||
/// Copies `size` bytes from `raw_pointer` into the currently mapped stream buffer.
///
/// The write cursor is aligned first, the data is copied once, and both the pointer and the
/// offset cursor are advanced past the copied bytes.
/// @return The stream buffer handle and the (aligned) offset the data was written at.
OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_pointer,
                                                              std::size_t size,
                                                              std::size_t alignment) {
    AlignBuffer(alignment);
    const GLintptr uploaded_offset = buffer_offset;
    std::memcpy(buffer_ptr, raw_pointer, size);

    buffer_ptr += size;
    buffer_offset += size;
    return {stream_buffer.GetHandle(), uploaded_offset};
}
 | 
			
		||||
 | 
			
		||||
bool OGLBufferCache::Map(std::size_t max_size) {
 | 
			
		||||
    bool invalidate;
 | 
			
		||||
    std::tie(buffer_ptr, buffer_offset_base, invalidate) =
 | 
			
		||||
        stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
 | 
			
		||||
    buffer_offset = buffer_offset_base;
 | 
			
		||||
 | 
			
		||||
    if (invalidate) {
 | 
			
		||||
        InvalidateAll();
 | 
			
		||||
OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr,
 | 
			
		||||
                                                             std::size_t size, bool internalize) {
 | 
			
		||||
    if (internalize) {
 | 
			
		||||
        internalized_entries.emplace(ToCacheAddr(host_ptr));
 | 
			
		||||
    }
 | 
			
		||||
    return invalidate;
 | 
			
		||||
    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
 | 
			
		||||
    const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr);
 | 
			
		||||
    auto entry = GetUncachedBuffer(cpu_addr, host_ptr);
 | 
			
		||||
    entry->SetSize(size);
 | 
			
		||||
    entry->SetInternalState(internalize);
 | 
			
		||||
    Register(entry);
 | 
			
		||||
 | 
			
		||||
    if (entry->GetCapacity() < size) {
 | 
			
		||||
        entry->SetCapacity(CreateBuffer(size, GL_STATIC_DRAW), size);
 | 
			
		||||
    }
 | 
			
		||||
    glNamedBufferSubData(entry->GetBuffer(), 0, static_cast<GLintptr>(size), host_ptr);
 | 
			
		||||
    return {entry->GetBuffer(), CachedBufferOffset};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void OGLBufferCache::Unmap() {
 | 
			
		||||
    stream_buffer.Unmap(buffer_offset - buffer_offset_base);
 | 
			
		||||
void OGLBufferCache::GrowBuffer(std::shared_ptr<CachedBufferEntry>& entry, std::size_t new_size) {
 | 
			
		||||
    const auto old_size = static_cast<GLintptr>(entry->GetSize());
 | 
			
		||||
    if (entry->GetCapacity() < new_size) {
 | 
			
		||||
        const auto old_buffer = entry->GetBuffer();
 | 
			
		||||
        OGLBuffer new_buffer = CreateBuffer(new_size, GL_STATIC_COPY);
 | 
			
		||||
 | 
			
		||||
        // Copy bits from the old buffer to the new buffer.
 | 
			
		||||
        glCopyNamedBufferSubData(old_buffer, new_buffer.handle, 0, 0, old_size);
 | 
			
		||||
        entry->SetCapacity(std::move(new_buffer), new_size);
 | 
			
		||||
    }
 | 
			
		||||
    // Upload the new bits.
 | 
			
		||||
    const auto size_diff = static_cast<GLintptr>(new_size - old_size);
 | 
			
		||||
    glNamedBufferSubData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
 | 
			
		||||
 | 
			
		||||
    // Update entry's size in the object and in the cache.
 | 
			
		||||
    entry->SetSize(new_size);
 | 
			
		||||
    Unregister(entry);
 | 
			
		||||
    Register(entry);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Returns an entry object that is not currently registered for `host_ptr`: a reserved one when
/// the reserve has an entry for that address, otherwise a freshly constructed entry.
std::shared_ptr<CachedBufferEntry> OGLBufferCache::GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
    auto reserved = TryGetReservedBuffer(host_ptr);
    if (reserved) {
        return reserved;
    }
    return std::make_shared<CachedBufferEntry>(cpu_addr, host_ptr);
}
 | 
			
		||||
 | 
			
		||||
/// Takes one reserved entry for `host_ptr` out of the reserve.
/// @return The most recently reserved entry for that address, or an empty pointer when the
///         reserve holds nothing for it.
std::shared_ptr<CachedBufferEntry> OGLBufferCache::TryGetReservedBuffer(u8* host_ptr) {
    const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
    if (it == buffer_reserve.end()) {
        return {};
    }

    auto& reserve = it->second;
    if (reserve.empty()) {
        // A key can outlive its entries; calling back() on an empty vector is undefined.
        buffer_reserve.erase(it);
        return {};
    }

    auto entry = std::move(reserve.back());
    reserve.pop_back();
    if (reserve.empty()) {
        // Drop drained lists so the map does not accumulate empty vectors.
        buffer_reserve.erase(it);
    }
    return entry;
}
 | 
			
		||||
 | 
			
		||||
/// Appends `entry` to the reserve list kept for the entry's cache address, creating the list
/// when the address has none yet.
void OGLBufferCache::ReserveBuffer(std::shared_ptr<CachedBufferEntry> entry) {
    const auto cache_addr = entry->GetCacheAddr();
    auto& reserve = buffer_reserve[cache_addr];
    reserve.push_back(std::move(entry));
}
 | 
			
		||||
 | 
			
		||||
void OGLBufferCache::AlignBuffer(std::size_t alignment) {
 | 
			
		||||
 | 
			
		||||
@ -5,9 +5,12 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <cstddef>
#include <map>
#include <memory>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
 | 
			
		||||
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/rasterizer_cache.h"
 | 
			
		||||
@ -20,8 +23,7 @@ class RasterizerOpenGL;
 | 
			
		||||
 | 
			
		||||
class CachedBufferEntry final : public RasterizerCacheObject {
 | 
			
		||||
public:
 | 
			
		||||
    explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size,
 | 
			
		||||
                               std::size_t alignment, GLuint buffer, GLintptr offset);
 | 
			
		||||
    explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr);
 | 
			
		||||
 | 
			
		||||
    VAddr GetCpuAddr() const override {
 | 
			
		||||
        return cpu_addr;
 | 
			
		||||
@ -35,55 +37,87 @@ public:
 | 
			
		||||
        return size;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::size_t GetAlignment() const {
 | 
			
		||||
        return alignment;
 | 
			
		||||
    std::size_t GetCapacity() const {
 | 
			
		||||
        return capacity;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    bool IsInternalized() const {
 | 
			
		||||
        return is_internal;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    GLuint GetBuffer() const {
 | 
			
		||||
        return buffer;
 | 
			
		||||
        return buffer.handle;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    GLintptr GetOffset() const {
 | 
			
		||||
        return offset;
 | 
			
		||||
    void SetSize(std::size_t new_size) {
 | 
			
		||||
        size = new_size;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void SetInternalState(bool is_internal_) {
 | 
			
		||||
        is_internal = is_internal_;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void SetCapacity(OGLBuffer&& new_buffer, std::size_t new_capacity) {
 | 
			
		||||
        capacity = new_capacity;
 | 
			
		||||
        buffer = std::move(new_buffer);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    VAddr cpu_addr{};
 | 
			
		||||
    std::size_t size{};
 | 
			
		||||
    std::size_t alignment{};
 | 
			
		||||
 | 
			
		||||
    GLuint buffer{};
 | 
			
		||||
    GLintptr offset{};
 | 
			
		||||
    std::size_t capacity{};
 | 
			
		||||
    bool is_internal{};
 | 
			
		||||
    OGLBuffer buffer;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Buffer cache for the OpenGL renderer. Uploads either go through a shared stream buffer or
/// into cache entries that own their OpenGL buffer.
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
    /// OpenGL buffer handle paired with the offset of the uploaded data inside that buffer.
    using BufferInfo = std::pair<GLuint, GLintptr>;

public:
    explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);
    ~OGLBufferCache();

    void Unregister(const std::shared_ptr<CachedBufferEntry>& entry) override;

    /// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its
    /// offset.
    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                            bool internalize = false);

    /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
                                std::size_t alignment = 4);

    /// Maps the stream buffer for up to `max_size` bytes; returns the stream buffer's
    /// invalidation flag.
    bool Map(std::size_t max_size);

    /// Unmaps the stream buffer.
    void Unmap();

protected:
    // We do not have to flush this cache as things in it are never modified by us.
    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}

private:
    /// Copies host data into the mapped stream buffer.
    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment);

    /// Registers an entry for `host_ptr` and uploads the data into the entry's own buffer.
    BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
                                 bool internalize);

    /// Extends an existing entry to cover `new_size` bytes.
    void GrowBuffer(std::shared_ptr<CachedBufferEntry>& entry, std::size_t new_size);

    /// Returns a reserved entry for `host_ptr` when available, otherwise a new one.
    std::shared_ptr<CachedBufferEntry> GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr);

    /// Takes a reserved entry for `host_ptr` out of the reserve, if any.
    std::shared_ptr<CachedBufferEntry> TryGetReservedBuffer(u8* host_ptr);

    /// Stores an entry in the reserve, keyed by its cache address.
    void ReserveBuffer(std::shared_ptr<CachedBufferEntry> entry);

    void AlignBuffer(std::size_t alignment);

    u8* buffer_ptr = nullptr;
    GLintptr buffer_offset = 0;
    GLintptr buffer_offset_base = 0;

    OGLStreamBuffer stream_buffer;
    /// Cache addresses that currently have an internalized entry.
    std::unordered_set<CacheAddr> internalized_entries;
    /// Entry objects kept for reuse, grouped by cache address.
    std::unordered_map<CacheAddr, std::vector<std::shared_ptr<CachedBufferEntry>>> buffer_reserve;
};
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 | 
			
		||||
@ -790,7 +790,7 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
 | 
			
		||||
    size = Common::AlignUp(size, sizeof(GLvec4));
 | 
			
		||||
    ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
 | 
			
		||||
 | 
			
		||||
    const std::size_t alignment = device.GetUniformBufferAlignment();
 | 
			
		||||
    const auto alignment = device.GetUniformBufferAlignment();
 | 
			
		||||
    const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
 | 
			
		||||
    bind_ubo_pushbuffer.Push(cbuf, offset, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user