diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8b3a4aa844..8dd120977d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -30,7 +30,8 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { +RasterizerOpenGL::RasterizerOpenGL() + : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE) { // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; @@ -46,13 +47,11 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { texture_cube.Create(); // Generate VBO, VAO and UBO - vertex_buffer = OGLStreamBuffer::MakeBuffer(GLAD_GL_ARB_buffer_storage, GL_ARRAY_BUFFER); - vertex_buffer->Create(VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE / 2); vertex_array.Create(); uniform_buffer.Create(); state.draw.vertex_array = vertex_array.handle; - state.draw.vertex_buffer = vertex_buffer->GetHandle(); + state.draw.vertex_buffer = vertex_buffer.GetHandle(); state.draw.uniform_buffer = uniform_buffer.handle; state.Apply(); @@ -499,14 +498,16 @@ void RasterizerOpenGL::DrawTriangles() { state.Apply(); // Draw the vertex batch - size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); + size_t max_vertices = 3 * (vertex_buffer.GetSize() / (3 * sizeof(HardwareVertex))); for (size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) { size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex); size_t vertex_size = vertices * sizeof(HardwareVertex); - auto map = vertex_buffer->Map(vertex_size, 1); - memcpy(map.first, vertex_batch.data() + base_vertex, vertex_size); - vertex_buffer->Unmap(); - glDrawArrays(GL_TRIANGLES, map.second / sizeof(HardwareVertex), (GLsizei)vertices); + u8* vbo; + GLintptr offset; + std::tie(vbo, offset, std::ignore) = vertex_buffer.Map(vertex_size, sizeof(HardwareVertex)); + memcpy(vbo, vertex_batch.data() + base_vertex, vertex_size); + vertex_buffer.Unmap(vertex_size); + glDrawArrays(GL_TRIANGLES, offset / sizeof(HardwareVertex), (GLsizei)vertices); } // Disable scissor test diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index fd72e9f1ed..02771a1897 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -239,8 +239,8 @@ private: std::array texture_samplers; OGLVertexArray vertex_array; - static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; - std::unique_ptr vertex_buffer; + static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; + OGLStreamBuffer vertex_buffer; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index a2713e9f0f..e1698443e2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,174 +9,81 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -class OrphanBuffer : public OGLStreamBuffer { -public: - explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} - ~OrphanBuffer() override; +OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) + : gl_target(target), buffer_size(size) { + gl_buffer.Create(); + glBindBuffer(gl_target, gl_buffer.handle); -private: - void Create(size_t size, size_t sync_subdivide) override; - void Release() override; + if (GLAD_GL_ARB_buffer_storage) { + persistent = true; + coherent = prefer_coherent; + GLbitfield flags = + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + glBufferStorage(gl_target, buffer_size, nullptr, flags); + mapped_ptr = static_cast(glMapBufferRange( + gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); + } else { + glBufferData(gl_target, buffer_size, nullptr, GL_STREAM_DRAW); + } +} - std::pair Map(size_t size, size_t alignment) override; - void Unmap() override; - - std::vector data; -}; - -class StorageBuffer : public OGLStreamBuffer { -public: - explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} - ~StorageBuffer() override; - -private: - void Create(size_t size, size_t sync_subdivide) override; - void Release() override; - - std::pair Map(size_t size, size_t alignment) override; - void Unmap() override; - - struct Fence { - OGLSync sync; - size_t offset; - }; - std::deque head; - std::deque tail; - - u8* mapped_ptr; -}; - -OGLStreamBuffer::OGLStreamBuffer(GLenum target) { - gl_target = target; +OGLStreamBuffer::~OGLStreamBuffer() { + if (persistent) { + glBindBuffer(gl_target, gl_buffer.handle); + glUnmapBuffer(gl_target); + } + gl_buffer.Release(); } GLuint OGLStreamBuffer::GetHandle() const { return gl_buffer.handle; } -std::unique_ptr OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { - if (storage_buffer) { - return std::make_unique(target); - } - return std::make_unique(target); +GLsizeiptr OGLStreamBuffer::GetSize() const { + return buffer_size; } -OrphanBuffer::~OrphanBuffer() { - Release(); -} - -void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { - buffer_pos = 0; - buffer_size = size; - data.resize(buffer_size); - - if (gl_buffer.handle == 0) { - gl_buffer.Create(); - glBindBuffer(gl_target, gl_buffer.handle); - } - - glBufferData(gl_target, static_cast(buffer_size), nullptr, GL_STREAM_DRAW); -} - -void OrphanBuffer::Release() { - gl_buffer.Release(); -} - -std::pair OrphanBuffer::Map(size_t size, size_t alignment) { - buffer_pos = Common::AlignUp(buffer_pos, alignment); - - if (buffer_pos + size > buffer_size) { - Create(std::max(buffer_size, size), 0); - } - - mapped_size = size; - return std::make_pair(&data[buffer_pos], static_cast(buffer_pos)); -} - -void OrphanBuffer::Unmap() { - glBufferSubData(gl_target, static_cast(buffer_pos), - static_cast(mapped_size), &data[buffer_pos]); - buffer_pos += mapped_size; -} - -StorageBuffer::~StorageBuffer() { - Release(); -} - -void StorageBuffer::Create(size_t size, size_t sync_subdivide) { - if (gl_buffer.handle != 0) - return; - - buffer_pos = 0; - buffer_size = size; - buffer_sync_subdivide = std::max(sync_subdivide, 1); - - gl_buffer.Create(); - glBindBuffer(gl_target, gl_buffer.handle); - - glBufferStorage(gl_target, static_cast(buffer_size), nullptr, - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); - mapped_ptr = reinterpret_cast( - glMapBufferRange(gl_target, 0, static_cast(buffer_size), - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); -} - -void StorageBuffer::Release() { - if (gl_buffer.handle == 0) - return; - - glUnmapBuffer(gl_target); - - gl_buffer.Release(); - head.clear(); - tail.clear(); -} - -std::pair StorageBuffer::Map(size_t size, size_t alignment) { +std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { ASSERT(size <= buffer_size); - - OGLSync sync; - - buffer_pos = Common::AlignUp(buffer_pos, alignment); - size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); - - if (!head.empty() && - (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { - ASSERT(head.back().sync.handle == 0); - head.back().sync.Create(); - } - - if (buffer_pos + size > buffer_size) { - if (!tail.empty()) { - std::swap(sync, tail.back().sync); - tail.clear(); - } - std::swap(tail, head); - buffer_pos = 0; - effective_offset = 0; - } - - while (!tail.empty() && buffer_pos + size > tail.front().offset) { - std::swap(sync, tail.front().sync); - tail.pop_front(); - } - - if (sync.handle != 0) { - glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); - sync.Release(); - } - - if (head.empty() || effective_offset > head.back().offset) { - head.emplace_back(); - head.back().offset = effective_offset; - } - + ASSERT(alignment <= buffer_size); mapped_size = size; - return std::make_pair(&mapped_ptr[buffer_pos], static_cast(buffer_pos)); + + if (alignment > 0) { + buffer_pos = Common::AlignUp(buffer_pos, alignment); + } + + bool invalidate = false; + if (buffer_pos + size > buffer_size) { + buffer_pos = 0; + invalidate = true; + + if (persistent) { + glUnmapBuffer(gl_target); + } + } + + if (invalidate | !persistent) { + GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | + (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | + (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); + mapped_ptr = static_cast( + glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); + mapped_offset = buffer_pos; + } + + return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); } -void StorageBuffer::Unmap() { - glFlushMappedBufferRange(gl_target, static_cast(buffer_pos), - static_cast(mapped_size)); - buffer_pos += mapped_size; +void OGLStreamBuffer::Unmap(GLsizeiptr size) { + ASSERT(size <= mapped_size); + + if (!coherent) { + glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); + } + + if (!persistent) { + glUnmapBuffer(gl_target); + } + + buffer_pos += size; } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 4bc2f52e04..45592daaf8 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -2,33 +2,41 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include #include "common/common_types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLenum target); - virtual ~OGLStreamBuffer() = default; - -public: - static std::unique_ptr MakeBuffer(bool storage_buffer, GLenum target); - - virtual void Create(size_t size, size_t sync_subdivide) = 0; - virtual void Release() {} + explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false); + ~OGLStreamBuffer(); GLuint GetHandle() const; + GLsizeiptr GetSize() const; - virtual std::pair Map(size_t size, size_t alignment) = 0; - virtual void Unmap() = 0; + /* + * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes + * and the optional alignment requirement. + * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. + * The return values are the pointer to the new chunk, the offset within the buffer, + * and the invalidation flag for previous chunks. + * The actual used size must be specified on unmapping the chunk. + */ + std::tuple Map(GLsizeiptr size, GLintptr alignment = 0); -protected: + void Unmap(GLsizeiptr size); + +private: OGLBuffer gl_buffer; GLenum gl_target; - size_t buffer_pos = 0; - size_t buffer_size = 0; - size_t buffer_sync_subdivide = 0; - size_t mapped_size = 0; + bool coherent = false; + bool persistent = false; + + GLintptr buffer_pos = 0; + GLsizeiptr buffer_size = 0; + GLintptr mapped_offset = 0; + GLsizeiptr mapped_size = 0; + u8* mapped_ptr = nullptr; };