From fb6cf12a17daf1f452d9c542e22c9252bbfb5c76 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 28 Nov 2019 22:59:09 -0300
Subject: [PATCH] gl_framebuffer_cache: Optimize framebuffer key

Pack the color attachment enumerations into a single u32. The number of
draw buffers is derived from the highest color attachment index whose
shared pointer is not null.
---
 .../renderer_opengl/gl_framebuffer_cache.cpp  | 63 ++++++++++++-------
 .../renderer_opengl/gl_framebuffer_cache.h    | 19 +++---
 .../renderer_opengl/gl_rasterizer.cpp         | 30 ++++-----
 3 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
index a5d69d78df..874ed3c6e7 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -3,9 +3,12 @@
 // Refer to the license.txt file included.
 
 #include <tuple>
+#include <unordered_map>
+#include <utility>
 
-#include "common/cityhash.h"
-#include "common/scope_exit.h"
+#include <glad/glad.h>
+
+#include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
 #include "video_core/renderer_opengl/gl_state.h"
@@ -13,6 +16,7 @@
 namespace OpenGL {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using VideoCore::Surface::SurfaceType;
 
 FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
 
@@ -35,36 +39,49 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
     local_state.draw.draw_framebuffer = framebuffer.handle;
     local_state.ApplyFramebufferState();
 
-    for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
-        if (key.colors[index]) {
-            key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
-                                      GL_DRAW_FRAMEBUFFER);
-        }
-    }
-    if (key.colors_count) {
-        glDrawBuffers(key.colors_count, key.color_attachments.data());
-    } else {
-        glDrawBuffer(GL_NONE);
+    if (key.zeta) {
+        const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
+        const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
+        key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
     }
 
-    if (key.zeta) {
-        key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT,
-                         GL_DRAW_FRAMEBUFFER);
+    std::size_t num_buffers = 0;
+    std::array<GLenum, Maxwell::NumRenderTargets> targets;
+
+    for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+        if (!key.colors[index]) {
+            targets[index] = GL_NONE;
+            continue;
+        }
+        const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
+        key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
+        // Unpack this slot's field; the mask follows BitsPerAttachment rather than a literal.
+        const u32 packed = key.color_attachments >> (BitsPerAttachment * index);
+        targets[index] = GL_COLOR_ATTACHMENT0 + (packed & ((1U << BitsPerAttachment) - 1));
+        num_buffers = index + 1;
+    }
+    // The draw buffer count runs up to the highest bound slot; unbound slots stay GL_NONE.
+    if (num_buffers > 0) {
+        glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
+    } else {
+        glDrawBuffer(GL_NONE);
     }
 
     return framebuffer;
 }
 
-std::size_t FramebufferCacheKey::Hash() const {
-    static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct");
-    return static_cast<std::size_t>(
-        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
+std::size_t FramebufferCacheKey::Hash() const noexcept {
+    // Order-sensitive combine; a plain XOR fold collides for permuted or repeated views.
+    std::size_t hash = std::hash<View>{}(zeta) * 31 + color_attachments;
+    for (const auto& color : colors) {
+        hash = hash * 31 + std::hash<View>{}(color);
+    }
+    return hash;
 }
 
-bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
-    return std::tie(stencil_enable, colors_count, color_attachments, colors, zeta) ==
-           std::tie(rhs.stencil_enable, rhs.colors_count, rhs.color_attachments, rhs.colors,
-                    rhs.zeta);
+bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
+    // Check the packed attachment word first, then the depth view and the color views.
+    return color_attachments == rhs.color_attachments && zeta == rhs.zeta && colors == rhs.colors;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
index 3b5ff2149c..02ec80ae9a 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -18,21 +18,24 @@
 
 namespace OpenGL {
 
-struct alignas(sizeof(u64)) FramebufferCacheKey {
-    bool stencil_enable;
-    u16 colors_count;
+constexpr std::size_t BitsPerAttachment = 4; // Width of one slot inside color_attachments
 
-    std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{};
-    std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
+struct FramebufferCacheKey {
     View zeta;
+    std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
+    u32 color_attachments = 0; // Packed fields, BitsPerAttachment bits per color index
 
-    std::size_t Hash() const;
+    std::size_t Hash() const noexcept;
 
-    bool operator==(const FramebufferCacheKey& rhs) const;
+    bool operator==(const FramebufferCacheKey& rhs) const noexcept;
 
-    bool operator!=(const FramebufferCacheKey& rhs) const {
+    bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
         return !operator==(rhs);
     }
+    /// ORs `attachment` into slot `index`; nothing is masked or cleared beforehand.
+    void SetAttachment(std::size_t index, u32 attachment) {
+        color_attachments |= attachment << (BitsPerAttachment * index);
+    }
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 18c1222289..9eef7fcd28 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -372,33 +372,31 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
     UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
 
     // Bind the framebuffer surfaces
-    FramebufferCacheKey fbkey;
-    for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+    FramebufferCacheKey key;
+    const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
+    for (std::size_t index = 0; index < colors_count; ++index) {
         View color_surface{texture_cache.GetColorBufferSurface(index, true)};
-
-        if (color_surface) {
-            // Assume that a surface will be written to if it is used as a framebuffer, even
-            // if the shader doesn't actually write to it.
-            texture_cache.MarkColorBufferInUse(index);
+        if (!color_surface) {
+            continue;
         }
+        // Assume that a surface will be written to if it is used as a framebuffer, even
+        // if the shader doesn't actually write to it.
+        texture_cache.MarkColorBufferInUse(index);
 
-        fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
-        fbkey.colors[index] = std::move(color_surface);
+        key.SetAttachment(index, regs.rt_control.GetMap(index));
+        key.colors[index] = std::move(color_surface);
     }
-    fbkey.colors_count = static_cast<u16>(regs.rt_control.count);
 
     if (depth_surface) {
         // Assume that a surface will be written to if it is used as a framebuffer, even if
         // the shader doesn't actually write to it.
         texture_cache.MarkDepthBufferInUse();
-
-        fbkey.stencil_enable = depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
-        fbkey.zeta = std::move(depth_surface);
+        key.zeta = std::move(depth_surface);
     }
 
     texture_cache.GuardRenderTargets(false);
 
-    state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey);
+    state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
     SyncViewport(state);
 }
 
@@ -421,12 +419,8 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo
     texture_cache.GuardRenderTargets(false);
 
     FramebufferCacheKey key;
-    key.colors_count = color_surface ? 1 : 0;
     key.colors[0] = color_surface;
-    key.color_attachments[0] = GL_COLOR_ATTACHMENT0;
     key.zeta = depth_surface;
-    key.stencil_enable = depth_surface && depth_surface->GetSurfaceParams().type ==
-                                              VideoCore::Surface::SurfaceType::DepthStencil;
 
     current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
     current_state.ApplyFramebufferState();