From e9c209ccc8ec8800306aa49b9b805f9d502d0a7e Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Mon, 30 Nov 2015 22:33:38 -0800
Subject: [PATCH] PICA: Properly emulate 1-stage delay in the combiner buffer

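This was discovered and verified by @fincs. The TEV combiner buffer
actually lags behind by one stage, meaning stage 1 reads the initial
color, stage 2 reads stage 0's output, and so on. Stage 0 itself now
reads a zero-initialized buffer, since the configured buffer color only
becomes visible one stage later.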

Fixes character portraits in Fire Emblem: Awakening and world textures
in Zelda: ALBW. Closes #1140.
---
 src/video_core/rasterizer.cpp                  | 13 ++++++++-----
 .../renderer_opengl/gl_shader_gen.cpp          | 18 +++++++++++-------
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 226fad7834..ecfdbc9e85 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -498,7 +498,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
             // with some basic arithmetic. Alpha combiners can be configured separately but work
             // analogously.
             Math::Vec4<u8> combiner_output;
-            Math::Vec4<u8> combiner_buffer = {
+            Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0};
+            Math::Vec4<u8> next_combiner_buffer = {
                 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g,
                 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a
             };
@@ -747,14 +748,16 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
                 combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
                 combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
 
+                combiner_buffer = next_combiner_buffer;
+
                 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
-                    combiner_buffer.r() = combiner_output.r();
-                    combiner_buffer.g() = combiner_output.g();
-                    combiner_buffer.b() = combiner_output.b();
+                    next_combiner_buffer.r() = combiner_output.r();
+                    next_combiner_buffer.g() = combiner_output.g();
+                    next_combiner_buffer.b() = combiner_output.b();
                 }
 
                 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
-                    combiner_buffer.a() = combiner_output.a();
+                    next_combiner_buffer.a() = combiner_output.a();
                 }
             }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 5268340cfc..498c506e7b 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -311,11 +311,13 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
             "clamp(alpha_output_" + index_name + " * " + std::to_string(stage.GetAlphaMultiplier()) + ".0, 0.0, 1.0));\n";
     }
 
+    out += "combiner_buffer = next_combiner_buffer;\n";
+
     if (config.TevStageUpdatesCombinerBufferColor(index))
-        out += "combiner_buffer.rgb = last_tex_env_out.rgb;\n";
+        out += "next_combiner_buffer.rgb = last_tex_env_out.rgb;\n";
 
     if (config.TevStageUpdatesCombinerBufferAlpha(index))
-        out += "combiner_buffer.a = last_tex_env_out.a;\n";
+        out += "next_combiner_buffer.a = last_tex_env_out.a;\n";
 }
 
 std::string GenerateFragmentShader(const PicaShaderConfig& config) {
@@ -334,12 +336,10 @@ layout (std140) uniform shader_data {
     int alphatest_ref;
 };
 
-)";
+uniform sampler2D tex[3];
 
-    out += "uniform sampler2D tex[3];\n";
-    out += "void main() {\n";
-    out += "vec4 combiner_buffer = tev_combiner_buffer_color;\n";
-    out += "vec4 last_tex_env_out = vec4(0.0);\n";
+void main() {
+)";
 
     // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
     if (config.alpha_test_func == Regs::CompareFunc::Never) {
@@ -347,6 +347,10 @@ layout (std140) uniform shader_data {
         return out;
     }
 
+    out += "vec4 combiner_buffer = vec4(0.0);\n";
+    out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
+    out += "vec4 last_tex_env_out = vec4(0.0);\n";
+
     for (size_t index = 0; index < config.tev_stages.size(); ++index)
         WriteTevStage(out, config, (unsigned)index);