From 9645c1e1b2cb4bab4303e05e09ab761938ed203c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 8 Jun 2020 06:37:39 -0400 Subject: [PATCH 1/4] gl_shader_gen: Eliminate unnecessary std::string construction in AppendProcTexCombineAndMap() We can make use of a std::string_view here, since only string literals are used within the switch. --- .../renderer_opengl/gl_shader_gen.cpp | 65 ++++++++----------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 2601ad58e4..00735b2135 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1018,43 +1018,34 @@ void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mod void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, std::string_view offset) { - std::string combined; - switch (combiner) { - case ProcTexCombiner::U: - combined = "u"; - break; - case ProcTexCombiner::U2: - combined = "(u * u)"; - break; - case TexturingRegs::ProcTexCombiner::V: - combined = "v"; - break; - case TexturingRegs::ProcTexCombiner::V2: - combined = "(v * v)"; - break; - case TexturingRegs::ProcTexCombiner::Add: - combined = "((u + v) * 0.5)"; - break; - case TexturingRegs::ProcTexCombiner::Add2: - combined = "((u * u + v * v) * 0.5)"; - break; - case TexturingRegs::ProcTexCombiner::SqrtAdd2: - combined = "min(sqrt(u * u + v * v), 1.0)"; - break; - case TexturingRegs::ProcTexCombiner::Min: - combined = "min(u, v)"; - break; - case TexturingRegs::ProcTexCombiner::Max: - combined = "max(u, v)"; - break; - case TexturingRegs::ProcTexCombiner::RMax: - combined = "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; - break; - default: - LOG_CRITICAL(HW_GPU, "Unknown combiner {}", static_cast(combiner)); - combined = "0.0"; - break; - } + const auto combined = [combiner]() -> std::string_view { + switch (combiner) { + case ProcTexCombiner::U: + return "u"; + case ProcTexCombiner::U2: + return "(u * u)"; + case TexturingRegs::ProcTexCombiner::V: + return "v"; + case TexturingRegs::ProcTexCombiner::V2: + return "(v * v)"; + case TexturingRegs::ProcTexCombiner::Add: + return "((u + v) * 0.5)"; + case TexturingRegs::ProcTexCombiner::Add2: + return "((u * u + v * v) * 0.5)"; + case TexturingRegs::ProcTexCombiner::SqrtAdd2: + return "min(sqrt(u * u + v * v), 1.0)"; + case TexturingRegs::ProcTexCombiner::Min: + return "min(u, v)"; + case TexturingRegs::ProcTexCombiner::Max: + return "max(u, v)"; + case TexturingRegs::ProcTexCombiner::RMax: + return "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; + default: + LOG_CRITICAL(HW_GPU, "Unknown combiner {}", static_cast(combiner)); + return "0.0"; + } + }(); + out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined); } From e79e991681e605fe9bf99416690beef0057b925b Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 8 Jun 2020 06:40:36 -0400 Subject: [PATCH 2/4] gl_shader_gen: Mark local functions as static where applicable These aren't used outside of the translation unit. --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 00735b2135..cbd1b44c6e 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -971,8 +971,8 @@ using ProcTexShift = TexturingRegs::ProcTexShift; using ProcTexCombiner = TexturingRegs::ProcTexCombiner; using ProcTexFilter = TexturingRegs::ProcTexFilter; -void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode, - ProcTexClamp clamp_mode) { +static void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode, + ProcTexClamp clamp_mode) { const std::string_view offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? "1.0" : "0.5"; switch (mode) { case ProcTexShift::None: @@ -991,7 +991,7 @@ void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift } } -void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) { +static void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) { switch (mode) { case ProcTexClamp::ToZero: out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var); @@ -1016,8 +1016,8 @@ void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mod } } -void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, - std::string_view offset) { +static void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, + std::string_view offset) { const auto combined = [combiner]() -> std::string_view { switch (combiner) { case ProcTexCombiner::U: @@ -1049,7 +1049,7 @@ void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined); } -void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { +static void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { // LUT sampling uitlity // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using From 11792682fbbc53f57d92f5363a4541518016cb64 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 8 Jun 2020 06:44:01 -0400 Subject: [PATCH 3/4] gl_shader_gen: Collapse several format calls into one Same behavior, but performs one formatting pass without altering the readability of the formatting. --- .../renderer_opengl/gl_shader_gen.cpp | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index cbd1b44c6e..302c93626c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1583,22 +1583,19 @@ ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shade out += "#extension GL_ARB_separate_shader_objects : enable\n"; } - out += fmt::format("layout(location = {}) in vec4 vert_position;\n", - static_cast(ATTRIBUTE_POSITION)); - out += fmt::format("layout(location = {}) in vec4 vert_color;\n", - static_cast(ATTRIBUTE_COLOR)); - out += fmt::format("layout(location = {}) in vec2 vert_texcoord0;\n", - static_cast(ATTRIBUTE_TEXCOORD0)); - out += fmt::format("layout(location = {}) in vec2 vert_texcoord1;\n", - static_cast(ATTRIBUTE_TEXCOORD1)); - out += fmt::format("layout(location = {}) in vec2 vert_texcoord2;\n", - static_cast(ATTRIBUTE_TEXCOORD2)); - out += fmt::format("layout(location = {}) in float vert_texcoord0_w;\n", - static_cast(ATTRIBUTE_TEXCOORD0_W)); - out += fmt::format("layout(location = {}) in vec4 vert_normquat;\n", - static_cast(ATTRIBUTE_NORMQUAT)); out += - fmt::format("layout(location = {}) in vec3 vert_view;\n", static_cast(ATTRIBUTE_VIEW)); + fmt::format("layout(location = {}) in vec4 vert_position;\n" + "layout(location = {}) in vec4 vert_color;\n" + "layout(location = {}) in vec2 vert_texcoord0;\n" + "layout(location = {}) in vec2 vert_texcoord1;\n" + "layout(location = {}) in vec2 vert_texcoord2;\n" + "layout(location = {}) in float vert_texcoord0_w;\n" + "layout(location = {}) in vec4 vert_normquat;\n" + "layout(location = {}) in vec3 vert_view;\n", + static_cast(ATTRIBUTE_POSITION), static_cast(ATTRIBUTE_COLOR), + static_cast(ATTRIBUTE_TEXCOORD0), static_cast(ATTRIBUTE_TEXCOORD1), + static_cast(ATTRIBUTE_TEXCOORD2), static_cast(ATTRIBUTE_TEXCOORD0_W), + static_cast(ATTRIBUTE_NORMQUAT), static_cast(ATTRIBUTE_VIEW)); out += GetVertexInterfaceDeclaration(true, separable_shader); From 2b031e4eafe76d3c69f1d185358033246369e54b Mon Sep 17 00:00:00 2001 From: Lioncash Date: Mon, 8 Jun 2020 06:56:07 -0400 Subject: [PATCH 4/4] gl_shader_gen: std::move strings where applicable Avoids the creation of a copy when returning. --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 302c93626c..77429b887c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1470,7 +1470,7 @@ vec4 secondary_fragment_color = vec4(0.0); // Do not do any sort of processing if it's obvious we're not going to pass the alpha test if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) { out += "discard; }"; - return {out}; + return {std::move(out)}; } // Append the scissor test @@ -1536,7 +1536,7 @@ vec4 secondary_fragment_color = vec4(0.0); "VideoCore_Pica_UseGasMode", true); LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); out += "discard; }"; - return {out}; + return {std::move(out)}; } if (state.shadow_rendering) { @@ -1574,7 +1574,7 @@ do { out += '}'; - return {out}; + return {std::move(out)}; } ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) { @@ -1619,7 +1619,7 @@ void main() { } )"; - return {out}; + return {std::move(out)}; } std::optional GenerateVertexShader( @@ -1684,7 +1684,7 @@ layout (std140) uniform vs_config { out += program_source; - return {{out}}; + return {{std::move(out)}}; } static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config, bool separable_shader) { @@ -1802,6 +1802,6 @@ void main() { out += " EmitPrim(prim_buffer[0], prim_buffer[1], prim_buffer[2]);\n"; out += "}\n"; - return {out}; + return {std::move(out)}; } } // namespace OpenGL