From 1f6393e7d5095d83089387f2e915e420e6bbff86 Mon Sep 17 00:00:00 2001 From: GPUCode <47210458+GPUCode@users.noreply.github.com> Date: Mon, 6 Nov 2023 22:26:28 +0200 Subject: [PATCH] video_core: Refactor GLSL fragment emitter (#7093) * video_core: Refactor GLSL fragment emitter * shader: Add back custom normal maps --- CMakeModules/GenerateSCMRev.cmake | 8 +- src/common/CMakeLists.txt | 8 +- src/video_core/CMakeLists.txt | 9 +- src/video_core/rasterizer_accelerated.h | 3 + src/video_core/renderer_opengl/gl_driver.cpp | 4 +- src/video_core/renderer_opengl/gl_driver.h | 13 +- .../renderer_opengl/gl_rasterizer.cpp | 17 +- .../renderer_opengl/gl_rasterizer.h | 1 - .../renderer_opengl/gl_shader_manager.cpp | 64 +- .../renderer_opengl/gl_shader_manager.h | 5 +- .../renderer_vulkan/vk_graphics_pipeline.h | 4 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 42 +- .../renderer_vulkan/vk_pipeline_cache.h | 14 +- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- .../shader/generator/glsl_fs_shader_gen.cpp | 1605 +++++++++++++++++ .../shader/generator/glsl_fs_shader_gen.h | 100 + .../shader/generator/glsl_shader_gen.cpp | 1570 +--------------- .../shader/generator/glsl_shader_gen.h | 8 - .../shader/generator/pica_fs_config.cpp | 193 ++ .../shader/generator/pica_fs_config.h | 207 +++ src/video_core/shader/generator/profile.h | 25 + .../shader/generator/shader_gen.cpp | 212 --- src/video_core/shader/generator/shader_gen.h | 142 -- ...v_shader_gen.cpp => spv_fs_shader_gen.cpp} | 169 +- .../{spv_shader_gen.h => spv_fs_shader_gen.h} | 13 +- 25 files changed, 2340 insertions(+), 2098 deletions(-) create mode 100644 src/video_core/shader/generator/glsl_fs_shader_gen.cpp create mode 100644 src/video_core/shader/generator/glsl_fs_shader_gen.h create mode 100644 src/video_core/shader/generator/pica_fs_config.cpp create mode 100644 src/video_core/shader/generator/pica_fs_config.h create mode 100644 src/video_core/shader/generator/profile.h rename 
src/video_core/shader/generator/{spv_shader_gen.cpp => spv_fs_shader_gen.cpp} (93%) rename src/video_core/shader/generator/{spv_shader_gen.h => spv_fs_shader_gen.h} (96%) diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index ed261c1cf9..baf13335ec 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -10,16 +10,20 @@ set(HASH_FILES "${VIDEO_CORE}/renderer_opengl/gl_shader_util.h" "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp" "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h" + "${VIDEO_CORE}/shader/generator/glsl_fs_shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/glsl_fs_shader_gen.h" "${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.cpp" "${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.h" "${VIDEO_CORE}/shader/generator/glsl_shader_gen.cpp" "${VIDEO_CORE}/shader/generator/glsl_shader_gen.h" + "${VIDEO_CORE}/shader/generator/pica_fs_config.cpp" + "${VIDEO_CORE}/shader/generator/pica_fs_config.h" "${VIDEO_CORE}/shader/generator/shader_gen.cpp" "${VIDEO_CORE}/shader/generator/shader_gen.h" "${VIDEO_CORE}/shader/generator/shader_uniforms.cpp" "${VIDEO_CORE}/shader/generator/shader_uniforms.h" - "${VIDEO_CORE}/shader/generator/spv_shader_gen.cpp" - "${VIDEO_CORE}/shader/generator/spv_shader_gen.h" + "${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.h" "${VIDEO_CORE}/shader/shader.cpp" "${VIDEO_CORE}/shader/shader.h" "${VIDEO_CORE}/pica.cpp" diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 3b9c51984f..6b97b68378 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -21,16 +21,20 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/renderer_opengl/gl_shader_util.h" "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp" "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h" + "${VIDEO_CORE}/shader/generator/glsl_fs_shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/glsl_fs_shader_gen.h" 
"${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.cpp" "${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.h" "${VIDEO_CORE}/shader/generator/glsl_shader_gen.cpp" "${VIDEO_CORE}/shader/generator/glsl_shader_gen.h" + "${VIDEO_CORE}/shader/generator/pica_fs_config.cpp" + "${VIDEO_CORE}/shader/generator/pica_fs_config.h" "${VIDEO_CORE}/shader/generator/shader_gen.cpp" "${VIDEO_CORE}/shader/generator/shader_gen.h" "${VIDEO_CORE}/shader/generator/shader_uniforms.cpp" "${VIDEO_CORE}/shader/generator/shader_uniforms.h" - "${VIDEO_CORE}/shader/generator/spv_shader_gen.cpp" - "${VIDEO_CORE}/shader/generator/spv_shader_gen.h" + "${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.h" "${VIDEO_CORE}/shader/shader.cpp" "${VIDEO_CORE}/shader/shader.h" "${VIDEO_CORE}/pica.cpp" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cdba5cbd31..252a80c17e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -135,16 +135,21 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_runtime.cpp renderer_vulkan/vk_texture_runtime.h shader/debug_data.h + shader/generator/glsl_fs_shader_gen.cpp + shader/generator/glsl_fs_shader_gen.h shader/generator/glsl_shader_decompiler.cpp shader/generator/glsl_shader_decompiler.h shader/generator/glsl_shader_gen.cpp shader/generator/glsl_shader_gen.h + shader/generator/pica_fs_config.cpp + shader/generator/pica_fs_config.h + shader/generator/profile.h shader/generator/shader_gen.cpp shader/generator/shader_gen.h shader/generator/shader_uniforms.cpp shader/generator/shader_uniforms.h - shader/generator/spv_shader_gen.cpp - shader/generator/spv_shader_gen.h + shader/generator/spv_fs_shader_gen.cpp + shader/generator/spv_fs_shader_gen.h shader/shader.cpp shader/shader.h shader/shader_interpreter.cpp diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 0bef2c5370..bf14c97283 100644 --- 
a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -7,6 +7,7 @@ #include "common/vector_math.h" #include "video_core/rasterizer_interface.h" #include "video_core/regs_texturing.h" +#include "video_core/shader/generator/pica_fs_config.h" #include "video_core/shader/generator/shader_uniforms.h" namespace Memory { @@ -153,6 +154,7 @@ protected: Pica::Regs& regs; std::vector vertex_batch; + Pica::Shader::UserConfig user_config{}; bool shader_dirty = true; VSUniformBlockData vs_uniform_block_data{}; @@ -166,4 +168,5 @@ protected: std::array proctex_lut_data{}; std::array proctex_diff_lut_data{}; }; + } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp index cddd915aa6..60ee92f4d6 100644 --- a/src/video_core/renderer_opengl/gl_driver.cpp +++ b/src/video_core/renderer_opengl/gl_driver.cpp @@ -179,8 +179,8 @@ void Driver::CheckExtensionSupport() { arb_texture_compression_bptc = GLAD_GL_ARB_texture_compression_bptc; clip_cull_distance = !is_gles || GLAD_GL_EXT_clip_cull_distance; ext_texture_compression_s3tc = GLAD_GL_EXT_texture_compression_s3tc; - shader_framebuffer_fetch = - GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch; + ext_shader_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch; + arm_shader_framebuffer_fetch = GLAD_GL_ARM_shader_framebuffer_fetch; blend_minmax_factor = GLAD_GL_AMD_blend_minmax_factor || GLAD_GL_NV_blend_minmax_factor; is_suitable = GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1; } diff --git a/src/video_core/renderer_opengl/gl_driver.h b/src/video_core/renderer_opengl/gl_driver.h index 62c6ee2ceb..81c6d734dc 100644 --- a/src/video_core/renderer_opengl/gl_driver.h +++ b/src/video_core/renderer_opengl/gl_driver.h @@ -107,7 +107,15 @@ public: /// Returns true if the implementation supports (EXT/ARM)_shader_framebuffer_fetch bool HasShaderFramebufferFetch() const { - return shader_framebuffer_fetch; 
+ return ext_shader_framebuffer_fetch || arm_shader_framebuffer_fetch; + } + + bool HasExtFramebufferFetch() const { + return ext_shader_framebuffer_fetch; + } + + bool HasArmShaderFramebufferFetch() const { + return arm_shader_framebuffer_fetch; } /// Returns true if the implementation supports (NV/AMD)_blend_minmax_factor @@ -136,7 +144,8 @@ private: bool clip_cull_distance{}; bool ext_texture_compression_s3tc{}; bool arb_texture_compression_bptc{}; - bool shader_framebuffer_fetch{}; + bool arm_shader_framebuffer_fetch{}; + bool ext_shader_framebuffer_fetch{}; bool blend_minmax_factor{}; std::string_view gl_version{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a5bed59ef2..d9c3bbf669 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -426,7 +426,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { // Sync and bind the shader if (shader_dirty) { - shader_manager.UseFragmentShader(regs, use_custom_normal); + shader_manager.UseFragmentShader(regs, user_config); shader_dirty = false; } @@ -479,7 +479,7 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer* framebuffer) { // Reset transient draw state state.color_buffer.texture_2d = 0; - use_custom_normal = false; + user_config = {}; const auto pica_textures = regs.texturing.GetTextures(); for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { @@ -577,20 +577,15 @@ void RasterizerOpenGL::BindMaterial(u32 texture_index, Surface& surface) { return; } - const auto bind_texture = [&](const TextureUnits::TextureUnit& unit, GLuint texture, - GLuint sampler) { - glActiveTexture(unit.Enum()); - glBindTexture(GL_TEXTURE_2D, texture); - glBindSampler(unit.id, sampler); - }; - const GLuint sampler = state.texture_units[texture_index].sampler; if (surface.HasNormalMap()) { if (regs.lighting.disable) { LOG_WARNING(Render_OpenGL, "Custom 
normal map used but scene has no light enabled"); } - bind_texture(TextureUnits::TextureNormalMap, surface.Handle(2), sampler); - use_custom_normal = true; + glActiveTexture(TextureUnits::TextureNormalMap.Enum()); + glBindTexture(GL_TEXTURE_2D, surface.Handle(2)); + glBindSampler(TextureUnits::TextureNormalMap.id, sampler); + user_config.use_custom_normal.Assign(1); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 72efd78a29..b102d8089d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -155,7 +155,6 @@ private: OGLTexture texture_buffer_lut_lf; OGLTexture texture_buffer_lut_rg; OGLTexture texture_buffer_lut_rgba; - bool use_custom_normal{}; bool emulate_minmax_blend{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index e171820bfd..44a753f6cc 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -14,10 +14,12 @@ #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" -#include "video_core/shader/generator/shader_uniforms.h" +#include "video_core/shader/generator/glsl_fs_shader_gen.h" +#include "video_core/shader/generator/profile.h" #include "video_core/video_core.h" using namespace Pica::Shader::Generator; +using Pica::Shader::FSConfig; namespace OpenGL { @@ -146,17 +148,20 @@ private: OGLShaderStage program; }; -template +template class ShaderCache { public: - explicit ShaderCache(bool separable) : separable(separable) {} - std::tuple> Get(const KeyConfigType& config) { + explicit ShaderCache(bool separable_) : separable{separable_} {} + ~ShaderCache() = default; + + template + std::tuple> Get(const KeyConfigType& config, + Args&&... 
args) { auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable}); OGLShaderStage& cached_shader = iter->second; std::optional result{}; if (new_shader) { - result = CodeGenerator(config, separable); + result = CodeGenerator(config, args...); cached_shader.Create(result->c_str(), ShaderType); } return {cached_shader.GetHandle(), std::move(result)}; @@ -243,8 +248,7 @@ using ProgrammableVertexShaders = using FixedGeometryShaders = ShaderCache; -using FragmentShaders = - ShaderCache; +using FragmentShaders = ShaderCache; class ShaderProgramManager::Impl { public: @@ -252,8 +256,24 @@ public: : separable(separable), programmable_vertex_shaders(separable), trivial_vertex_shader(driver, separable), fixed_geometry_shaders(separable), fragment_shaders(separable), disk_cache(separable) { - if (separable) + if (separable) { pipeline.Create(); + } + profile = Pica::Shader::Profile{ + .has_separable_shaders = separable, + .has_clip_planes = driver.HasClipCullDistance(), + .has_geometry_shader = true, + .has_custom_border_color = true, + .has_fragment_shader_interlock = false, + .has_blend_minmax_factor = driver.HasBlendMinMaxFactor(), + .has_minus_one_to_one_range = true, + .has_logic_op = !driver.IsOpenGLES(), + .has_gl_ext_framebuffer_fetch = driver.HasExtFramebufferFetch(), + .has_gl_arm_framebuffer_fetch = driver.HasArmShaderFramebufferFetch(), + .has_gl_nv_fragment_shader_interlock = driver.GetVendor() == Vendor::Nvidia, + .has_gl_intel_fragment_shader_interlock = driver.GetVendor() == Vendor::Intel, + .is_vulkan = false, + }; } struct ShaderTuple { @@ -283,7 +303,7 @@ public: "ShaderTuple layout changed!"); bool separable; - + Pica::Shader::Profile profile{}; ShaderTuple current; ProgrammableVertexShaders programmable_vertex_shaders; @@ -336,7 +356,7 @@ void ShaderProgramManager::UseTrivialVertexShader() { void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) { PicaFixedGSConfig gs_config(regs, driver.HasClipCullDistance()); - auto 
[handle, _] = impl->fixed_geometry_shaders.Get(gs_config); + auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config, impl->separable); impl->current.gs = handle; impl->current.gs_hash = gs_config.Hash(); } @@ -346,12 +366,12 @@ void ShaderProgramManager::UseTrivialGeometryShader() { impl->current.gs_hash = 0; } -void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs, bool use_normal) { - PicaFSConfig config(regs, false, driver.IsOpenGLES(), false, driver.HasBlendMinMaxFactor(), - use_normal); - auto [handle, result] = impl->fragment_shaders.Get(config); +void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs, + const Pica::Shader::UserConfig& user) { + const FSConfig fs_config{regs, user, impl->profile}; + auto [handle, result] = impl->fragment_shaders.Get(fs_config, impl->profile); impl->current.fs = handle; - impl->current.fs_hash = config.Hash(); + impl->current.fs_hash = fs_config.Hash(); // Save FS to the disk cache if its a new shader if (result) { auto& disk_cache = impl->disk_cache; @@ -470,8 +490,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, impl->programmable_vertex_shaders.Inject(conf, decomp->second.code, std::move(shader)); } else if (raw.GetProgramType() == ProgramType::FS) { - PicaFSConfig conf(raw.GetRawShaderConfig(), false, driver.IsOpenGLES(), false, - driver.HasBlendMinMaxFactor()); + // TODO: Support UserConfig in disk shader cache + const FSConfig conf(raw.GetRawShaderConfig(), {}, impl->profile); std::scoped_lock lock(mutex); impl->fragment_shaders.Inject(conf, std::move(shader)); } else { @@ -581,14 +601,14 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, std::scoped_lock lock(mutex); impl->programmable_vertex_shaders.Inject(conf, code, std::move(stage)); } else if (raw.GetProgramType() == ProgramType::FS) { - PicaFSConfig conf(raw.GetRawShaderConfig(), false, driver.IsOpenGLES(), false, - driver.HasBlendMinMaxFactor()); - code = 
GLSL::GenerateFragmentShader(conf, impl->separable); + // TODO: Support UserConfig in disk shader cache + const FSConfig fs_config{raw.GetRawShaderConfig(), {}, impl->profile}; + code = GLSL::GenerateFragmentShader(fs_config, impl->profile); OGLShaderStage stage{impl->separable}; stage.Create(code.c_str(), GL_FRAGMENT_SHADER); handle = stage.GetHandle(); std::scoped_lock lock(mutex); - impl->fragment_shaders.Inject(conf, std::move(stage)); + impl->fragment_shaders.Inject(fs_config, std::move(stage)); } else { // Unsupported shader type got stored somehow so nuke the cache LOG_ERROR(Frontend, "failed to load raw ProgramType {}", raw.GetProgramType()); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 16c1b2142d..82dee27aae 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -17,7 +17,8 @@ struct Regs; namespace Pica::Shader { struct ShaderSetup; -} +union UserConfig; +} // namespace Pica::Shader namespace OpenGL { @@ -47,7 +48,7 @@ public: void UseTrivialGeometryShader(); - void UseFragmentShader(const Pica::Regs& config, bool use_normal); + void UseFragmentShader(const Pica::Regs& config, const Pica::Shader::UserConfig& user); void ApplyTo(OpenGLState& state); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index af2abd6d9d..5d20f0b13f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -4,9 +4,9 @@ #include "common/thread_worker.h" #include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/regs_pipeline.h" +#include "video_core/regs_rasterizer.h" #include "video_core/renderer_vulkan/vk_common.h" -#include "video_core/shader/generator/glsl_shader_gen.h" -#include "video_core/shader/generator/spv_shader_gen.h" namespace Common { diff --git 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index aa1d2cbe8d..a041afb452 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -15,8 +15,12 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/shader/generator/glsl_fs_shader_gen.h" +#include "video_core/shader/generator/glsl_shader_gen.h" +#include "video_core/shader/generator/spv_fs_shader_gen.h" using namespace Pica::Shader::Generator; +using Pica::Shader::FSConfig; MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32)); @@ -86,6 +90,17 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, trivial_vertex_shader{ instance, vk::ShaderStageFlagBits::eVertex, GLSL::GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported(), true)} { + profile = Pica::Shader::Profile{ + .has_separable_shaders = true, + .has_clip_planes = instance.IsShaderClipDistanceSupported(), + .has_geometry_shader = instance.UseGeometryShaders(), + .has_custom_border_color = instance.IsCustomBorderColorSupported(), + .has_fragment_shader_interlock = instance.IsFragmentShaderInterlockSupported(), + .has_blend_minmax_factor = false, + .has_minus_one_to_one_range = false, + .has_logic_op = !instance.NeedsLogicOpEmulation(), + .is_vulkan = true, + }; BuildLayout(); } @@ -403,35 +418,30 @@ void PipelineCache::UseTrivialGeometryShader() { shader_hashes[ProgramType::GS] = 0; } -void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { - const PicaFSConfig config{regs, instance.IsFragmentShaderInterlockSupported(), - instance.NeedsLogicOpEmulation(), - !instance.IsCustomBorderColorSupported(), false}; - - const auto [it, new_shader] = fragment_shaders.try_emplace(config, instance); +void 
PipelineCache::UseFragmentShader(const Pica::Regs& regs, + const Pica::Shader::UserConfig& user) { + const FSConfig fs_config{regs, user, profile}; + const auto [it, new_shader] = fragment_shaders.try_emplace(fs_config, instance); auto& shader = it->second; if (new_shader) { const bool use_spirv = Settings::values.spirv_shader_gen.GetValue(); - const auto texture0_type = config.state.texture0_type.Value(); - const bool is_shadow = texture0_type == Pica::TexturingRegs::TextureConfig::Shadow2D || - texture0_type == Pica::TexturingRegs::TextureConfig::ShadowCube || - config.state.shadow_rendering.Value(); - if (use_spirv && !is_shadow) { - const std::vector code = SPIRV::GenerateFragmentShader(config); + if (use_spirv && !fs_config.UsesShadowPipeline()) { + const std::vector code = SPIRV::GenerateFragmentShader(fs_config); shader.module = CompileSPV(code, instance.GetDevice()); shader.MarkDone(); } else { - workers.QueueWork([config, device = instance.GetDevice(), &shader]() { - const std::string code = GLSL::GenerateFragmentShader(config, true); - shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device); + workers.QueueWork([fs_config, this, &shader]() { + const std::string code = GLSL::GenerateFragmentShader(fs_config, profile); + shader.module = + Compile(code, vk::ShaderStageFlagBits::eFragment, instance.GetDevice()); shader.MarkDone(); }); } } current_shaders[ProgramType::FS] = &shader; - shader_hashes[ProgramType::FS] = config.Hash(); + shader_hashes[ProgramType::FS] = fs_config.Hash(); } void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 1a54a0f38c..4ef581493a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -9,13 +9,18 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include 
"video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/shader/generator/glsl_shader_gen.h" -#include "video_core/shader/generator/spv_shader_gen.h" +#include "video_core/shader/generator/pica_fs_config.h" +#include "video_core/shader/generator/profile.h" +#include "video_core/shader/generator/shader_gen.h" namespace Pica { struct Regs; } +namespace Pica::Shader { +struct ShaderSetup; +} + namespace Vulkan { class Instance; @@ -62,7 +67,7 @@ public: void UseTrivialGeometryShader(); /// Binds a fragment shader generated from PICA state - void UseFragmentShader(const Pica::Regs& regs); + void UseFragmentShader(const Pica::Regs& regs, const Pica::Shader::UserConfig& user); /// Binds a texture to the specified binding void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler); @@ -98,6 +103,7 @@ private: RenderpassCache& renderpass_cache; DescriptorPool& pool; + Pica::Shader::Profile profile{}; vk::UniquePipelineCache pipeline_cache; vk::UniquePipelineLayout pipeline_layout; std::size_t num_worker_threads; @@ -118,7 +124,7 @@ private: std::unordered_map programmable_vertex_map; std::unordered_map programmable_vertex_cache; std::unordered_map fixed_geometry_shaders; - std::unordered_map fragment_shaders; + std::unordered_map fragment_shaders; Shader trivial_vertex_shader; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f990661762..d565c9315f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -497,7 +497,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // Sync and bind the shader if (shader_dirty) { - pipeline_cache.UseFragmentShader(regs); + pipeline_cache.UseFragmentShader(regs, user_config); shader_dirty = false; } diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp new file mode 100644 index 
0000000000..449ed62c18 --- /dev/null +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp @@ -0,0 +1,1605 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/shader/generator/glsl_fs_shader_gen.h" + +namespace Pica::Shader::Generator::GLSL { + +using ProcTexClamp = TexturingRegs::ProcTexClamp; +using ProcTexShift = TexturingRegs::ProcTexShift; +using ProcTexCombiner = TexturingRegs::ProcTexCombiner; +using ProcTexFilter = TexturingRegs::ProcTexFilter; + +constexpr static size_t RESERVE_SIZE = 8 * 1024 * 1024; + +enum class Semantic : u32 { + Position, + Color, + Texcoord0, + Texcoord1, + Texcoord2, + Texcoord0_W, + Normquat, + View, +}; + +static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) { + using TevStageConfig = Pica::TexturingRegs::TevStageConfig; + return (stage.color_op == TevStageConfig::Operation::Replace && + stage.alpha_op == TevStageConfig::Operation::Replace && + stage.color_source1 == TevStageConfig::Source::Previous && + stage.alpha_source1 == TevStageConfig::Source::Previous && + stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && + stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && + stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); +} + +// High precision may or may not be supported in GLES3. If it isn't, use medium precision instead. 
+static constexpr char fragment_shader_precision_OES[] = R"( +#if GL_ES +#ifdef GL_FRAGMENT_PRECISION_HIGH +precision highp int; +precision highp float; +precision highp samplerBuffer; +precision highp uimage2D; +#else +precision mediump int; +precision mediump float; +precision mediump samplerBuffer; +precision mediump uimage2D; +#endif // GL_FRAGMENT_PRECISION_HIGH +#endif +)"; + +constexpr static std::string_view FSUniformBlockDef = R"( +#define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 +#define NUM_LIGHTING_SAMPLERS 24 +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spot_direction; + float dist_atten_bias; + float dist_atten_scale; +}; +layout (binding = 2, std140) uniform fs_data { + int framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + float shadow_bias_constant; + float shadow_bias_linear; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + float proctex_bias; + int shadow_texture_bias; + ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; + vec3 fog_color; + vec2 proctex_noise_f; + vec2 proctex_noise_a; + vec2 proctex_noise_p; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; + vec4 const_color[NUM_TEV_STAGES]; + vec4 tev_combiner_buffer_color; + vec3 tex_lod_bias; + vec4 tex_border_color[3]; + vec4 blend_color; +}; +)"; + +FragmentModule::FragmentModule(const FSConfig& config_, const Profile& profile_) + : config{config_}, profile{profile_} { + out.reserve(RESERVE_SIZE); + DefineExtensions(); + DefineInterface(); + DefineBindings(); + DefineHelpers(); + DefineShadowHelpers(); + DefineLightingHelpers(); + DefineProcTexSampler(); + for (u32 i = 0; i < 4; i++) { + DefineTexUnitSampler(i); + } +} + +FragmentModule::~FragmentModule() = default; + 
+std::string FragmentModule::Generate() { + // We round the interpolated primary color to the nearest 1/255th + // This maintains the PICA's 8 bits of precision + out += R"( +void main() { +vec4 rounded_primary_color = byteround(primary_color); +vec4 primary_fragment_color = vec4(0.0); +vec4 secondary_fragment_color = vec4(0.0); +)"; + + // Do not do any sort of processing if it's obvious we're not going to pass the alpha test + if (config.framebuffer.alpha_test_func == FramebufferRegs::CompareFunc::Never) { + out += "discard; }"; + return out; + } + + // Append the scissor and depth tests + WriteScissor(); + WriteDepth(); + + // Write shader source to emulate all enabled PICA lights + WriteLighting(); + + out += "vec4 combiner_buffer = vec4(0.0);\n" + "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n" + "vec4 combiner_output = rounded_primary_color;\n"; + + out += "vec3 color_results_1 = vec3(0.0);\n" + "vec3 color_results_2 = vec3(0.0);\n" + "vec3 color_results_3 = vec3(0.0);\n"; + + out += "float alpha_results_1 = 0.0;\n" + "float alpha_results_2 = 0.0;\n" + "float alpha_results_3 = 0.0;\n"; + + // Write shader source to emulate PICA TEV stages + for (u32 index = 0; index < config.texture.tev_stages.size(); index++) { + WriteTevStage(index); + } + + // Append the alpha test condition + WriteAlphaTestCondition(config.framebuffer.alpha_test_func); + + // Emulate the fog + switch (config.texture.fog_mode) { + case TexturingRegs::FogMode::Fog: + WriteFog(); + break; + case TexturingRegs::FogMode::Gas: + WriteGas(); + return out; + default: + break; + } + + if (config.framebuffer.shadow_rendering) { + WriteShadow(); + } else { + out += "gl_FragDepth = depth;\n"; + // Round the final fragment color to maintain the PICA's 8 bits of precision + out += "combiner_output = byteround(combiner_output);\n"; + WriteBlending(); + out += "color = combiner_output;\n"; + } + + WriteLogicOp(); + + out += '}'; + return out; +} + +void FragmentModule::WriteDepth() { + // The 
PICA depth range is [-1, 0]. The vertex shader outputs the negated Z value, otherwise + // unmodified. When the depth range is [-1, 1], it is converted into [near, far] = [0, 1]. + // This compresses our effective range into [0.5, 1]. To account for this we un-negate the value + // to range [-1, -0.5], multiply by 2 to the range [-2, -1], and add 1 to arrive back at the + // original range of [-1, 0]. If the depth range is [0, 1], so all we need to do is + // un-negate the value to range [-1, 0]. Once we have z_over_w, we can do our own transformation + // according to PICA specification. + if (profile.has_minus_one_to_one_range) { + out += "float z_over_w = -2.0 * gl_FragCoord.z + 1.0;\n"; + } else { + out += "float z_over_w = -gl_FragCoord.z;\n"; + } + out += "float depth = z_over_w * depth_scale + depth_offset;\n"; + if (config.framebuffer.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { + out += "depth /= gl_FragCoord.w;\n"; + } +} + +void FragmentModule::WriteScissor() { + const auto scissor_mode = config.framebuffer.scissor_test_mode.Value(); + if (scissor_mode == RasterizerRegs::ScissorMode::Disabled) { + return; + } + + out += "if ("; + // Negate the condition if we have to keep only the pixels outside the scissor box + if (scissor_mode == RasterizerRegs::ScissorMode::Include) { + out += '!'; + } + out += "(gl_FragCoord.x >= float(scissor_x1) && " + "gl_FragCoord.y >= float(scissor_y1) && " + "gl_FragCoord.x < float(scissor_x2) && " + "gl_FragCoord.y < float(scissor_y2))) discard;\n"; +} + +void FragmentModule::AppendSource(Pica::TexturingRegs::TevStageConfig::Source source, + u32 tev_index) { + using Source = Pica::TexturingRegs::TevStageConfig::Source; + switch (source) { + case Source::PrimaryColor: + out += "rounded_primary_color"; + break; + case Source::PrimaryFragmentColor: + out += "primary_fragment_color"; + break; + case Source::SecondaryFragmentColor: + out += "secondary_fragment_color"; + break; + case Source::Texture0: + out += 
"sampleTexUnit0()"; + break; + case Source::Texture1: + out += "sampleTexUnit1()"; + break; + case Source::Texture2: + out += "sampleTexUnit2()"; + break; + case Source::Texture3: + out += "sampleTexUnit3()"; + break; + case Source::PreviousBuffer: + out += "combiner_buffer"; + break; + case Source::Constant: + out += fmt::format("const_color[{}]", tev_index); + break; + case Source::Previous: + out += "combiner_output"; + break; + default: + out += "vec4(0.0)"; + LOG_CRITICAL(Render, "Unknown source op {}", source); + break; + } +} + +void FragmentModule::AppendColorModifier( + Pica::TexturingRegs::TevStageConfig::ColorModifier modifier, + Pica::TexturingRegs::TevStageConfig::Source source, u32 tev_index) { + using ColorModifier = Pica::TexturingRegs::TevStageConfig::ColorModifier; + switch (modifier) { + case ColorModifier::SourceColor: + AppendSource(source, tev_index); + out += ".rgb"; + break; + case ColorModifier::OneMinusSourceColor: + out += "vec3(1.0) - "; + AppendSource(source, tev_index); + out += ".rgb"; + break; + case ColorModifier::SourceAlpha: + AppendSource(source, tev_index); + out += ".aaa"; + break; + case ColorModifier::OneMinusSourceAlpha: + out += "vec3(1.0) - "; + AppendSource(source, tev_index); + out += ".aaa"; + break; + case ColorModifier::SourceRed: + AppendSource(source, tev_index); + out += ".rrr"; + break; + case ColorModifier::OneMinusSourceRed: + out += "vec3(1.0) - "; + AppendSource(source, tev_index); + out += ".rrr"; + break; + case ColorModifier::SourceGreen: + AppendSource(source, tev_index); + out += ".ggg"; + break; + case ColorModifier::OneMinusSourceGreen: + out += "vec3(1.0) - "; + AppendSource(source, tev_index); + out += ".ggg"; + break; + case ColorModifier::SourceBlue: + AppendSource(source, tev_index); + out += ".bbb"; + break; + case ColorModifier::OneMinusSourceBlue: + out += "vec3(1.0) - "; + AppendSource(source, tev_index); + out += ".bbb"; + break; + default: + out += "vec3(0.0)"; + LOG_CRITICAL(Render, "Unknown 
color modifier op {}", modifier); + break; + } +} + +void FragmentModule::AppendAlphaModifier( + Pica::TexturingRegs::TevStageConfig::AlphaModifier modifier, + Pica::TexturingRegs::TevStageConfig::Source source, u32 tev_index) { + using AlphaModifier = Pica::TexturingRegs::TevStageConfig::AlphaModifier; + switch (modifier) { + case AlphaModifier::SourceAlpha: + AppendSource(source, tev_index); + out += ".a"; + break; + case AlphaModifier::OneMinusSourceAlpha: + out += "1.0 - "; + AppendSource(source, tev_index); + out += ".a"; + break; + case AlphaModifier::SourceRed: + AppendSource(source, tev_index); + out += ".r"; + break; + case AlphaModifier::OneMinusSourceRed: + out += "1.0 - "; + AppendSource(source, tev_index); + out += ".r"; + break; + case AlphaModifier::SourceGreen: + AppendSource(source, tev_index); + out += ".g"; + break; + case AlphaModifier::OneMinusSourceGreen: + out += "1.0 - "; + AppendSource(source, tev_index); + out += ".g"; + break; + case AlphaModifier::SourceBlue: + AppendSource(source, tev_index); + out += ".b"; + break; + case AlphaModifier::OneMinusSourceBlue: + out += "1.0 - "; + AppendSource(source, tev_index); + out += ".b"; + break; + default: + out += "0.0"; + LOG_CRITICAL(Render, "Unknown alpha modifier op {}", modifier); + break; + } +} + +void FragmentModule::AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation) { + const auto get_combiner = [operation] { + using Operation = Pica::TexturingRegs::TevStageConfig::Operation; + switch (operation) { + case Operation::Replace: + return "color_results_1"; + case Operation::Modulate: + return "color_results_1 * color_results_2"; + case Operation::Add: + return "color_results_1 + color_results_2"; + case Operation::AddSigned: + return "color_results_1 + color_results_2 - vec3(0.5)"; + case Operation::Lerp: + return "color_results_1 * color_results_3 + color_results_2 * (vec3(1.0) - " + "color_results_3)"; + case Operation::Subtract: + return "color_results_1 - 
color_results_2"; + case Operation::MultiplyThenAdd: + return "color_results_1 * color_results_2 + color_results_3"; + case Operation::AddThenMultiply: + return "min(color_results_1 + color_results_2, vec3(1.0)) * color_results_3"; + case Operation::Dot3_RGB: + case Operation::Dot3_RGBA: + return "vec3(dot(color_results_1 - vec3(0.5), color_results_2 - vec3(0.5)) * 4.0)"; + default: + LOG_CRITICAL(Render, "Unknown color combiner operation: {}", operation); + return "vec3(0.0)"; + } + }; + out += fmt::format("clamp({}, vec3(0.0), vec3(1.0))", get_combiner()); +} + +void FragmentModule::AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation) { + const auto get_combiner = [operation] { + using Operation = Pica::TexturingRegs::TevStageConfig::Operation; + switch (operation) { + case Operation::Replace: + return "alpha_results_1"; + case Operation::Modulate: + return "alpha_results_1 * alpha_results_2"; + case Operation::Add: + return "alpha_results_1 + alpha_results_2"; + case Operation::AddSigned: + return "alpha_results_1 + alpha_results_2 - 0.5"; + case Operation::Lerp: + return "alpha_results_1 * alpha_results_3 + alpha_results_2 * (1.0 - alpha_results_3)"; + case Operation::Subtract: + return "alpha_results_1 - alpha_results_2"; + case Operation::MultiplyThenAdd: + return "alpha_results_1 * alpha_results_2 + alpha_results_3"; + case Operation::AddThenMultiply: + return "min(alpha_results_1 + alpha_results_2, 1.0) * alpha_results_3"; + default: + LOG_CRITICAL(Render, "Unknown alpha combiner operation: {}", operation); + return "0.0"; + } + }; + out += fmt::format("clamp({}, 0.0, 1.0)", get_combiner()); +} + +void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) { + const auto get_cond = [func]() -> std::string { + using CompareFunc = Pica::FramebufferRegs::CompareFunc; + switch (func) { + case CompareFunc::Never: + return "true"; + case CompareFunc::Always: + return "false"; + case CompareFunc::Equal: + case 
CompareFunc::NotEqual: + case CompareFunc::LessThan: + case CompareFunc::LessThanOrEqual: + case CompareFunc::GreaterThan: + case CompareFunc::GreaterThanOrEqual: { + static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"}; + const auto index = static_cast(func) - static_cast(CompareFunc::Equal); + return fmt::format("int(combiner_output.a * 255.0) {} alphatest_ref", op[index]); + } + default: + LOG_CRITICAL(Render, "Unknown alpha test condition {}", func); + return "false"; + break; + } + }; + out += fmt::format("if ({}) discard;\n", get_cond()); +} + +void FragmentModule::WriteTevStage(u32 index) { + const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index]; + if (!IsPassThroughTevStage(stage)) { + out += "color_results_1 = "; + AppendColorModifier(stage.color_modifier1, stage.color_source1, index); + out += ";\ncolor_results_2 = "; + AppendColorModifier(stage.color_modifier2, stage.color_source2, index); + out += ";\ncolor_results_3 = "; + AppendColorModifier(stage.color_modifier3, stage.color_source3, index); + + // Round the output of each TEV stage to maintain the PICA's 8 bits of precision + out += fmt::format(";\nvec3 color_output_{} = byteround(", index); + AppendColorCombiner(stage.color_op); + out += ");\n"; + + if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index); + } else { + out += "alpha_results_1 = "; + AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); + out += ";\nalpha_results_2 = "; + AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); + out += ";\nalpha_results_3 = "; + AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); + + out += fmt::format(";\nfloat alpha_output_{} = byteround(", index); + AppendAlphaCombiner(stage.alpha_op); + out += ");\n"; + } + + out += 
fmt::format("combiner_output = vec4(" + "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " + "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", + index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier()); + } + + out += "combiner_buffer = next_combiner_buffer;\n"; + if (config.TevStageUpdatesCombinerBufferColor(index)) { + out += "next_combiner_buffer.rgb = combiner_output.rgb;\n"; + } + if (config.TevStageUpdatesCombinerBufferAlpha(index)) { + out += "next_combiner_buffer.a = combiner_output.a;\n"; + } +} + +void FragmentModule::WriteLighting() { + if (!config.lighting.enable) { + return; + } + + const auto& lighting = config.lighting; + + // Define lighting globals + out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec3 light_vector = vec3(0.0);\n" + "float light_distance = 0.0;\n" + "vec3 refl_value = vec3(0.0);\n" + "vec3 spot_dir = vec3(0.0);\n" + "vec3 half_vector = vec3(0.0);\n" + "float dot_product = 0.0;\n" + "float clamp_highlights = 1.0;\n" + "float geo_factor = 1.0;\n"; + + // Compute fragment normals and tangents + const auto perturbation = [&] { + return fmt::format("2.0 * (sampleTexUnit{}()).rgb - 1.0", lighting.bump_selector.Value()); + }; + + if (config.user.use_custom_normal) { + const auto texel = fmt::format("2.0 * (texture(tex_normal, texcoord0)).rgb - 1.0"); + out += fmt::format("vec3 surface_normal = {};\n", texel); + out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + } else { + switch (lighting.bump_mode) { + case LightingRegs::LightingBumpMode::NormalMap: { + // Bump mapping is enabled using a normal map + out += fmt::format("vec3 surface_normal = {};\n", perturbation()); + + // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher + // precision result + if (lighting.bump_renorm) { + constexpr std::string_view val = "(1.0 - (surface_normal.x*surface_normal.x + " + "surface_normal.y*surface_normal.y))"; + out += 
fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val); + } + + // The tangent vector is not perturbed by the normal map and is just a unit vector. + out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + break; + } + case LightingRegs::LightingBumpMode::TangentMap: { + // Bump mapping is enabled using a tangent map + out += fmt::format("vec3 surface_tangent = {};\n", perturbation()); + // Mathematically, recomputing Z-component of the tangent vector won't affect the + // relevant computation below, which is also confirmed on 3DS. So we don't bother + // recomputing here even if 'renorm' is enabled. + + // The normal vector is not perturbed by the tangent map and is just a unit vector. + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + break; + } + default: + // No bump mapping - surface local normal and tangent are just unit vectors + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n" + "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + } + } + + // Rotate the surface-local normal by the interpolated normal quaternion to convert it to + // eyespace. 
+ out += "vec4 normalized_normquat = normalize(normquat);\n" + "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n" + "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; + + if (lighting.enable_shadow) { + std::string shadow_texture = + fmt::format("sampleTexUnit{}()", lighting.shadow_selector.Value()); + if (lighting.shadow_invert) { + out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); + } else { + out += fmt::format("vec4 shadow = {};\n", shadow_texture); + } + } else { + out += "vec4 shadow = vec4(1.0);\n"; + } + + // Samples the specified lookup table for specular lighting + const auto get_lut_value = [&lighting](LightingRegs::LightingSampler sampler, u32 light_num, + LightingRegs::LightingLutInput input, bool abs) { + std::string index; + switch (input) { + case LightingRegs::LightingLutInput::NH: + index = "dot(normal, normalize(half_vector))"; + break; + case LightingRegs::LightingLutInput::VH: + index = "dot(normalize(view), normalize(half_vector))"; + break; + case LightingRegs::LightingLutInput::NV: + index = "dot(normal, normalize(view))"; + break; + case LightingRegs::LightingLutInput::LN: + index = "dot(light_vector, normal)"; + break; + case LightingRegs::LightingLutInput::SP: + index = "dot(light_vector, spot_dir)"; + break; + case LightingRegs::LightingLutInput::CP: + // CP input is only available with configuration 7 + if (lighting.config == LightingRegs::LightingConfig::Config7) { + // Note: even if the normal vector is modified by normal map, which is not the + // normal of the tangent plane anymore, the half angle vector is still projected + // using the modified normal vector. + constexpr std::string_view half_angle_proj = + "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; + // Note: the half angle vector projection is confirmed not normalized before the dot + // product. The result is in fact not cos(phi) as the name suggested. 
+ index = fmt::format("dot({}, tangent)", half_angle_proj); + } else { + index = "0.0"; + } + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", static_cast(input)); + UNIMPLEMENTED(); + index = "0.0"; + break; + } + + const auto sampler_index = static_cast(sampler); + + if (abs) { + // LUT index is in the range of (0.0, 1.0) + index = lighting.lights[light_num].two_sided_diffuse + ? fmt::format("abs({})", index) + : fmt::format("max({}, 0.0)", index); + return fmt::format("LookupLightingLUTUnsigned({}, {})", sampler_index, index); + } else { + // LUT index is in the range of (-1.0, 1.0) + return fmt::format("LookupLightingLUTSigned({}, {})", sampler_index, index); + } + }; + + // Write the code to emulate each enabled light + for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) { + const auto& light_config = lighting.lights[light_index]; + const std::string light_src = fmt::format("light_src[{}]", light_config.num.Value()); + + // Compute light vector (directional or positional) + if (light_config.directional) { + out += fmt::format("light_vector = {}.position;\n", light_src); + } else { + out += fmt::format("light_vector = {}.position + view;\n", light_src); + } + out += fmt::format("light_distance = length(light_vector);\n", light_src); + out += fmt::format("light_vector = normalize(light_vector);\n", light_src); + + out += fmt::format("spot_dir = {}.spot_direction;\n", light_src); + out += "half_vector = normalize(view) + light_vector;\n"; + + // Compute dot product of light_vector and normal, adjust if lighting is one-sided or + // two-sided + out += "dot_product = "; + out += light_config.two_sided_diffuse ? 
"abs(dot(light_vector, normal));\n" + : "max(dot(light_vector, normal), 0.0);\n"; + + // If enabled, clamp specular component if lighting result is zero + if (lighting.clamp_highlights) { + out += "clamp_highlights = sign(dot_product);\n"; + } + + // If enabled, compute spot light attenuation value + std::string spot_atten = "1.0"; + if (light_config.spot_atten_enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { + const std::string value = + get_lut_value(LightingRegs::SpotlightAttenuationSampler(light_config.num), + light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); + spot_atten = fmt::format("({:#} * {})", lighting.lut_sp.scale, value); + } + + // If enabled, compute distance attenuation value + std::string dist_atten = "1.0"; + if (light_config.dist_atten_enable) { + const std::string index = fmt::format("clamp({}.dist_atten_scale * light_distance " + "+ {}.dist_atten_bias, 0.0, 1.0)", + light_src, light_src, light_src); + const auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num); + dist_atten = fmt::format("LookupLightingLUTUnsigned({}, {})", sampler, index); + } + + if (light_config.geometric_factor_0 || light_config.geometric_factor_1) { + out += "geo_factor = dot(half_vector, half_vector);\n" + "geo_factor = geo_factor == 0.0 ? 
0.0 : min(" + "dot_product / geo_factor, 1.0);\n"; + } + + // Specular 0 component + std::string d0_lut_value = "1.0"; + if (lighting.lut_d0.enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::Distribution0)) { + // Lookup specular "distribution 0" LUT value + const std::string value = + get_lut_value(LightingRegs::LightingSampler::Distribution0, light_config.num, + lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = fmt::format("({:#} * {})", lighting.lut_d0.scale, value); + } + std::string specular_0 = fmt::format("({} * {}.specular_0)", d0_lut_value, light_src); + if (light_config.geometric_factor_0) { + specular_0 = fmt::format("({} * geo_factor)", specular_0); + } + + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (lighting.lut_rr.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::ReflectRed)) { + std::string value = + get_lut_value(LightingRegs::LightingSampler::ReflectRed, light_config.num, + lighting.lut_rr.type, lighting.lut_rr.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rr.scale, value); + out += fmt::format("refl_value.r = {};\n", value); + } else { + out += "refl_value.r = 1.0;\n"; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (lighting.lut_rg.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::ReflectGreen)) { + std::string value = + get_lut_value(LightingRegs::LightingSampler::ReflectGreen, light_config.num, + lighting.lut_rg.type, lighting.lut_rg.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rg.scale, value); + out += fmt::format("refl_value.g = {};\n", value); + } else { + out += "refl_value.g = refl_value.r;\n"; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (lighting.lut_rb.enable && + 
LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::ReflectBlue)) { + std::string value = + get_lut_value(LightingRegs::LightingSampler::ReflectBlue, light_config.num, + lighting.lut_rb.type, lighting.lut_rb.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_rb.scale, value); + out += fmt::format("refl_value.b = {};\n", value); + } else { + out += "refl_value.b = refl_value.r;\n"; + } + + // Specular 1 component + std::string d1_lut_value = "1.0"; + if (lighting.lut_d1.enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::Distribution1)) { + // Lookup specular "distribution 1" LUT value + const std::string value = + get_lut_value(LightingRegs::LightingSampler::Distribution1, light_config.num, + lighting.lut_d1.type, lighting.lut_d1.abs_input); + d1_lut_value = fmt::format("({:#} * {})", lighting.lut_d1.scale, value); + } + std::string specular_1 = + fmt::format("({} * refl_value * {}.specular_1)", d1_lut_value, light_src); + if (light_config.geometric_factor_1) { + specular_1 = fmt::format("({} * geo_factor)", specular_1); + } + + // Fresnel + // Note: only the last entry in the light slots applies the Fresnel factor + if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && + LightingRegs::IsLightingSamplerSupported(lighting.config, + LightingRegs::LightingSampler::Fresnel)) { + // Lookup fresnel LUT value + std::string value = + get_lut_value(LightingRegs::LightingSampler::Fresnel, light_config.num, + lighting.lut_fr.type, lighting.lut_fr.abs_input); + value = fmt::format("({:#} * {})", lighting.lut_fr.scale, value); + + // Enabled for diffuse lighting alpha component + if (lighting.enable_primary_alpha) { + out += fmt::format("diffuse_sum.a = {};\n", value); + } + + // Enabled for the specular lighting alpha component + if (lighting.enable_secondary_alpha) { + out += fmt::format("specular_sum.a = {};\n", value); + } + } + + const bool 
shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable; + const bool shadow_secondary_enable = + lighting.shadow_secondary && light_config.shadow_enable; + const auto shadow_primary = shadow_primary_enable ? " * shadow.rgb" : ""; + const auto shadow_secondary = shadow_secondary_enable ? " * shadow.rgb" : ""; + + // Compute primary fragment color (diffuse lighting) function + out += fmt::format( + "diffuse_sum.rgb += (({}.diffuse * dot_product) + {}.ambient) * {} * {}{};\n", + light_src, light_src, dist_atten, spot_atten, shadow_primary); + + // Compute secondary fragment color (specular lighting) function + out += fmt::format("specular_sum.rgb += ({} + {}) * clamp_highlights * {} * {}{};\n", + specular_0, specular_1, dist_atten, spot_atten, shadow_secondary); + } + + // Apply shadow attenuation to alpha components if enabled + if (lighting.shadow_alpha) { + if (lighting.enable_primary_alpha) { + out += "diffuse_sum.a *= shadow.a;\n"; + } + if (lighting.enable_secondary_alpha) { + out += "specular_sum.a *= shadow.a;\n"; + } + } + + // Sum final lighting result + out += "diffuse_sum.rgb += lighting_global_ambient;\n" + "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n" + "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; +} + +void FragmentModule::WriteFog() { + // Get index into fog LUT + if (config.texture.fog_flip) { + out += "float fog_index = (1.0 - float(depth)) * 128.0;\n"; + } else { + out += "float fog_index = depth * 128.0;\n"; + } + + // Generate clamped fog factor from LUT for given fog index + out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n" + "float fog_f = fog_index - fog_i;\n" + "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + " + "fog_lut_offset).rg;\n" + "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n" + "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; + + // Blend the fog + out += "combiner_output.rgb = mix(fog_color.rgb, 
combiner_output.rgb, fog_factor);\n"; +} + +void FragmentModule::WriteGas() { + // TODO: Implement me + LOG_CRITICAL(Render, "Unimplemented gas mode"); + out += "discard; }"; +} + +void FragmentModule::WriteShadow() { + out += R"( +uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); +uint s = uint(combiner_output.g * float(0xFF)); +ivec2 image_coord = ivec2(gl_FragCoord.xy); +)"; + + if (use_fragment_shader_interlock) { + out += R"( +beginInvocationInterlock(); +uint old_shadow = imageLoad(shadow_buffer, image_coord).x; +uint new_shadow = UpdateShadow(old_shadow, d, s); +imageStore(shadow_buffer, image_coord, uvec4(new_shadow)); +endInvocationInterlock(); +)"; + } else { + out += R"( +uint old = imageLoad(shadow_buffer, image_coord).x; +uint new1; +uint old2; +do { + old2 = old; + new1 = UpdateShadow(old, d, s); +} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new1)) != old2); +)"; + } +} + +void FragmentModule::WriteLogicOp() { + const auto logic_op = config.framebuffer.logic_op.Value(); + switch (logic_op) { + case FramebufferRegs::LogicOp::Clear: + out += "color = vec4(0);\n"; + break; + case FramebufferRegs::LogicOp::Set: + out += "color = vec4(1);\n"; + break; + case FramebufferRegs::LogicOp::Copy: + // Take the color output as-is + break; + case FramebufferRegs::LogicOp::CopyInverted: + out += "color = ~color;\n"; + break; + case FramebufferRegs::LogicOp::NoOp: + // We need to discard the color, but not necessarily the depth. This is not possible + // with fragment shader alone, so we emulate this behavior on GLES with glColorMask. 
+ break; + default: + LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", logic_op); + UNIMPLEMENTED(); + } +} + +void FragmentModule::WriteBlending() { + if (!config.EmulateBlend()) [[likely]] { + return; + } + + using BlendFactor = Pica::FramebufferRegs::BlendFactor; + out += "vec4 source_color = combiner_output;\n"; + out += "vec4 dest_color = destFactor;\n"; + const auto get_factor = [&](BlendFactor factor) -> std::string { + switch (factor) { + case BlendFactor::Zero: + return "vec4(0.f)"; + case BlendFactor::One: + return "vec4(1.f)"; + case BlendFactor::SourceColor: + return "source_color"; + case BlendFactor::OneMinusSourceColor: + return "vec4(1.f) - source_color"; + case BlendFactor::DestColor: + return "dest_color"; + case BlendFactor::OneMinusDestColor: + return "vec4(1.f) - dest_color"; + case BlendFactor::SourceAlpha: + return "source_color.aaaa"; + case BlendFactor::OneMinusSourceAlpha: + return "vec4(1.f) - source_color.aaaa"; + case BlendFactor::DestAlpha: + return "dest_color.aaaa"; + case BlendFactor::OneMinusDestAlpha: + return "vec4(1.f) - dest_color.aaaa"; + case BlendFactor::ConstantColor: + return "blend_color"; + case BlendFactor::OneMinusConstantColor: + return "vec4(1.f) - blend_color"; + case BlendFactor::ConstantAlpha: + return "blend_color.aaaa"; + case BlendFactor::OneMinusConstantAlpha: + return "vec4(1.f) - blend_color.aaaa"; + default: + LOG_CRITICAL(Render_OpenGL, "Unknown blend factor {}", factor); + return "vec4(1.f)"; + } + }; + const auto get_func = [](Pica::FramebufferRegs::BlendEquation eq) { + return eq == Pica::FramebufferRegs::BlendEquation::Min ? 
"min" : "max"; + }; + + if (config.framebuffer.rgb_blend.eq != Pica::FramebufferRegs::BlendEquation::Add) { + out += fmt::format( + "combiner_output.rgb = {}(source_color.rgb * ({}).rgb, dest_color.rgb * ({}).rgb);\n", + get_func(config.framebuffer.rgb_blend.eq), + get_factor(config.framebuffer.rgb_blend.src_factor), + get_factor(config.framebuffer.rgb_blend.dst_factor)); + } + if (config.framebuffer.alpha_blend.eq != Pica::FramebufferRegs::BlendEquation::Add) { + out += + fmt::format("combiner_output.a = {}(source_color.a * ({}).a, dest_color.a * ({}).a);\n", + get_func(config.framebuffer.alpha_blend.eq), + get_factor(config.framebuffer.alpha_blend.src_factor), + get_factor(config.framebuffer.alpha_blend.dst_factor)); + } +} + +void FragmentModule::AppendProcTexShiftOffset(std::string_view v, ProcTexShift mode, + ProcTexClamp clamp_mode) { + const auto offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? "1.0" : "0.5"; + switch (mode) { + case ProcTexShift::None: + out += "0.0"; + break; + case ProcTexShift::Odd: + out += fmt::format("{} * float((int({}) / 2) % 2)", offset, v); + break; + case ProcTexShift::Even: + out += fmt::format("{} * float(((int({}) + 1) / 2) % 2)", offset, v); + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode); + out += "0.0"; + break; + } +} + +void FragmentModule::AppendProcTexClamp(std::string_view var, ProcTexClamp mode) { + switch (mode) { + case ProcTexClamp::ToZero: + out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var); + break; + case ProcTexClamp::ToEdge: + out += fmt::format("{0} = min({0}, 1.0);\n", var); + break; + case ProcTexClamp::SymmetricalRepeat: + out += fmt::format("{0} = fract({0});\n", var); + break; + case ProcTexClamp::MirroredRepeat: + out += fmt::format("{0} = int({0}) % 2 == 0 ? fract({0}) : 1.0 - fract({0});\n", var); + break; + case ProcTexClamp::Pulse: + out += fmt::format("{0} = {0} > 0.5 ? 
1.0 : 0.0;\n", var); + break; + default: + LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode); + out += fmt::format("{0} = min({0}, 1.0);\n", var); + break; + } +} + +void FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, std::string_view offset) { + const auto combined = [combiner] { + switch (combiner) { + case ProcTexCombiner::U: + return "u"; + case ProcTexCombiner::U2: + return "(u * u)"; + case TexturingRegs::ProcTexCombiner::V: + return "v"; + case TexturingRegs::ProcTexCombiner::V2: + return "(v * v)"; + case TexturingRegs::ProcTexCombiner::Add: + return "((u + v) * 0.5)"; + case TexturingRegs::ProcTexCombiner::Add2: + return "((u * u + v * v) * 0.5)"; + case TexturingRegs::ProcTexCombiner::SqrtAdd2: + return "min(sqrt(u * u + v * v), 1.0)"; + case TexturingRegs::ProcTexCombiner::Min: + return "min(u, v)"; + case TexturingRegs::ProcTexCombiner::Max: + return "max(u, v)"; + case TexturingRegs::ProcTexCombiner::RMax: + return "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; + default: + LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner); + return "0.0"; + } + }(); + out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined); +} + +void FragmentModule::DefineProcTexSampler() { + if (!config.proctex.enable) { + return; + } + + // LUT sampling uitlity + // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and + // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using + // value entries and difference entries. 
+ out += R"( +float ProcTexLookupLUT(int offset, float coord) { + coord *= 128.0; + float index_i = clamp(floor(coord), 0.0, 127.0); + float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be + // extracted as index_i = 127.0 and index_f = 1.0 + vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg; + return clamp(entry.r + entry.g * index_f, 0.0, 1.0); +} + )"; + + // Noise utility + if (config.proctex.noise_enable) { + // See swrasterizer/proctex.cpp for more information about these functions + out += R"( +int ProcTexNoiseRand1D(int v) { + const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11); + return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; +} + +float ProcTexNoiseRand2D(vec2 point) { + const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14); + int u2 = ProcTexNoiseRand1D(int(point.x)); + int v2 = ProcTexNoiseRand1D(int(point.y)); + v2 += ((u2 & 3) == 1) ? 4 : 0; + v2 ^= (u2 & 1) * 6; + v2 += 10 + u2; + v2 &= 0xF; + v2 ^= table[u2]; + return -1.0 + float(v2) * (2.0/15.0); +} + +float ProcTexNoiseCoef(vec2 x) { + vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p); + vec2 point = floor(grid); + vec2 frac = grid - point; + + float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y); + float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0); + float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); + float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); + + float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x); + float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y); + float x0 = mix(g0, g1, x_noise); + float x1 = mix(g2, g3, x_noise); + return mix(x0, x1, y_noise); +} + )"; + } + + out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n"; + out += fmt::format("int lut_width = {} >> level;\n", config.proctex.lut_width); + // Offsets for level 4-7 seem to be hardcoded + 
out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n", + config.proctex.lut_offset0, config.proctex.lut_offset1, + config.proctex.lut_offset2, config.proctex.lut_offset3); + out += "int lut_offset = lut_offsets[level];\n"; + // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] + out += "lut_coord *= float(lut_width - 1);\n"; + + switch (config.proctex.lut_filter) { + case ProcTexFilter::Linear: + case ProcTexFilter::LinearMipmapLinear: + case ProcTexFilter::LinearMipmapNearest: + out += "int lut_index_i = int(lut_coord) + lut_offset;\n"; + out += "float lut_index_f = fract(lut_coord);\n"; + out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + " + "proctex_lut_offset) + " + "lut_index_f * " + "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n"; + break; + case ProcTexFilter::Nearest: + case ProcTexFilter::NearestMipmapLinear: + case ProcTexFilter::NearestMipmapNearest: + out += "lut_coord += float(lut_offset);\n"; + out += "return texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + " + "proctex_lut_offset);\n"; + break; + } + + out += "}\n"; + + out += "vec4 ProcTex() {\n"; + if (config.proctex.coord < 3) { + out += fmt::format("vec2 uv = abs(texcoord{});\n", config.proctex.coord.Value()); + } else { + LOG_CRITICAL(Render, "Unexpected proctex.coord >= 3"); + out += "vec2 uv = abs(texcoord0);\n"; + } + + // This LOD formula is the same as the LOD upper limit defined in OpenGL. + // f(x, y) <= m_u + m_v + m_w + // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) + // Note: this is different from the one normal 2D textures use. 
+ out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n"; + // unlike normal texture, the bias is inside the log2 + out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n", + config.proctex.lut_width); + out += "if (proctex_bias == 0.0) lod = 0.0;\n"; + out += fmt::format("lod = clamp(lod, {:#}, {:#});\n", + std::max(0.0f, static_cast(config.proctex.lod_min)), + std::min(7.0f, static_cast(config.proctex.lod_max))); + + // Get shift offset before noise generation + out += "float u_shift = "; + AppendProcTexShiftOffset("uv.y", config.proctex.u_shift, config.proctex.u_clamp); + out += ";\n"; + out += "float v_shift = "; + AppendProcTexShiftOffset("uv.x", config.proctex.v_shift, config.proctex.v_clamp); + out += ";\n"; + + // Generate noise + if (config.proctex.noise_enable) { + out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n" + "uv = abs(uv);\n"; + } + + // Shift + out += "float u = uv.x + u_shift;\n" + "float v = uv.y + v_shift;\n"; + + // Clamp + AppendProcTexClamp("u", config.proctex.u_clamp); + AppendProcTexClamp("v", config.proctex.v_clamp); + + // Combine and map + out += "float lut_coord = "; + AppendProcTexCombineAndMap(config.proctex.color_combiner, "proctex_color_map_offset"); + out += ";\n"; + + switch (config.proctex.lut_filter) { + case ProcTexFilter::Linear: + case ProcTexFilter::Nearest: + out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n"; + break; + case ProcTexFilter::NearestMipmapNearest: + case ProcTexFilter::LinearMipmapNearest: + out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n"; + break; + case ProcTexFilter::NearestMipmapLinear: + case ProcTexFilter::LinearMipmapLinear: + out += "int lod_i = int(lod);\n" + "float lod_f = fract(lod);\n" + "vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), " + "SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n"; + break; + } + + if (config.proctex.separate_alpha) { + // Note: in separate alpha mode, the alpha channel 
skips the color LUT look up stage. It + // uses the output of CombineAndMap directly instead. + out += "float final_alpha = "; + AppendProcTexCombineAndMap(config.proctex.alpha_combiner, "proctex_alpha_map_offset"); + out += ";\n"; + out += "return vec4(final_color.xyz, final_alpha);\n}\n"; + } else { + out += "return final_color;\n}\n"; + } +} +
+/// Emits the `#extension` directives (and the macros layered on top of them) that the current
+/// configuration needs, and records which capabilities ended up being available.
+void FragmentModule::DefineExtensions() {
+    if (profile.has_separable_shaders) {
+        out += "#extension GL_ARB_separate_shader_objects : enable\n";
+    }
+
+    // Shadow rendering: probe the interlock flavours in order of preference (ARB, NV, INTEL) and
+    // alias whichever is present to the suffix-less begin/endInvocationInterlock names.
+    if (config.framebuffer.shadow_rendering) {
+        const bool has_arb = profile.has_fragment_shader_interlock;
+        const bool has_nv = !has_arb && profile.has_gl_nv_fragment_shader_interlock;
+        const bool has_intel =
+            !has_arb && !has_nv && profile.has_gl_intel_fragment_shader_interlock;
+        use_fragment_shader_interlock = has_arb || has_nv || has_intel;
+
+        if (has_arb) {
+            out += "#extension GL_ARB_fragment_shader_interlock : enable\n"
+                   "#define beginInvocationInterlock beginInvocationInterlockARB\n"
+                   "#define endInvocationInterlock endInvocationInterlockARB\n";
+        } else if (has_nv) {
+            out += "#extension GL_NV_fragment_shader_interlock : enable\n"
+                   "#define beginInvocationInterlock beginInvocationInterlockNV\n"
+                   "#define endInvocationInterlock endInvocationInterlockNV\n";
+        } else if (has_intel) {
+            // The INTEL extension has no explicit end call, so the end macro expands to nothing
+            out += "#extension GL_INTEL_fragment_shader_ordering : enable\n"
+                   "#define beginInvocationInterlock beginFragmentShaderOrderingINTEL\n"
+                   "#define endInvocationInterlock\n";
+        }
+    }
+
+    // Blend emulation reads the destination colour: through framebuffer fetch when the host
+    // offers it, otherwise by fetching from a bound copy of the colour buffer.
+    if (config.EmulateBlend()) {
+        if (profile.has_gl_ext_framebuffer_fetch) {
+            out += "#extension GL_EXT_shader_framebuffer_fetch : enable\n"
+                   "#define destFactor color\n";
+        } else if (profile.has_gl_arm_framebuffer_fetch) {
+            out += "#extension GL_ARM_shader_framebuffer_fetch : enable\n"
+                   "#define destFactor gl_LastFragColorARM\n";
+        } else {
+            use_blend_fallback = true;
+            out += "#define destFactor texelFetch(color_buffer, ivec2(gl_FragCoord.xy), 0)\n";
+        }
+    }
+
+    const bool is_opengl = !profile.is_vulkan;
+    if (is_opengl) {
+        out += fragment_shader_precision_OES;
+    }
+}
+
+/// Declares the fragment stage inputs and the single colour output.
+void FragmentModule::DefineInterface() {
+    struct Varying {
+        std::string_view declaration;
+        Semantic location;
+    };
+
+    // Input attributes, listed in emission order
+    const Varying inputs[] = {
+        {"vec4 primary_color", Semantic::Color},
+        {"vec2 texcoord0", Semantic::Texcoord0},
+        {"vec2 texcoord1", Semantic::Texcoord1},
+        {"vec2 texcoord2", Semantic::Texcoord2},
+        {"float texcoord0_w", Semantic::Texcoord0_W},
+        {"vec4 normquat", Semantic::Normquat},
+        {"vec3 view", Semantic::View},
+    };
+
+    for (const Varying& input : inputs) {
+        // Explicit locations are only written when separable shader objects are in use
+        if (profile.has_separable_shaders) {
+            out += fmt::format("layout (location = {}) ", input.location);
+        }
+        out += fmt::format("in {};\n", input.declaration);
+    }
+
+    // Output attributes
+    out += "layout (location = 0) out vec4 color;\n\n";
+}
+
+/// Declares the uniform block, the LUT texture buffers, the texture unit samplers and the
+/// shadow images, using descriptor sets on Vulkan and plain bindings otherwise.
+void FragmentModule::DefineBindings() {
+    out += FSUniformBlockDef;
+    out += "layout(binding = 3) uniform samplerBuffer texture_buffer_lut_lf;\n"
+           "layout(binding = 4) uniform samplerBuffer texture_buffer_lut_rg;\n"
+           "layout(binding = 5) uniform samplerBuffer texture_buffer_lut_rgba;\n\n";
+
+    const bool is_opengl = !profile.is_vulkan;
+
+    const std::string_view texunit_set = is_opengl ? "" : "set = 1, ";
+    for (u32 unit = 0; unit < 3; ++unit) {
+        out += fmt::format("layout({0}binding = {1}) uniform sampler2D tex{1};\n", texunit_set,
+                           unit);
+    }
+
+    out += fmt::format("layout({}binding = 3) uniform samplerCube tex_cube;\n\n", texunit_set);
+
+    // These two samplers are only declared for the non-Vulkan path
+    if (is_opengl) {
+        if (config.user.use_custom_normal) {
+            out += "layout(binding = 7) uniform sampler2D tex_normal;\n";
+        }
+        if (use_blend_fallback) {
+            out += "layout(location = 10) uniform sampler2D color_buffer;\n";
+        }
+    }
+
+    static constexpr std::array postfixes = {"px", "nx", "py", "ny", "pz", "nz"};
+    const std::string_view shadow_set = is_opengl ? "" : "set = 2, ";
+    u32 binding = 0;
+    for (const auto& postfix : postfixes) {
+        out += fmt::format(
+            "layout({}binding = {}, r32ui) uniform readonly uimage2D shadow_texture_{};\n",
+            shadow_set, binding, postfix);
+        ++binding;
+    }
+    if (config.framebuffer.shadow_rendering) {
+        out += fmt::format("layout({}binding = 6, r32ui) uniform uimage2D shadow_buffer;\n\n",
+                           shadow_set);
+    }
+}
+
+/// Emits the GLSL helper functions that are always part of the shader: quaternion rotation,
+/// the byteround overloads, the LOD estimate and the shadow pixel decoder.
+void FragmentModule::DefineHelpers() {
+    out += R"(
+vec3 quaternion_rotate(vec4 q, vec3 v) {
+    return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
+}
+
+float byteround(float x) {
+    return round(x * 255.0) * (1.0 / 255.0);
+}
+
+vec2 byteround(vec2 x) {
+    return round(x * 255.0) * (1.0 / 255.0);
+}
+
+vec3 byteround(vec3 x) {
+    return round(x * 255.0) * (1.0 / 255.0);
+}
+
+vec4 byteround(vec4 x) {
+    return round(x * 255.0) * (1.0 / 255.0);
+}
+
+float getLod(vec2 coord) {
+    vec2 d = max(abs(dFdx(coord)), abs(dFdy(coord)));
+    return log2(max(d.x, d.y));
+}
+
+uvec2 DecodeShadow(uint pixel) {
+    return uvec2(pixel >> 8, pixel & 0xFFu);
+}
+)";
+}
+
+/// Emits the lighting LUT lookup helpers; nothing is written when lighting is disabled.
+void FragmentModule::DefineLightingHelpers() {
+    if (config.lighting.enable) {
+        out += R"(
+float LookupLightingLUT(int lut_index, int index, float delta) {
+    vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
+    return entry.r + entry.g * delta;
+}
+
+float LookupLightingLUTUnsigned(int lut_index, float pos) {
+    int index = int(clamp(floor(pos * 256.0), 0.f, 255.f));
+    float delta = pos * 256.0 - float(index);
+    return LookupLightingLUT(lut_index, index, delta);
+}
+
+float LookupLightingLUTSigned(int lut_index, float pos) {
+    int index = int(clamp(floor(pos * 128.0), -128.f, 127.f));
+    float delta = pos * 128.0 - float(index);
+    if (index < 0) index += 256;
+    return LookupLightingLUT(lut_index, index, delta);
+}
+)";
+    }
+}
+
+void FragmentModule::DefineShadowHelpers() { + if (config.framebuffer.shadow_rendering) { + out += R"( +uint EncodeShadow(uvec2 pixel) { + return (pixel.x << 8)
| pixel.y; +} + +uint UpdateShadow(uint pixel, uint d, uint s) { + uvec2 ref = DecodeShadow(pixel); + if (d < ref.x) { + if (s == 0u) { + ref.x = d; + } else { + s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); + ref.y = min(s, ref.y); + } + } + return EncodeShadow(ref); +} +)"; + } + + if (config.texture.texture0_type == TexturingRegs::TextureConfig::Shadow2D || + config.texture.texture0_type == TexturingRegs::TextureConfig::ShadowCube) { + out += R"( +float CompareShadow(uint pixel, uint z) { + uvec2 p = DecodeShadow(pixel); + return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); +} + +float mix2(vec4 s, vec2 a) { + vec2 t = mix(s.xy, s.zw, a.yy); + return mix(t.x, t.y, a.x); +} +)"; + + if (config.texture.texture0_type == TexturingRegs::TextureConfig::Shadow2D) { + out += R"( +float SampleShadow2D(ivec2 uv, uint z) { + if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) + return 1.0; + return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); +} + +vec4 shadowTexture(vec2 uv, float w) { +)"; + if (!config.texture.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += R"( + uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); + vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i = ivec2(coord_floor); + vec4 s = vec4( + SampleShadow2D(i , z), + SampleShadow2D(i + ivec2(1, 0), z), + SampleShadow2D(i + ivec2(0, 1), z), + SampleShadow2D(i + ivec2(1, 1), z)); + return vec4(mix2(s, f)); +} +)"; + } else if (config.texture.texture0_type == TexturingRegs::TextureConfig::ShadowCube) { + out += R"( +vec4 shadowTextureCube(vec2 uv, float w) { + ivec2 size = imageSize(shadow_texture_px); + vec3 c = vec3(uv, w); + vec3 a = abs(c); + if (a.x > a.y && a.x > a.z) { + w = a.x; + uv = -c.zy; + if (c.x < 0.0) uv.x = -uv.x; + } else if (a.y > a.z) { + w = a.y; + 
uv = c.xz; + if (c.y < 0.0) uv.y = -uv.y; + } else { + w = a.z; + uv = -c.xy; + if (c.z > 0.0) uv.x = -uv.x; + } + uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); + vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); + vec2 coord_floor = floor(coord); + vec2 f = coord - coord_floor; + ivec2 i00 = ivec2(coord_floor); + ivec2 i10 = i00 + ivec2(1, 0); + ivec2 i01 = i00 + ivec2(0, 1); + ivec2 i11 = i00 + ivec2(1, 1); + ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); + i00 = clamp(i00, cmin, cmax); + i10 = clamp(i10, cmin, cmax); + i01 = clamp(i01, cmin, cmax); + i11 = clamp(i11, cmin, cmax); + uvec4 pixels; + // This part should have been refactored into functions, + // but many drivers don't like passing uimage2D as parameters + if (a.x > a.y && a.x > a.z) { + if (c.x > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_px, i00).r, + imageLoad(shadow_texture_px, i10).r, + imageLoad(shadow_texture_px, i01).r, + imageLoad(shadow_texture_px, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_nx, i00).r, + imageLoad(shadow_texture_nx, i10).r, + imageLoad(shadow_texture_nx, i01).r, + imageLoad(shadow_texture_nx, i11).r); + } else if (a.y > a.z) { + if (c.y > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_py, i00).r, + imageLoad(shadow_texture_py, i10).r, + imageLoad(shadow_texture_py, i01).r, + imageLoad(shadow_texture_py, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_ny, i00).r, + imageLoad(shadow_texture_ny, i10).r, + imageLoad(shadow_texture_ny, i01).r, + imageLoad(shadow_texture_ny, i11).r); + } else { + if (c.z > 0.0) + pixels = uvec4( + imageLoad(shadow_texture_pz, i00).r, + imageLoad(shadow_texture_pz, i10).r, + imageLoad(shadow_texture_pz, i01).r, + imageLoad(shadow_texture_pz, i11).r); + else + pixels = uvec4( + imageLoad(shadow_texture_nz, i00).r, + imageLoad(shadow_texture_nz, i10).r, + imageLoad(shadow_texture_nz, i01).r, + imageLoad(shadow_texture_nz, i11).r); + } + vec4 s = 
vec4( + CompareShadow(pixels.x, z), + CompareShadow(pixels.y, z), + CompareShadow(pixels.z, z), + CompareShadow(pixels.w, z)); + return vec4(mix2(s, f)); +} + )"; + } + } +} +
+/// Emits the GLSL function `sampleTexUnit<texture_unit>()`.
+/// Units 0-2 sample the bound textures (unit 0 additionally honours the configured texture0
+/// type) and return the border colour when border emulation is enabled and the coordinate
+/// leaves [0, 1]; unit 3 forwards to the procedural texture when it is enabled.
+/// @param texture_unit Index of the texture unit to emit the sampler for (0-3)
+void FragmentModule::DefineTexUnitSampler(u32 texture_unit) {
+    out += fmt::format("vec4 sampleTexUnit{}() {{\n", texture_unit);
+    if (texture_unit == 0 &&
+        config.texture.texture0_type == TexturingRegs::TextureConfig::Disabled) {
+        // Close the function with a trailing newline, exactly like the regular exit path at the
+        // bottom, so the next generated function does not start on the same line as this brace.
+        out += "return vec4(0.0);\n}\n";
+        return;
+    }
+
+    if (texture_unit < 3) {
+        // Unit 2 can be configured to reuse the coordinates of unit 1
+        const u32 texcoord_num =
+            texture_unit == 2 && config.texture.texture2_use_coord1 ? 1 : texture_unit;
+        // The bounds are spelled as float literals: GLSL ES performs no implicit int -> float
+        // conversion, so comparing a float coordinate against `0`/`1` fails on strict compilers.
+        if (config.texture.texture_border_color[texture_unit].enable_s) {
+            out += fmt::format(R"(
+                if (texcoord{}.x < 0.0 || texcoord{}.x > 1.0) {{
+                    return tex_border_color[{}];
+                }}
+            )",
+                               texcoord_num, texcoord_num, texture_unit);
+        }
+        if (config.texture.texture_border_color[texture_unit].enable_t) {
+            out += fmt::format(R"(
+                if (texcoord{}.y < 0.0 || texcoord{}.y > 1.0) {{
+                    return tex_border_color[{}];
+                }}
+            )",
+                               texcoord_num, texcoord_num, texture_unit);
+        }
+    }
+
+    switch (texture_unit) {
+    case 0:
+        switch (config.texture.texture0_type) {
+        case TexturingRegs::TextureConfig::Texture2D:
+            out += "return textureLod(tex0, texcoord0, getLod(texcoord0 * "
+                   "vec2(textureSize(tex0, 0))) + tex_lod_bias[0]);";
+            break;
+        case TexturingRegs::TextureConfig::Projection2D:
+            // TODO (wwylele): find the exact LOD formula for projection texture
+            out += "return textureProj(tex0, vec3(texcoord0, texcoord0_w));";
+            break;
+        case TexturingRegs::TextureConfig::TextureCube:
+            out += "return texture(tex_cube, vec3(texcoord0, texcoord0_w));";
+            break;
+        case TexturingRegs::TextureConfig::Shadow2D:
+            out += "return shadowTexture(texcoord0, texcoord0_w);";
+            break;
+        case TexturingRegs::TextureConfig::ShadowCube:
+            out += "return shadowTextureCube(texcoord0, texcoord0_w);";
+            break;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}",
+                         config.texture.texture0_type.Value());
+            UNIMPLEMENTED();
+            // Still emit a plain 2D fetch so the generated function has a return statement
+            out += "return texture(tex0, texcoord0);";
+            break;
+        }
+        break;
+    case 1:
+        out += "return textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, "
+               "0))) + tex_lod_bias[1]);";
+        break;
+    case 2:
+        if (config.texture.texture2_use_coord1) {
+            out += "return textureLod(tex2, texcoord1, getLod(texcoord1 * "
+                   "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);";
+        } else {
+            out += "return textureLod(tex2, texcoord2, getLod(texcoord2 * "
+                   "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);";
+        }
+        break;
+    case 3:
+        if (config.proctex.enable) {
+            out += "return ProcTex();";
+        } else {
+            out += "return vec4(0.0);";
+        }
+        break;
+    default:
+        UNREACHABLE();
+        break;
+    }
+
+    out += "\n}\n";
+}
+
+/// Convenience entry point: builds a FragmentModule for the given configuration and host
+/// profile and returns the GLSL source it generates.
+std::string GenerateFragmentShader(const FSConfig& config, const Profile& profile) {
+    FragmentModule module{config, profile};
+    return module.Generate();
+}
+
+} // namespace Pica::Shader::Generator::GLSL
diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.h b/src/video_core/shader/generator/glsl_fs_shader_gen.h new file mode 100644 index 0000000000..77836315c4 --- /dev/null +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.h @@ -0,0 +1,100 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/shader/generator/pica_fs_config.h"
+
+namespace Pica::Shader::Generator::GLSL {
+
+/// Generates the GLSL source of a fragment shader for one FSConfig / Profile pair.
+/// The private emitters below each write their part of the source into `out`.
+class FragmentModule {
+public:
+    /// Stores references to `config` and `profile`; both must outlive the module.
+    explicit FragmentModule(const FSConfig& config, const Profile& profile);
+    ~FragmentModule();
+
+    /// Emits GLSL source corresponding to the provided pica fragment configuration
+    std::string Generate();
+
+private:
+    /// Undoes the host perspective transformation and applies the PICA one
+    void WriteDepth();
+
+    /// Emits code to emulate the scissor rectangle
+    void WriteScissor();
+
+    /// Writes the code to emulate fragment lighting
+    void WriteLighting();
+
+    /// Writes the code to emulate fog
+    void WriteFog();
+
+    /// Writes the code to emulate gas rendering
+    void WriteGas();
+
+    /// Writes the code to emulate shadow-map rendering
+    void WriteShadow();
+
+    /// Writes the code to emulate logic ops in the fragment shader
+    void WriteLogicOp();
+
+    /// Writes the code to emulate PICA min/max blending factors
+    void WriteBlending();
+
+    /// Writes the specified TEV stage source component(s)
+    void AppendSource(Pica::TexturingRegs::TevStageConfig::Source source, u32 tev_index);
+
+    /// Writes the color components to use for the specified TEV stage color modifier
+    void AppendColorModifier(Pica::TexturingRegs::TevStageConfig::ColorModifier modifier,
+                             Pica::TexturingRegs::TevStageConfig::Source source, u32 tev_index);
+
+    /// Writes the alpha component to use for the specified TEV stage alpha modifier
+    void AppendAlphaModifier(Pica::TexturingRegs::TevStageConfig::AlphaModifier modifier,
+                             Pica::TexturingRegs::TevStageConfig::Source source, u32 tev_index);
+
+    /// Writes the combiner function for the color components for the specified TEV stage operation
+    void AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
+
+    /// Writes the combiner function for the alpha component for the specified TEV stage operation
+    void AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
+
+    /// Writes the if-statement condition used to evaluate alpha testing
+    void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
+
+    /// Writes the code to emulate the specified TEV stage
+    void WriteTevStage(u32 index);
+
+    /// Appends the GLSL expression for a procedural texture shift offset computed from the
+    /// coordinate expression `v`; used as the initialiser of `u_shift` / `v_shift`
+    void AppendProcTexShiftOffset(std::string_view v, Pica::TexturingRegs::ProcTexShift mode,
+                                  Pica::TexturingRegs::ProcTexClamp clamp_mode);
+
+    /// Appends the code that clamps the procedural texture coordinate named `var` using `mode`
+    void AppendProcTexClamp(std::string_view var, Pica::TexturingRegs::ProcTexClamp mode);
+
+    /// Appends the GLSL expression of the procedural texture "combine and map" step for the
+    /// given combiner; `offset` names the map offset uniform to use (e.g.
+    /// `proctex_color_map_offset`)
+    void AppendProcTexCombineAndMap(Pica::TexturingRegs::ProcTexCombiner combiner,
+                                    std::string_view offset);
+
+    /// Emits the #extension directives and related macros; also decides
+    /// `use_fragment_shader_interlock` and `use_blend_fallback`
+    void DefineExtensions();
+    /// Declares the fragment stage inputs and the color output
+    void DefineInterface();
+    /// Declares the uniform block, LUT buffers, texture samplers and shadow images
+    void DefineBindings();
+    /// Emits the GLSL helper functions that are always present (byteround, getLod, ...)
+    void DefineHelpers();
+    /// Emits the lighting LUT lookup helpers when lighting is enabled
+    void DefineLightingHelpers();
+    /// Emits the shadow encode/compare/sample helpers needed by the configuration
+    void DefineShadowHelpers();
+    /// Emits the procedural texture sampler (presumably the `ProcTex()` function called by
+    /// sampleTexUnit3 - TODO confirm)
+    void DefineProcTexSampler();
+    /// Emits the `sampleTexUnit<i>()` function for texture unit `i`
+    void DefineTexUnitSampler(u32 i);
+
+private:
+    /// Fragment pipeline configuration the shader is generated for
+    const FSConfig& config;
+    /// Host feature/target flags consulted while emitting code
+    const Profile& profile;
+    /// Accumulated GLSL source
+    std::string out;
+    /// Set when blend emulation has to read the destination color from the `color_buffer`
+    /// sampler because no framebuffer fetch extension is available
+    bool use_blend_fallback{};
+    /// Set when shadow rendering is active and an interlock extension was enabled
+    bool use_fragment_shader_interlock{};
+};
+
+/**
+ * Generates the GLSL fragment shader program source code for the current Pica state
+ * @param config FSConfig generated for the current Pica state, used for the shader
+ *               configuration (NOTE: Use state in this struct only, not the Pica registers!)
+ * @param profile Host feature/target flags that select extensions and binding layout
+ * @returns String of the shader source code
+ */
+std::string GenerateFragmentShader(const FSConfig& config, const Profile& profile);
+
+} // namespace Pica::Shader::Generator::GLSL
diff --git a/src/video_core/shader/generator/glsl_shader_gen.cpp b/src/video_core/shader/generator/glsl_shader_gen.cpp index 33ebf5a586..a936851be7 100644 --- a/src/video_core/shader/generator/glsl_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_shader_gen.cpp @@ -4,19 +4,12 @@ #include #include + #include "common/logging/log.h" -#include "core/core.h" -#include "core/telemetry_session.h" #include "video_core/shader/generator/glsl_shader_decompiler.h" #include "video_core/shader/generator/glsl_shader_gen.h" -#include "video_core/shader/generator/shader_uniforms.h" -using Pica::FramebufferRegs; -using Pica::LightingRegs; -using Pica::RasterizerRegs; -using Pica::TexturingRegs; -using TevStageConfig = TexturingRegs::TevStageConfig; -using VSOutputAttributes = RasterizerRegs::VSOutputAttributes; +using VSOutputAttributes = Pica::RasterizerRegs::VSOutputAttributes; namespace Pica::Shader::Generator::GLSL { @@ -47,58 +40,6 @@ layout (binding = 1, std140) uniform vs_data { }; )"; -constexpr std::string_view FSUniformBlockDef = R"( -#define NUM_TEV_STAGES 6 -#define NUM_LIGHTS 8 -#define NUM_LIGHTING_SAMPLERS 24 -struct LightSrc { - vec3 specular_0; - vec3 specular_1; - vec3 diffuse; - vec3 ambient; - vec3 position; - vec3 spot_direction; - float dist_atten_bias; - float dist_atten_scale; -}; -#ifdef VULKAN -layout (set = 0, binding = 2, std140) uniform fs_data { -#else -layout (binding = 2, std140) uniform fs_data { -#endif - int framebuffer_scale; - int alphatest_ref; - float depth_scale; - float depth_offset; - float shadow_bias_constant; - float shadow_bias_linear; - int scissor_x1; - int scissor_y1; - int scissor_x2; - int scissor_y2; - int fog_lut_offset; - int proctex_noise_lut_offset; - int proctex_color_map_offset; - int proctex_alpha_map_offset; - int proctex_lut_offset;
- int proctex_diff_lut_offset; - float proctex_bias; - int shadow_texture_bias; - ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; - vec3 fog_color; - vec2 proctex_noise_f; - vec2 proctex_noise_a; - vec2 proctex_noise_p; - vec3 lighting_global_ambient; - LightSrc light_src[NUM_LIGHTS]; - vec4 const_color[NUM_TEV_STAGES]; - vec4 tev_combiner_buffer_color; - vec3 tex_lod_bias; - vec4 tex_border_color[3]; - vec4 blend_color; -}; -)"; - static std::string GetVertexInterfaceDeclaration(bool is_output, bool use_clip_planes, bool separable_shader) { std::string out; @@ -131,1513 +72,6 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool use_clip_p return out; } -/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) -static bool IsPassThroughTevStage(const TevStageConfig& stage) { - return (stage.color_op == TevStageConfig::Operation::Replace && - stage.alpha_op == TevStageConfig::Operation::Replace && - stage.color_source1 == TevStageConfig::Source::Previous && - stage.alpha_source1 == TevStageConfig::Source::Previous && - stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && - stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && - stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); -} - -/// Writes the specified TEV stage source component(s) -static void AppendSource(std::string& out, const PicaFSConfig& config, - TevStageConfig::Source source, std::string_view index_name) { - using Source = TevStageConfig::Source; - switch (source) { - case Source::PrimaryColor: - out += "rounded_primary_color"; - break; - case Source::PrimaryFragmentColor: - out += "primary_fragment_color"; - break; - case Source::SecondaryFragmentColor: - out += "secondary_fragment_color"; - break; - case Source::Texture0: - out += "sampleTexUnit0()"; - break; - case Source::Texture1: - out += "sampleTexUnit1()"; - break; - case Source::Texture2: - out += "sampleTexUnit2()"; - break; - 
case Source::Texture3: - out += "sampleTexUnit3()"; - break; - case Source::PreviousBuffer: - out += "combiner_buffer"; - break; - case Source::Constant: - out += fmt::format("const_color[{}]", index_name); - break; - case Source::Previous: - out += "last_tex_env_out"; - break; - default: - out += "vec4(0.0)"; - LOG_CRITICAL(Render, "Unknown source op {}", source); - break; - } -} - -/// Writes the color components to use for the specified TEV stage color modifier -static void AppendColorModifier(std::string& out, const PicaFSConfig& config, - TevStageConfig::ColorModifier modifier, - TevStageConfig::Source source, std::string_view index_name) { - using ColorModifier = TevStageConfig::ColorModifier; - switch (modifier) { - case ColorModifier::SourceColor: - AppendSource(out, config, source, index_name); - out += ".rgb"; - break; - case ColorModifier::OneMinusSourceColor: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".rgb"; - break; - case ColorModifier::SourceAlpha: - AppendSource(out, config, source, index_name); - out += ".aaa"; - break; - case ColorModifier::OneMinusSourceAlpha: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".aaa"; - break; - case ColorModifier::SourceRed: - AppendSource(out, config, source, index_name); - out += ".rrr"; - break; - case ColorModifier::OneMinusSourceRed: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".rrr"; - break; - case ColorModifier::SourceGreen: - AppendSource(out, config, source, index_name); - out += ".ggg"; - break; - case ColorModifier::OneMinusSourceGreen: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".ggg"; - break; - case ColorModifier::SourceBlue: - AppendSource(out, config, source, index_name); - out += ".bbb"; - break; - case ColorModifier::OneMinusSourceBlue: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".bbb"; - break; - 
default: - out += "vec3(0.0)"; - LOG_CRITICAL(Render, "Unknown color modifier op {}", modifier); - break; - } -} - -/// Writes the alpha component to use for the specified TEV stage alpha modifier -static void AppendAlphaModifier(std::string& out, const PicaFSConfig& config, - TevStageConfig::AlphaModifier modifier, - TevStageConfig::Source source, const std::string& index_name) { - using AlphaModifier = TevStageConfig::AlphaModifier; - switch (modifier) { - case AlphaModifier::SourceAlpha: - AppendSource(out, config, source, index_name); - out += ".a"; - break; - case AlphaModifier::OneMinusSourceAlpha: - out += "1.0 - "; - AppendSource(out, config, source, index_name); - out += ".a"; - break; - case AlphaModifier::SourceRed: - AppendSource(out, config, source, index_name); - out += ".r"; - break; - case AlphaModifier::OneMinusSourceRed: - out += "1.0 - "; - AppendSource(out, config, source, index_name); - out += ".r"; - break; - case AlphaModifier::SourceGreen: - AppendSource(out, config, source, index_name); - out += ".g"; - break; - case AlphaModifier::OneMinusSourceGreen: - out += "1.0 - "; - AppendSource(out, config, source, index_name); - out += ".g"; - break; - case AlphaModifier::SourceBlue: - AppendSource(out, config, source, index_name); - out += ".b"; - break; - case AlphaModifier::OneMinusSourceBlue: - out += "1.0 - "; - AppendSource(out, config, source, index_name); - out += ".b"; - break; - default: - out += "0.0"; - LOG_CRITICAL(Render, "Unknown alpha modifier op {}", modifier); - break; - } -} - -/// Writes the combiner function for the color components for the specified TEV stage operation -static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, - std::string_view variable_name) { - const auto get_combiner = [operation] { - using Operation = TevStageConfig::Operation; - switch (operation) { - case Operation::Replace: - return "color_results_1"; - case Operation::Modulate: - return "color_results_1 * color_results_2"; 
- case Operation::Add: - return "color_results_1 + color_results_2"; - case Operation::AddSigned: - return "color_results_1 + color_results_2 - vec3(0.5)"; - case Operation::Lerp: - return "color_results_1 * color_results_3 + color_results_2 * (vec3(1.0) - " - "color_results_3)"; - case Operation::Subtract: - return "color_results_1 - color_results_2"; - case Operation::MultiplyThenAdd: - return "color_results_1 * color_results_2 + color_results_3"; - case Operation::AddThenMultiply: - return "min(color_results_1 + color_results_2, vec3(1.0)) * color_results_3"; - case Operation::Dot3_RGB: - case Operation::Dot3_RGBA: - return "vec3(dot(color_results_1 - vec3(0.5), color_results_2 - vec3(0.5)) * 4.0)"; - default: - LOG_CRITICAL(Render, "Unknown color combiner operation: {}", operation); - return "vec3(0.0)"; - } - }; - - // Clamp result to 0.0, 1.0 - out += fmt::format("clamp({}, vec3(0.0), vec3(1.0))", get_combiner()); -} - -/// Writes the combiner function for the alpha component for the specified TEV stage operation -static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, - std::string_view variable_name) { - out += "clamp("; - using Operation = TevStageConfig::Operation; - switch (operation) { - case Operation::Replace: - out += "alpha_results_1"; - break; - case Operation::Modulate: - out += "alpha_results_1 * alpha_results_2"; - break; - case Operation::Add: - out += "alpha_results_1 + alpha_results_2"; - break; - case Operation::AddSigned: - out += "alpha_results_1 + alpha_results_2 - 0.5"; - break; - case Operation::Lerp: - out += "alpha_results_1 * alpha_results_3 + alpha_results_2 * (1.0 - alpha_results_3)"; - break; - case Operation::Subtract: - out += "alpha_results_1 - alpha_results_2"; - break; - case Operation::MultiplyThenAdd: - out += "alpha_results_1 * alpha_results_2 + alpha_results_3"; - break; - case Operation::AddThenMultiply: - out += "min(alpha_results_1 + alpha_results_2, 1.0) * alpha_results_3"; - break; - 
default: - out += "0.0"; - LOG_CRITICAL(Render, "Unknown alpha combiner operation: {}", operation); - break; - } - out += ", 0.0, 1.0)"; -} - -/// Writes the if-statement condition used to evaluate alpha testing -static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) { - using CompareFunc = FramebufferRegs::CompareFunc; - switch (func) { - case CompareFunc::Never: - out += "true"; - break; - case CompareFunc::Always: - out += "false"; - break; - case CompareFunc::Equal: - case CompareFunc::NotEqual: - case CompareFunc::LessThan: - case CompareFunc::LessThanOrEqual: - case CompareFunc::GreaterThan: - case CompareFunc::GreaterThanOrEqual: { - static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"}; - const auto index = static_cast(func) - static_cast(CompareFunc::Equal); - out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]); - break; - } - - default: - out += "false"; - LOG_CRITICAL(Render, "Unknown alpha test condition {}", func); - break; - } -} - -/// Writes the code to emulate the specified TEV stage -static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) { - const auto stage = - static_cast(config.state.tev_stages[index]); - if (!IsPassThroughTevStage(stage)) { - const std::string index_name = std::to_string(index); - - out += fmt::format("color_results_1 = ", index_name); - AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); - out += fmt::format(";\ncolor_results_2 = ", index_name); - AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); - out += fmt::format(";\ncolor_results_3 = ", index_name); - AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); - - // Round the output of each TEV stage to maintain the PICA's 8 bits of precision - out += fmt::format(";\nvec3 color_output_{} = byteround(", index_name); - AppendColorCombiner(out, stage.color_op, 
"color_results"); - out += ");\n"; - - if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { - // result of Dot3_RGBA operation is also placed to the alpha component - out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name); - } else { - out += fmt::format("alpha_results_1 = ", index_name); - AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, - index_name); - out += fmt::format(";\nalpha_results_2 = ", index_name); - AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, - index_name); - out += fmt::format(";\nalpha_results_3 = ", index_name); - AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, - index_name); - - out += fmt::format(";\nfloat alpha_output_{} = byteround(", index_name); - AppendAlphaCombiner(out, stage.alpha_op, "alpha_results"); - out += ");\n"; - } - - out += fmt::format("last_tex_env_out = vec4(" - "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " - "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", - index_name, stage.GetColorMultiplier(), index_name, - stage.GetAlphaMultiplier()); - } - - out += "combiner_buffer = next_combiner_buffer;\n"; - - if (config.TevStageUpdatesCombinerBufferColor(index)) - out += "next_combiner_buffer.rgb = last_tex_env_out.rgb;\n"; - - if (config.TevStageUpdatesCombinerBufferAlpha(index)) - out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; -} - -/// Writes the code to emulate fragment lighting -static void WriteLighting(std::string& out, const PicaFSConfig& config) { - const auto& lighting = config.state.lighting; - - // Define lighting globals - out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" - "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" - "vec3 light_vector = vec3(0.0);\n" - "float light_distance = 0.0;\n" - "vec3 refl_value = vec3(0.0);\n" - "vec3 spot_dir = vec3(0.0);\n" - "vec3 half_vector = vec3(0.0);\n" - "float dot_product = 0.0;\n" - "float clamp_highlights = 1.0;\n" - "float 
geo_factor = 1.0;\n"; - - // Compute fragment normals and tangents - const auto perturbation = [&] { - return fmt::format("2.0 * (sampleTexUnit{}()).rgb - 1.0", lighting.bump_selector.Value()); - }; - - if (config.state.use_custom_normal_map) { - const std::string normal_texel = - fmt::format("2.0 * (texture(tex_normal, texcoord0)).rgb - 1.0"); - out += fmt::format("vec3 surface_normal = {};\n", normal_texel); - out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; - } else { - switch (lighting.bump_mode) { - case LightingRegs::LightingBumpMode::NormalMap: { - // Bump mapping is enabled using a normal map - out += fmt::format("vec3 surface_normal = {};\n", perturbation()); - - // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher - // precision result - if (lighting.bump_renorm) { - constexpr std::string_view val = "(1.0 - (surface_normal.x*surface_normal.x + " - "surface_normal.y*surface_normal.y))"; - out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val); - } - - // The tangent vector is not perturbed by the normal map and is just a unit vector. - out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; - break; - } - case LightingRegs::LightingBumpMode::TangentMap: { - // Bump mapping is enabled using a tangent map - out += fmt::format("vec3 surface_tangent = {};\n", perturbation()); - // Mathematically, recomputing Z-component of the tangent vector won't affect the - // relevant computation below, which is also confirmed on 3DS. So we don't bother - // recomputing here even if 'renorm' is enabled. - - // The normal vector is not perturbed by the tangent map and is just a unit vector. 
- out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; - break; - } - default: - // No bump mapping - surface local normal and tangent are just unit vectors - out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n" - "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; - } - } - - // Rotate the surface-local normal by the interpolated normal quaternion to convert it to - // eyespace. - out += "vec4 normalized_normquat = normalize(normquat);\n" - "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n" - "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; - - if (lighting.enable_shadow) { - std::string shadow_texture = - fmt::format("sampleTexUnit{}()", lighting.shadow_selector.Value()); - if (lighting.shadow_invert) { - out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); - } else { - out += fmt::format("vec4 shadow = {};\n", shadow_texture); - } - } else { - out += "vec4 shadow = vec4(1.0);\n"; - } - - // Samples the specified lookup table for specular lighting - auto get_lut_value = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num, - LightingRegs::LightingLutInput input, bool abs) { - std::string index; - switch (input) { - case LightingRegs::LightingLutInput::NH: - index = "dot(normal, normalize(half_vector))"; - break; - - case LightingRegs::LightingLutInput::VH: - index = "dot(normalize(view), normalize(half_vector))"; - break; - - case LightingRegs::LightingLutInput::NV: - index = "dot(normal, normalize(view))"; - break; - - case LightingRegs::LightingLutInput::LN: - index = "dot(light_vector, normal)"; - break; - - case LightingRegs::LightingLutInput::SP: - index = "dot(light_vector, spot_dir)"; - break; - - case LightingRegs::LightingLutInput::CP: - // CP input is only available with configuration 7 - if (lighting.config == LightingRegs::LightingConfig::Config7) { - // Note: even if the normal vector is modified by normal map, which is not the - // normal of the tangent plane 
anymore, the half angle vector is still projected - // using the modified normal vector. - constexpr std::string_view half_angle_proj = - "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; - // Note: the half angle vector projection is confirmed not normalized before the dot - // product. The result is in fact not cos(phi) as the name suggested. - index = fmt::format("dot({}, tangent)", half_angle_proj); - } else { - index = "0.0"; - } - break; - - default: - LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", static_cast(input)); - UNIMPLEMENTED(); - index = "0.0"; - break; - } - - const auto sampler_index = static_cast(sampler); - - if (abs) { - // LUT index is in the range of (0.0, 1.0) - index = lighting.light[light_num].two_sided_diffuse - ? fmt::format("abs({})", index) - : fmt::format("max({}, 0.0)", index); - return fmt::format("LookupLightingLUTUnsigned({}, {})", sampler_index, index); - } else { - // LUT index is in the range of (-1.0, 1.0) - return fmt::format("LookupLightingLUTSigned({}, {})", sampler_index, index); - } - }; - - // Write the code to emulate each enabled light - for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { - const auto& light_config = lighting.light[light_index]; - const std::string light_src = fmt::format("light_src[{}]", light_config.num.Value()); - - // Compute light vector (directional or positional) - if (light_config.directional) { - out += fmt::format("light_vector = {}.position;\n", light_src); - } else { - out += fmt::format("light_vector = {}.position + view;\n", light_src); - } - out += fmt::format("light_distance = length(light_vector);\n", light_src); - out += fmt::format("light_vector = normalize(light_vector);\n", light_src); - - out += fmt::format("spot_dir = {}.spot_direction;\n", light_src); - out += "half_vector = normalize(view) + light_vector;\n"; - - // Compute dot product of light_vector and normal, adjust if lighting is one-sided or - // two-sided - out += 
std::string("dot_product = ") + (light_config.two_sided_diffuse - ? "abs(dot(light_vector, normal));\n" - : "max(dot(light_vector, normal), 0.0);\n"); - - // If enabled, clamp specular component if lighting result is zero - if (lighting.clamp_highlights) { - out += "clamp_highlights = sign(dot_product);\n"; - } - - // If enabled, compute spot light attenuation value - std::string spot_atten = "1.0"; - if (light_config.spot_atten_enable && - LightingRegs::IsLightingSamplerSupported( - lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { - const std::string value = - get_lut_value(LightingRegs::SpotlightAttenuationSampler(light_config.num), - light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); - spot_atten = fmt::format("({:#} * {})", lighting.lut_sp.scale, value); - } - - // If enabled, compute distance attenuation value - std::string dist_atten = "1.0"; - if (light_config.dist_atten_enable) { - const std::string index = fmt::format("clamp({}.dist_atten_scale * light_distance " - "+ {}.dist_atten_bias, 0.0, 1.0)", - light_src, light_src, light_src); - const auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num); - dist_atten = fmt::format("LookupLightingLUTUnsigned({}, {})", sampler, index); - } - - if (light_config.geometric_factor_0 || light_config.geometric_factor_1) { - out += "geo_factor = dot(half_vector, half_vector);\n" - "geo_factor = geo_factor == 0.0 ? 
0.0 : min(" - "dot_product / geo_factor, 1.0);\n"; - } - - // Specular 0 component - std::string d0_lut_value = "1.0"; - if (lighting.lut_d0.enable && - LightingRegs::IsLightingSamplerSupported( - lighting.config, LightingRegs::LightingSampler::Distribution0)) { - // Lookup specular "distribution 0" LUT value - const std::string value = - get_lut_value(LightingRegs::LightingSampler::Distribution0, light_config.num, - lighting.lut_d0.type, lighting.lut_d0.abs_input); - d0_lut_value = fmt::format("({:#} * {})", lighting.lut_d0.scale, value); - } - std::string specular_0 = fmt::format("({} * {}.specular_0)", d0_lut_value, light_src); - if (light_config.geometric_factor_0) { - specular_0 = fmt::format("({} * geo_factor)", specular_0); - } - - // If enabled, lookup ReflectRed value, otherwise, 1.0 is used - if (lighting.lut_rr.enable && - LightingRegs::IsLightingSamplerSupported(lighting.config, - LightingRegs::LightingSampler::ReflectRed)) { - std::string value = - get_lut_value(LightingRegs::LightingSampler::ReflectRed, light_config.num, - lighting.lut_rr.type, lighting.lut_rr.abs_input); - value = fmt::format("({:#} * {})", lighting.lut_rr.scale, value); - out += fmt::format("refl_value.r = {};\n", value); - } else { - out += "refl_value.r = 1.0;\n"; - } - - // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used - if (lighting.lut_rg.enable && - LightingRegs::IsLightingSamplerSupported(lighting.config, - LightingRegs::LightingSampler::ReflectGreen)) { - std::string value = - get_lut_value(LightingRegs::LightingSampler::ReflectGreen, light_config.num, - lighting.lut_rg.type, lighting.lut_rg.abs_input); - value = fmt::format("({:#} * {})", lighting.lut_rg.scale, value); - out += fmt::format("refl_value.g = {};\n", value); - } else { - out += "refl_value.g = refl_value.r;\n"; - } - - // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used - if (lighting.lut_rb.enable && - 
LightingRegs::IsLightingSamplerSupported(lighting.config, - LightingRegs::LightingSampler::ReflectBlue)) { - std::string value = - get_lut_value(LightingRegs::LightingSampler::ReflectBlue, light_config.num, - lighting.lut_rb.type, lighting.lut_rb.abs_input); - value = fmt::format("({:#} * {})", lighting.lut_rb.scale, value); - out += fmt::format("refl_value.b = {};\n", value); - } else { - out += "refl_value.b = refl_value.r;\n"; - } - - // Specular 1 component - std::string d1_lut_value = "1.0"; - if (lighting.lut_d1.enable && - LightingRegs::IsLightingSamplerSupported( - lighting.config, LightingRegs::LightingSampler::Distribution1)) { - // Lookup specular "distribution 1" LUT value - const std::string value = - get_lut_value(LightingRegs::LightingSampler::Distribution1, light_config.num, - lighting.lut_d1.type, lighting.lut_d1.abs_input); - d1_lut_value = fmt::format("({:#} * {})", lighting.lut_d1.scale, value); - } - std::string specular_1 = - fmt::format("({} * refl_value * {}.specular_1)", d1_lut_value, light_src); - if (light_config.geometric_factor_1) { - specular_1 = fmt::format("({} * geo_factor)", specular_1); - } - - // Fresnel - // Note: only the last entry in the light slots applies the Fresnel factor - if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && - LightingRegs::IsLightingSamplerSupported(lighting.config, - LightingRegs::LightingSampler::Fresnel)) { - // Lookup fresnel LUT value - std::string value = - get_lut_value(LightingRegs::LightingSampler::Fresnel, light_config.num, - lighting.lut_fr.type, lighting.lut_fr.abs_input); - value = fmt::format("({:#} * {})", lighting.lut_fr.scale, value); - - // Enabled for diffuse lighting alpha component - if (lighting.enable_primary_alpha) { - out += fmt::format("diffuse_sum.a = {};\n", value); - } - - // Enabled for the specular lighting alpha component - if (lighting.enable_secondary_alpha) { - out += fmt::format("specular_sum.a = {};\n", value); - } - } - - bool shadow_primary_enable = 
lighting.shadow_primary && light_config.shadow_enable; - bool shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable; - std::string shadow_primary = shadow_primary_enable ? " * shadow.rgb" : ""; - std::string shadow_secondary = shadow_secondary_enable ? " * shadow.rgb" : ""; - - // Compute primary fragment color (diffuse lighting) function - out += fmt::format( - "diffuse_sum.rgb += (({}.diffuse * dot_product) + {}.ambient) * {} * {}{};\n", - light_src, light_src, dist_atten, spot_atten, shadow_primary); - - // Compute secondary fragment color (specular lighting) function - out += fmt::format("specular_sum.rgb += ({} + {}) * clamp_highlights * {} * {}{};\n", - specular_0, specular_1, dist_atten, spot_atten, shadow_secondary); - } - - // Apply shadow attenuation to alpha components if enabled - if (lighting.shadow_alpha) { - if (lighting.enable_primary_alpha) { - out += "diffuse_sum.a *= shadow.a;\n"; - } - if (lighting.enable_secondary_alpha) { - out += "specular_sum.a *= shadow.a;\n"; - } - } - - // Sum final lighting result - out += "diffuse_sum.rgb += lighting_global_ambient;\n" - "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n" - "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; -} - -using ProcTexClamp = TexturingRegs::ProcTexClamp; -using ProcTexShift = TexturingRegs::ProcTexShift; -using ProcTexCombiner = TexturingRegs::ProcTexCombiner; -using ProcTexFilter = TexturingRegs::ProcTexFilter; - -static void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode, - ProcTexClamp clamp_mode) { - const std::string_view offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 
"1.0" : "0.5"; - switch (mode) { - case ProcTexShift::None: - out += "0.0"; - break; - case ProcTexShift::Odd: - out += fmt::format("{} * float((int({}) / 2) % 2)", offset, v); - break; - case ProcTexShift::Even: - out += fmt::format("{} * float(((int({}) + 1) / 2) % 2)", offset, v); - break; - default: - LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode); - out += "0.0"; - break; - } -} - -static void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) { - switch (mode) { - case ProcTexClamp::ToZero: - out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var); - break; - case ProcTexClamp::ToEdge: - out += fmt::format("{0} = min({0}, 1.0);\n", var); - break; - case ProcTexClamp::SymmetricalRepeat: - out += fmt::format("{0} = fract({0});\n", var); - break; - case ProcTexClamp::MirroredRepeat: { - out += fmt::format("{0} = int({0}) % 2 == 0 ? fract({0}) : 1.0 - fract({0});\n", var); - break; - } - case ProcTexClamp::Pulse: - out += fmt::format("{0} = {0} > 0.5 ? 1.0 : 0.0;\n", var); - break; - default: - LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode); - out += fmt::format("{0} = min({0}, 1.0);\n", var); - break; - } -} - -static void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, - std::string_view offset) { - const auto combined = [combiner]() -> std::string_view { - switch (combiner) { - case ProcTexCombiner::U: - return "u"; - case ProcTexCombiner::U2: - return "(u * u)"; - case TexturingRegs::ProcTexCombiner::V: - return "v"; - case TexturingRegs::ProcTexCombiner::V2: - return "(v * v)"; - case TexturingRegs::ProcTexCombiner::Add: - return "((u + v) * 0.5)"; - case TexturingRegs::ProcTexCombiner::Add2: - return "((u * u + v * v) * 0.5)"; - case TexturingRegs::ProcTexCombiner::SqrtAdd2: - return "min(sqrt(u * u + v * v), 1.0)"; - case TexturingRegs::ProcTexCombiner::Min: - return "min(u, v)"; - case TexturingRegs::ProcTexCombiner::Max: - return "max(u, v)"; - case TexturingRegs::ProcTexCombiner::RMax: - return 
"min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; - default: - LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner); - return "0.0"; - } - }(); - - out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined); -} - -static void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { - // LUT sampling uitlity - // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and - // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using - // value entries and difference entries. - out += R"( -float ProcTexLookupLUT(int offset, float coord) { - coord *= 128.0; - float index_i = clamp(floor(coord), 0.0, 127.0); - float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be - // extracted as index_i = 127.0 and index_f = 1.0 - vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg; - return clamp(entry.r + entry.g * index_f, 0.0, 1.0); -} - )"; - - // Noise utility - if (config.state.proctex.noise_enable) { - // See swrasterizer/proctex.cpp for more information about these functions - out += R"( -int ProcTexNoiseRand1D(int v) { - const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11); - return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; -} - -float ProcTexNoiseRand2D(vec2 point) { - const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14); - int u2 = ProcTexNoiseRand1D(int(point.x)); - int v2 = ProcTexNoiseRand1D(int(point.y)); - v2 += ((u2 & 3) == 1) ? 
4 : 0; - v2 ^= (u2 & 1) * 6; - v2 += 10 + u2; - v2 &= 0xF; - v2 ^= table[u2]; - return -1.0 + float(v2) * (2.0/15.0); -} - -float ProcTexNoiseCoef(vec2 x) { - vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p); - vec2 point = floor(grid); - vec2 frac = grid - point; - - float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y); - float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0); - float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); - float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); - - float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x); - float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y); - float x0 = mix(g0, g1, x_noise); - float x1 = mix(g2, g3, x_noise); - return mix(x0, x1, y_noise); -} - )"; - } - - out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n"; - out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width); - // Offsets for level 4-7 seem to be hardcoded - out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n", - config.state.proctex.lut_offset0, config.state.proctex.lut_offset1, - config.state.proctex.lut_offset2, config.state.proctex.lut_offset3); - out += "int lut_offset = lut_offsets[level];\n"; - // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] - out += "lut_coord *= float(lut_width - 1);\n"; - - switch (config.state.proctex.lut_filter) { - case ProcTexFilter::Linear: - case ProcTexFilter::LinearMipmapLinear: - case ProcTexFilter::LinearMipmapNearest: - out += "int lut_index_i = int(lut_coord) + lut_offset;\n"; - out += "float lut_index_f = fract(lut_coord);\n"; - out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + " - "proctex_lut_offset) + " - "lut_index_f * " - "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n"; - break; - case ProcTexFilter::Nearest: - case 
ProcTexFilter::NearestMipmapLinear: - case ProcTexFilter::NearestMipmapNearest: - out += "lut_coord += float(lut_offset);\n"; - out += "return texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + " - "proctex_lut_offset);\n"; - break; - } - - out += "}\n"; - - out += "vec4 ProcTex() {\n"; - if (config.state.proctex.coord < 3) { - out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord.Value()); - } else { - LOG_CRITICAL(Render, "Unexpected proctex.coord >= 3"); - out += "vec2 uv = abs(texcoord0);\n"; - } - - // This LOD formula is the same as the LOD upper limit defined in OpenGL. - // f(x, y) <= m_u + m_v + m_w - // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) - // Note: this is different from the one normal 2D textures use. - out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n"; - // unlike normal texture, the bias is inside the log2 - out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n", - config.state.proctex.lut_width); - out += "if (proctex_bias == 0.0) lod = 0.0;\n"; - out += fmt::format("lod = clamp(lod, {:#}, {:#});\n", - std::max(0.0f, static_cast(config.state.proctex.lod_min)), - std::min(7.0f, static_cast(config.state.proctex.lod_max))); - // Get shift offset before noise generation - out += "float u_shift = "; - AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift, - config.state.proctex.u_clamp); - out += ";\n"; - out += "float v_shift = "; - AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift, - config.state.proctex.v_clamp); - out += ";\n"; - - // Generate noise - if (config.state.proctex.noise_enable) { - out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n" - "uv = abs(uv);\n"; - } - - // Shift - out += "float u = uv.x + u_shift;\n" - "float v = uv.y + v_shift;\n"; - - // Clamp - AppendProcTexClamp(out, "u", config.state.proctex.u_clamp); - AppendProcTexClamp(out, "v", config.state.proctex.v_clamp); - - // Combine and map - out 
+= "float lut_coord = "; - AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, - "proctex_color_map_offset"); - out += ";\n"; - - switch (config.state.proctex.lut_filter) { - case ProcTexFilter::Linear: - case ProcTexFilter::Nearest: - out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n"; - break; - case ProcTexFilter::NearestMipmapNearest: - case ProcTexFilter::LinearMipmapNearest: - out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n"; - break; - case ProcTexFilter::NearestMipmapLinear: - case ProcTexFilter::LinearMipmapLinear: - out += "int lod_i = int(lod);\n" - "float lod_f = fract(lod);\n" - "vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), " - "SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n"; - break; - } - - if (config.state.proctex.separate_alpha) { - // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It - // uses the output of CombineAndMap directly instead. - out += "float final_alpha = "; - AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, - "proctex_alpha_map_offset"); - out += ";\n"; - out += "return vec4(final_color.xyz, final_alpha);\n}\n"; - } else { - out += "return final_color;\n}\n"; - } -} - -static void WriteLogicOp(std::string& out, const PicaFSConfig& config) { - if (!config.state.emulate_logic_op) { - return; - } - switch (config.state.logic_op) { - case FramebufferRegs::LogicOp::Clear: - out += "color = vec4(0);\n"; - break; - case FramebufferRegs::LogicOp::Set: - out += "color = vec4(1);\n"; - break; - case FramebufferRegs::LogicOp::Copy: - // Take the color output as-is - break; - case FramebufferRegs::LogicOp::CopyInverted: - out += "color = ~color;\n"; - break; - case FramebufferRegs::LogicOp::NoOp: - // We need to discard the color, but not necessarily the depth. This is not possible - // with fragment shader alone, so we emulate this behavior on GLES with glColorMask. 
- break; - default: - LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", config.state.logic_op.Value()); - UNIMPLEMENTED(); - } -} - -static void WriteBlending(std::string& out, const PicaFSConfig& config) { - if (!config.state.rgb_blend.emulate_blending && !config.state.alpha_blend.emulate_blending) - [[likely]] { - return; - } - - using BlendFactor = Pica::FramebufferRegs::BlendFactor; - out += R"( -vec4 source_color = last_tex_env_out; -#if defined(GL_EXT_shader_framebuffer_fetch) -vec4 dest_color = color; -#elif defined(GL_ARM_shader_framebuffer_fetch) -vec4 dest_color = gl_LastFragColorARM; -#else -vec4 dest_color = texelFetch(colorBuffer, ivec2(gl_FragCoord.xy), 0); -#endif -)"; - const auto get_factor = [&](BlendFactor factor) -> std::string { - switch (factor) { - case BlendFactor::Zero: - return "vec4(0.f)"; - case BlendFactor::One: - return "vec4(1.f)"; - case BlendFactor::SourceColor: - return "source_color"; - case BlendFactor::OneMinusSourceColor: - return "vec4(1.f) - source_color"; - case BlendFactor::DestColor: - return "dest_color"; - case BlendFactor::OneMinusDestColor: - return "vec4(1.f) - dest_color"; - case BlendFactor::SourceAlpha: - return "source_color.aaaa"; - case BlendFactor::OneMinusSourceAlpha: - return "vec4(1.f) - source_color.aaaa"; - case BlendFactor::DestAlpha: - return "dest_color.aaaa"; - case BlendFactor::OneMinusDestAlpha: - return "vec4(1.f) - dest_color.aaaa"; - case BlendFactor::ConstantColor: - return "blend_color"; - case BlendFactor::OneMinusConstantColor: - return "vec4(1.f) - blend_color"; - case BlendFactor::ConstantAlpha: - return "blend_color.aaaa"; - case BlendFactor::OneMinusConstantAlpha: - return "vec4(1.f) - blend_color.aaaa"; - default: - LOG_CRITICAL(Render_OpenGL, "Unknown blend factor {}", factor); - return "vec4(1.f)"; - } - }; - const auto get_func = [](Pica::FramebufferRegs::BlendEquation eq) { - return eq == Pica::FramebufferRegs::BlendEquation::Min ? 
"min" : "max"; - }; - - if (config.state.rgb_blend.emulate_blending) { - out += fmt::format( - "last_tex_env_out.rgb = {}(source_color.rgb * ({}).rgb, dest_color.rgb * ({}).rgb);\n", - get_func(config.state.rgb_blend.eq), get_factor(config.state.rgb_blend.src_factor), - get_factor(config.state.rgb_blend.dst_factor)); - } - if (config.state.alpha_blend.emulate_blending) { - out += fmt::format( - "last_tex_env_out.a = {}(source_color.a * ({}).a, dest_color.a * ({}).a);\n", - get_func(config.state.alpha_blend.eq), get_factor(config.state.alpha_blend.src_factor), - get_factor(config.state.alpha_blend.dst_factor)); - } -} - -std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { - const auto& state = config.state; - std::string out; - - if (separable_shader) { - out += "#extension GL_ARB_separate_shader_objects : enable\n"; - } - - if (state.use_fragment_shader_interlock) { - out += R"( -#if defined(GL_ARB_fragment_shader_interlock) -#extension GL_ARB_fragment_shader_interlock : enable -#define beginInvocationInterlock beginInvocationInterlockARB -#define endInvocationInterlock endInvocationInterlockARB -#elif defined(GL_NV_fragment_shader_interlock) -#extension GL_NV_fragment_shader_interlock : enable -#define beginInvocationInterlock beginInvocationInterlockNV -#define endInvocationInterlock endInvocationInterlockNV -#elif defined(GL_INTEL_fragment_shader_ordering) -#extension GL_INTEL_fragment_shader_ordering : enable -#define beginInvocationInterlock beginFragmentShaderOrderingINTEL -#define endInvocationInterlock -#endif - -layout(pixel_interlock_ordered) in; -)"; - } - - if (config.state.rgb_blend.emulate_blending || config.state.alpha_blend.emulate_blending) { - out += R"( -#if defined(GL_EXT_shader_framebuffer_fetch) -#extension GL_EXT_shader_framebuffer_fetch : enable -#elif defined(GL_ARM_shader_framebuffer_fetch) -#extension GL_ARM_shader_framebuffer_fetch : enable -#else -#define CITRA_EMULATED_BLENDING_FALLBACK 1 -#endif 
-)"; - } - - out += fragment_shader_precision_OES; - out += GetVertexInterfaceDeclaration(false, false, separable_shader); - - out += R"( -layout (location = 0) out vec4 color; - -#ifdef VULKAN -layout(set = 0, binding = 3) uniform samplerBuffer texture_buffer_lut_lf; -layout(set = 0, binding = 4) uniform samplerBuffer texture_buffer_lut_rg; -layout(set = 0, binding = 5) uniform samplerBuffer texture_buffer_lut_rgba; - -layout(set = 1, binding = 0) uniform sampler2D tex0; -layout(set = 1, binding = 1) uniform sampler2D tex1; -layout(set = 1, binding = 2) uniform sampler2D tex2; -layout(set = 1, binding = 3) uniform samplerCube tex_cube; -// TODO: Binding for custom normal maps, when supported by Vulkan. - -layout(set = 2, binding = 0, r32ui) uniform readonly uimage2D shadow_texture_px; -layout(set = 2, binding = 1, r32ui) uniform readonly uimage2D shadow_texture_nx; -layout(set = 2, binding = 2, r32ui) uniform readonly uimage2D shadow_texture_py; -layout(set = 2, binding = 3, r32ui) uniform readonly uimage2D shadow_texture_ny; -layout(set = 2, binding = 4, r32ui) uniform readonly uimage2D shadow_texture_pz; -layout(set = 2, binding = 5, r32ui) uniform readonly uimage2D shadow_texture_nz; -layout(set = 2, binding = 6, r32ui) uniform uimage2D shadow_buffer; -#else -layout(binding = 0) uniform sampler2D tex0; -layout(binding = 1) uniform sampler2D tex1; -layout(binding = 2) uniform sampler2D tex2; -layout(binding = 3) uniform samplerBuffer texture_buffer_lut_lf; -layout(binding = 4) uniform samplerBuffer texture_buffer_lut_rg; -layout(binding = 5) uniform samplerBuffer texture_buffer_lut_rgba; -layout(binding = 6) uniform samplerCube tex_cube; -layout(binding = 7) uniform sampler2D tex_normal; - -layout(binding = 0, r32ui) uniform readonly uimage2D shadow_texture_px; -layout(binding = 1, r32ui) uniform readonly uimage2D shadow_texture_nx; -layout(binding = 2, r32ui) uniform readonly uimage2D shadow_texture_py; -layout(binding = 3, r32ui) uniform readonly uimage2D 
shadow_texture_ny; -layout(binding = 4, r32ui) uniform readonly uimage2D shadow_texture_pz; -layout(binding = 5, r32ui) uniform readonly uimage2D shadow_texture_nz; -layout(binding = 6, r32ui) uniform uimage2D shadow_buffer; - -#if defined(CITRA_EMULATED_BLENDING_FALLBACK) -layout(location = 10) uniform sampler2D colorBuffer; -#endif -#endif -)"; - - out += FSUniformBlockDef; - - out += R"( -// Rotate the vector v by the quaternion q -vec3 quaternion_rotate(vec4 q, vec3 v) { - return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); -} - -float byteround(float x) { - return round(x * 255.0) * (1.0 / 255.0); -} - -vec2 byteround(vec2 x) { - return round(x * 255.0) * (1.0 / 255.0); -} - -vec3 byteround(vec3 x) { - return round(x * 255.0) * (1.0 / 255.0); -} - -vec4 byteround(vec4 x) { - return round(x * 255.0) * (1.0 / 255.0); -} - -// PICA's LOD formula for 2D textures. -// This LOD formula is the same as the LOD lower limit defined in OpenGL. -// f(x, y) >= max{m_u, m_v, m_w} -// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) -float getLod(vec2 coord) { - vec2 d = max(abs(dFdx(coord)), abs(dFdy(coord))); - return log2(max(d.x, d.y)); -} - -uvec2 DecodeShadow(uint pixel) { - return uvec2(pixel >> 8, pixel & 0xFFu); -} - -uint EncodeShadow(uvec2 pixel) { - return (pixel.x << 8) | pixel.y; -} -)"; - - if (state.shadow_rendering) { - out += R"( -uint UpdateShadow(uint pixel, uint d, uint s) { - uvec2 ref = DecodeShadow(pixel); - if (d < ref.x) { - if (s == 0u) { - ref.x = d; - } else { - s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); - ref.y = min(s, ref.y); - } - } - return EncodeShadow(ref); -} -)"; - } - - if (state.lighting.enable) { - out += R"( -float LookupLightingLUT(int lut_index, int index, float delta) { - vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; - return entry.r + entry.g * delta; -} - -float LookupLightingLUTUnsigned(int 
lut_index, float pos) { - int index = int(clamp(floor(pos * 256.0), 0.f, 255.f)); - float delta = pos * 256.0 - float(index); - return LookupLightingLUT(lut_index, index, delta); -} - -float LookupLightingLUTSigned(int lut_index, float pos) { - int index = int(clamp(floor(pos * 128.0), -128.f, 127.f)); - float delta = pos * 128.0 - float(index); - if (index < 0) index += 256; - return LookupLightingLUT(lut_index, index, delta); -} -)"; - } - - if (state.texture0_type == TexturingRegs::TextureConfig::Shadow2D || - state.texture0_type == TexturingRegs::TextureConfig::ShadowCube) { - out += R"( -float CompareShadow(uint pixel, uint z) { - uvec2 p = DecodeShadow(pixel); - return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); -} - -float mix2(vec4 s, vec2 a) { - vec2 t = mix(s.xy, s.zw, a.yy); - return mix(t.x, t.y, a.x); -} -)"; - - if (state.texture0_type == TexturingRegs::TextureConfig::Shadow2D) { - out += R"( -float SampleShadow2D(ivec2 uv, uint z) { - if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) - return 1.0; - return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); -} - -vec4 shadowTexture(vec2 uv, float w) { -)"; - if (!config.state.shadow_texture_orthographic) { - out += "uv /= w;"; - } - out += R"( - uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); - vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); - vec2 coord_floor = floor(coord); - vec2 f = coord - coord_floor; - ivec2 i = ivec2(coord_floor); - vec4 s = vec4( - SampleShadow2D(i , z), - SampleShadow2D(i + ivec2(1, 0), z), - SampleShadow2D(i + ivec2(0, 1), z), - SampleShadow2D(i + ivec2(1, 1), z)); - return vec4(mix2(s, f)); -} -)"; - } else if (state.texture0_type == TexturingRegs::TextureConfig::ShadowCube) { - out += R"( -vec4 shadowTextureCube(vec2 uv, float w) { - ivec2 size = imageSize(shadow_texture_px); - vec3 c = vec3(uv, w); - vec3 a = abs(c); - if (a.x > a.y && a.x > a.z) { - w = a.x; - uv = 
-c.zy; - if (c.x < 0.0) uv.x = -uv.x; - } else if (a.y > a.z) { - w = a.y; - uv = c.xz; - if (c.y < 0.0) uv.y = -uv.y; - } else { - w = a.z; - uv = -c.xy; - if (c.z > 0.0) uv.x = -uv.x; - } - uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); - vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); - vec2 coord_floor = floor(coord); - vec2 f = coord - coord_floor; - ivec2 i00 = ivec2(coord_floor); - ivec2 i10 = i00 + ivec2(1, 0); - ivec2 i01 = i00 + ivec2(0, 1); - ivec2 i11 = i00 + ivec2(1, 1); - ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); - i00 = clamp(i00, cmin, cmax); - i10 = clamp(i10, cmin, cmax); - i01 = clamp(i01, cmin, cmax); - i11 = clamp(i11, cmin, cmax); - uvec4 pixels; - // This part should have been refactored into functions, - // but many drivers don't like passing uimage2D as parameters - if (a.x > a.y && a.x > a.z) { - if (c.x > 0.0) - pixels = uvec4( - imageLoad(shadow_texture_px, i00).r, - imageLoad(shadow_texture_px, i10).r, - imageLoad(shadow_texture_px, i01).r, - imageLoad(shadow_texture_px, i11).r); - else - pixels = uvec4( - imageLoad(shadow_texture_nx, i00).r, - imageLoad(shadow_texture_nx, i10).r, - imageLoad(shadow_texture_nx, i01).r, - imageLoad(shadow_texture_nx, i11).r); - } else if (a.y > a.z) { - if (c.y > 0.0) - pixels = uvec4( - imageLoad(shadow_texture_py, i00).r, - imageLoad(shadow_texture_py, i10).r, - imageLoad(shadow_texture_py, i01).r, - imageLoad(shadow_texture_py, i11).r); - else - pixels = uvec4( - imageLoad(shadow_texture_ny, i00).r, - imageLoad(shadow_texture_ny, i10).r, - imageLoad(shadow_texture_ny, i01).r, - imageLoad(shadow_texture_ny, i11).r); - } else { - if (c.z > 0.0) - pixels = uvec4( - imageLoad(shadow_texture_pz, i00).r, - imageLoad(shadow_texture_pz, i10).r, - imageLoad(shadow_texture_pz, i01).r, - imageLoad(shadow_texture_pz, i11).r); - else - pixels = uvec4( - imageLoad(shadow_texture_nz, i00).r, - imageLoad(shadow_texture_nz, i10).r, - 
imageLoad(shadow_texture_nz, i01).r, - imageLoad(shadow_texture_nz, i11).r); - } - vec4 s = vec4( - CompareShadow(pixels.x, z), - CompareShadow(pixels.y, z), - CompareShadow(pixels.z, z), - CompareShadow(pixels.w, z)); - return vec4(mix2(s, f)); -} - )"; - } - } - - if (config.state.proctex.enable) { - AppendProcTexSampler(out, config); - } - - for (u32 texture_unit = 0; texture_unit < 4; texture_unit++) { - out += fmt::format("vec4 sampleTexUnit{}() {{\n", texture_unit); - if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) { - out += "return vec4(0.0);\n}"; - continue; - } - - if (texture_unit < 3) { - u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit; - if (config.state.texture_border_color[texture_unit].enable_s) { - out += fmt::format(R"( - if (texcoord{}.x < 0 || texcoord{}.x > 1) {{ - return tex_border_color[{}]; - }} - )", - texcoord_num, texcoord_num, texture_unit); - } - if (config.state.texture_border_color[texture_unit].enable_t) { - out += fmt::format(R"( - if (texcoord{}.y < 0 || texcoord{}.y > 1) {{ - return tex_border_color[{}]; - }} - )", - texcoord_num, texcoord_num, texture_unit); - } - } - - switch (texture_unit) { - case 0: - // Only unit 0 respects the texturing type - switch (state.texture0_type) { - case TexturingRegs::TextureConfig::Texture2D: - out += "return textureLod(tex0, texcoord0, getLod(texcoord0 * " - "vec2(textureSize(tex0, 0))) + tex_lod_bias[0]);"; - break; - case TexturingRegs::TextureConfig::Projection2D: - // TODO (wwylele): find the exact LOD formula for projection texture - out += "return textureProj(tex0, vec3(texcoord0, texcoord0_w));"; - break; - case TexturingRegs::TextureConfig::TextureCube: - out += "return texture(tex_cube, vec3(texcoord0, texcoord0_w));"; - break; - case TexturingRegs::TextureConfig::Shadow2D: - out += "return shadowTexture(texcoord0, texcoord0_w);"; - break; - case TexturingRegs::TextureConfig::ShadowCube: - out += "return 
shadowTextureCube(texcoord0, texcoord0_w);"; - break; - default: - LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type.Value()); - UNIMPLEMENTED(); - out += "return texture(tex0, texcoord0);"; - break; - } - break; - case 1: - out += "return textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, " - "0))) + tex_lod_bias[1]);"; - break; - case 2: - if (state.texture2_use_coord1) { - out += "return textureLod(tex2, texcoord1, getLod(texcoord1 * " - "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);"; - } else { - out += "return textureLod(tex2, texcoord2, getLod(texcoord2 * " - "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);"; - } - break; - case 3: - if (state.proctex.enable) { - out += "return ProcTex();"; - } else { - out += "return vec4(0.0);"; - } - break; - default: - UNREACHABLE(); - break; - } - - out += "\n}\n"; - } - - // We round the interpolated primary color to the nearest 1/255th - // This maintains the PICA's 8 bits of precision - out += R"( -void main() { -vec4 rounded_primary_color = byteround(primary_color); -vec4 primary_fragment_color = vec4(0.0); -vec4 secondary_fragment_color = vec4(0.0); -)"; - - // Do not do any sort of processing if it's obvious we're not going to pass the alpha test - if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) { - out += "discard; }"; - return out; - } - - // Append the scissor test - if (state.scissor_test_mode != RasterizerRegs::ScissorMode::Disabled) { - out += "if ("; - // Negate the condition if we have to keep only the pixels outside the scissor box - if (state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) { - out += '!'; - } - out += "(gl_FragCoord.x >= float(scissor_x1) && " - "gl_FragCoord.y >= float(scissor_y1) && " - "gl_FragCoord.x < float(scissor_x2) && " - "gl_FragCoord.y < float(scissor_y2))) discard;\n"; - } - - // The PICA depth range is [-1, 0]. The vertex shader outputs the negated Z value, otherwise - // unmodified. 
The OpenGL depth range is [-1, 1], which is compressed into [near, far] = [0, 1]. - // This compresses our effective range into [0.5, 1]. To account for this we un-negate the value - // to range [-1, -0.5], multiply by 2 to the range [-2, -1], and add 1 to arrive back at the - // original range of [-1, 0]. The Vulkan depth range is [0, 1], so all we need to do is - // un-negate the value to range [-1, 0]. Once we have z_over_w, we can do our own transformation - // according to PICA specification. - out += "#ifdef VULKAN\n" - "float z_over_w = -gl_FragCoord.z;\n" - "#else\n" - "float z_over_w = -2.0 * gl_FragCoord.z + 1.0;\n" - "#endif\n" - "float depth = z_over_w * depth_scale + depth_offset;\n"; - if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { - out += "depth /= gl_FragCoord.w;\n"; - } - - if (state.lighting.enable) - WriteLighting(out, config); - - out += "vec4 combiner_buffer = vec4(0.0);\n" - "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n" - "vec4 last_tex_env_out = rounded_primary_color;\n"; - - out += "vec3 color_results_1 = vec3(0.0);\n" - "vec3 color_results_2 = vec3(0.0);\n" - "vec3 color_results_3 = vec3(0.0);\n"; - - out += "float alpha_results_1 = 0.0;\n" - "float alpha_results_2 = 0.0;\n" - "float alpha_results_3 = 0.0;\n"; - - for (std::size_t index = 0; index < state.tev_stages.size(); ++index) { - WriteTevStage(out, config, static_cast(index)); - } - - if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) { - out += "if ("; - AppendAlphaTestCondition(out, state.alpha_test_func); - out += ") discard;\n"; - } - - // Append fog combiner - if (state.fog_mode == TexturingRegs::FogMode::Fog) { - // Get index into fog LUT - if (state.fog_flip) { - out += "float fog_index = (1.0 - float(depth)) * 128.0;\n"; - } else { - out += "float fog_index = depth * 128.0;\n"; - } - - // Generate clamped fog factor from LUT for given fog index - out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n" - "float 
fog_f = fog_index - fog_i;\n" - "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + " - "fog_lut_offset).rg;\n" - "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n" - "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; - - // Blend the fog - out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; - } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { - Core::System::GetInstance().TelemetrySession().AddField( - Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true); - LOG_CRITICAL(Render, "Unimplemented gas mode"); - out += "discard; }"; - return out; - } - - if (state.shadow_rendering) { - out += R"( -uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); -uint s = uint(last_tex_env_out.g * float(0xFF)); -ivec2 image_coord = ivec2(gl_FragCoord.xy); -)"; - if (state.use_fragment_shader_interlock) { - out += R"( -beginInvocationInterlock(); -uint old_shadow = imageLoad(shadow_buffer, image_coord).x; -uint new_shadow = UpdateShadow(old_shadow, d, s); -imageStore(shadow_buffer, image_coord, uvec4(new_shadow)); -endInvocationInterlock(); -)"; - } else { - out += R"( -uint old = imageLoad(shadow_buffer, image_coord).x; -uint new1; -uint old2; -do { - old2 = old; - new1 = UpdateShadow(old, d, s); -} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new1)) != old2); -)"; - } - } else { - out += "gl_FragDepth = depth;\n"; - // Round the final fragment color to maintain the PICA's 8 bits of precision - out += "last_tex_env_out = byteround(last_tex_env_out);\n"; - WriteBlending(out, config); - out += "color = last_tex_env_out;\n"; - } - - WriteLogicOp(out, config); - - out += '}'; - return out; -} - std::string GenerateTrivialVertexShader(bool use_clip_planes, bool separable_shader) { std::string out; if (separable_shader) { diff --git a/src/video_core/shader/generator/glsl_shader_gen.h b/src/video_core/shader/generator/glsl_shader_gen.h index 6531f4780e..46e91c64cf 100644 
--- a/src/video_core/shader/generator/glsl_shader_gen.h +++ b/src/video_core/shader/generator/glsl_shader_gen.h @@ -46,12 +46,4 @@ std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const P */ std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader); -/** - * Generates the GLSL fragment shader program source code for the current Pica state - * @param config ShaderCacheKey object generated for the current Pica state, used for the shader - * configuration (NOTE: Use state in this struct only, not the Pica registers!) - * @returns String of the shader source code - */ -std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader); - } // namespace Pica::Shader::Generator::GLSL diff --git a/src/video_core/shader/generator/pica_fs_config.cpp b/src/video_core/shader/generator/pica_fs_config.cpp new file mode 100644 index 0000000000..3cdeb46f61 --- /dev/null +++ b/src/video_core/shader/generator/pica_fs_config.cpp @@ -0,0 +1,193 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/shader/generator/pica_fs_config.h" + +namespace Pica::Shader { + +FramebufferConfig::FramebufferConfig(const Pica::Regs& regs, const Profile& profile) { + const auto& output_merger = regs.framebuffer.output_merger; + scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode); + depthmap_enable.Assign(regs.rasterizer.depthmap_enable); + shadow_rendering.Assign(regs.framebuffer.IsShadowRendering()); + alpha_test_func.Assign(output_merger.alpha_test.enable + ? output_merger.alpha_test.func.Value() + : Pica::FramebufferRegs::CompareFunc::Always); + + // Emulate logic op in the shader if needed and not supported. 
+ logic_op.Assign(Pica::FramebufferRegs::LogicOp::Copy); + if (!profile.has_logic_op && !regs.framebuffer.output_merger.alphablend_enable) { + logic_op.Assign(regs.framebuffer.output_merger.logic_op); + } + + const auto alpha_eq = output_merger.alpha_blending.blend_equation_a.Value(); + const auto rgb_eq = output_merger.alpha_blending.blend_equation_rgb.Value(); + if (!profile.has_blend_minmax_factor && output_merger.alphablend_enable) { + if (rgb_eq == Pica::FramebufferRegs::BlendEquation::Max || + rgb_eq == Pica::FramebufferRegs::BlendEquation::Min) { + rgb_blend.eq = rgb_eq; + rgb_blend.src_factor = output_merger.alpha_blending.factor_source_rgb; + rgb_blend.dst_factor = output_merger.alpha_blending.factor_dest_rgb; + } + if (alpha_eq == Pica::FramebufferRegs::BlendEquation::Max || + alpha_eq == Pica::FramebufferRegs::BlendEquation::Min) { + alpha_blend.eq = alpha_eq; + alpha_blend.src_factor = output_merger.alpha_blending.factor_source_a; + alpha_blend.dst_factor = output_merger.alpha_blending.factor_dest_a; + } + } +} + +TextureConfig::TextureConfig(const Pica::TexturingRegs& regs, const Profile& profile) { + texture0_type.Assign(regs.texture0.type); + texture2_use_coord1.Assign(regs.main_config.texture2_use_coord1 != 0); + combiner_buffer_input.Assign(regs.tev_combiner_buffer_input.update_mask_rgb.Value() | + regs.tev_combiner_buffer_input.update_mask_a.Value() << 4); + fog_mode.Assign(regs.fog_mode); + fog_flip.Assign(regs.fog_flip != 0); + shadow_texture_orthographic.Assign(regs.shadow.orthographic != 0); + + // Emulate custom border color if needed and not supported. 
+ const auto pica_textures = regs.GetTextures(); + for (u32 tex_index = 0; tex_index < 3; tex_index++) { + const auto& config = pica_textures[tex_index].config; + texture_border_color[tex_index].enable_s.Assign( + !profile.has_custom_border_color && + config.wrap_s == Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder); + texture_border_color[tex_index].enable_t.Assign( + !profile.has_custom_border_color && + config.wrap_t == Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder); + } + + const auto& stages = regs.GetTevStages(); + for (std::size_t i = 0; i < tev_stages.size(); i++) { + const auto& tev_stage = stages[i]; + tev_stages[i].sources_raw = tev_stage.sources_raw; + tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; + tev_stages[i].ops_raw = tev_stage.ops_raw; + tev_stages[i].scales_raw = tev_stage.scales_raw; + if (tev_stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { + tev_stages[i].sources_raw &= 0xFFF; + tev_stages[i].modifiers_raw &= 0xFFF; + tev_stages[i].ops_raw &= 0xF; + } + } +} + +LightConfig::LightConfig(const Pica::LightingRegs& regs) { + if (regs.disable) { + return; + } + + enable.Assign(1); + src_num.Assign(regs.max_light_index + 1); + config.Assign(regs.config0.config); + enable_primary_alpha.Assign(regs.config0.enable_primary_alpha); + enable_secondary_alpha.Assign(regs.config0.enable_secondary_alpha); + bump_mode.Assign(regs.config0.bump_mode); + bump_selector.Assign(regs.config0.bump_selector); + bump_renorm.Assign(regs.config0.disable_bump_renorm == 0); + clamp_highlights.Assign(regs.config0.clamp_highlights != 0); + + enable_shadow.Assign(regs.config0.enable_shadow != 0); + if (enable_shadow) { + shadow_primary.Assign(regs.config0.shadow_primary != 0); + shadow_secondary.Assign(regs.config0.shadow_secondary != 0); + shadow_invert.Assign(regs.config0.shadow_invert != 0); + shadow_alpha.Assign(regs.config0.shadow_alpha != 0); + shadow_selector.Assign(regs.config0.shadow_selector); + } + + 
for (u32 light_index = 0; light_index <= regs.max_light_index; ++light_index) { + const u32 num = regs.light_enable.GetNum(light_index); + const auto& light = regs.light[num]; + lights[light_index].num.Assign(num); + lights[light_index].directional.Assign(light.config.directional != 0); + lights[light_index].two_sided_diffuse.Assign(light.config.two_sided_diffuse != 0); + lights[light_index].geometric_factor_0.Assign(light.config.geometric_factor_0 != 0); + lights[light_index].geometric_factor_1.Assign(light.config.geometric_factor_1 != 0); + lights[light_index].dist_atten_enable.Assign(!regs.IsDistAttenDisabled(num)); + lights[light_index].spot_atten_enable.Assign(!regs.IsSpotAttenDisabled(num)); + lights[light_index].shadow_enable.Assign(!regs.IsShadowDisabled(num)); + } + + lut_d0.enable.Assign(regs.config1.disable_lut_d0 == 0); + if (lut_d0.enable) { + lut_d0.abs_input.Assign(regs.abs_lut_input.disable_d0 == 0); + lut_d0.type.Assign(regs.lut_input.d0.Value()); + lut_d0.scale = regs.lut_scale.GetScale(regs.lut_scale.d0); + } + + lut_d1.enable.Assign(regs.config1.disable_lut_d1 == 0); + if (lut_d1.enable) { + lut_d1.abs_input.Assign(regs.abs_lut_input.disable_d1 == 0); + lut_d1.type.Assign(regs.lut_input.d1.Value()); + lut_d1.scale = regs.lut_scale.GetScale(regs.lut_scale.d1); + } + + // This is a dummy field due to lack of the corresponding register + lut_sp.enable.Assign(1); + lut_sp.abs_input.Assign(regs.abs_lut_input.disable_sp == 0); + lut_sp.type.Assign(regs.lut_input.sp.Value()); + lut_sp.scale = regs.lut_scale.GetScale(regs.lut_scale.sp); + + lut_fr.enable.Assign(regs.config1.disable_lut_fr == 0); + if (lut_fr.enable) { + lut_fr.abs_input.Assign(regs.abs_lut_input.disable_fr == 0); + lut_fr.type.Assign(regs.lut_input.fr.Value()); + lut_fr.scale = regs.lut_scale.GetScale(regs.lut_scale.fr); + } + + lut_rr.enable.Assign(regs.config1.disable_lut_rr == 0); + if (lut_rr.enable) { + lut_rr.abs_input.Assign(regs.abs_lut_input.disable_rr == 0); + 
lut_rr.type.Assign(regs.lut_input.rr.Value()); + lut_rr.scale = regs.lut_scale.GetScale(regs.lut_scale.rr); + } + + lut_rg.enable.Assign(regs.config1.disable_lut_rg == 0); + if (lut_rg.enable) { + lut_rg.abs_input.Assign(regs.abs_lut_input.disable_rg == 0); + lut_rg.type.Assign(regs.lut_input.rg.Value()); + lut_rg.scale = regs.lut_scale.GetScale(regs.lut_scale.rg); + } + + lut_rb.enable.Assign(regs.config1.disable_lut_rb == 0); + if (lut_rb.enable) { + lut_rb.abs_input.Assign(regs.abs_lut_input.disable_rb == 0); + lut_rb.type.Assign(regs.lut_input.rb.Value()); + lut_rb.scale = regs.lut_scale.GetScale(regs.lut_scale.rb); + } +} + +ProcTexConfig::ProcTexConfig(const Pica::TexturingRegs& regs) { + if (!regs.main_config.texture3_enable) { + return; + } + + enable.Assign(1); + coord.Assign(regs.main_config.texture3_coordinates); + u_clamp.Assign(regs.proctex.u_clamp); + v_clamp.Assign(regs.proctex.v_clamp); + color_combiner.Assign(regs.proctex.color_combiner); + alpha_combiner.Assign(regs.proctex.alpha_combiner); + separate_alpha.Assign(regs.proctex.separate_alpha); + noise_enable.Assign(regs.proctex.noise_enable); + u_shift.Assign(regs.proctex.u_shift); + v_shift.Assign(regs.proctex.v_shift); + lut_width = regs.proctex_lut.width; + lut_offset0 = regs.proctex_lut_offset.level0; + lut_offset1 = regs.proctex_lut_offset.level1; + lut_offset2 = regs.proctex_lut_offset.level2; + lut_offset3 = regs.proctex_lut_offset.level3; + lod_min = regs.proctex_lut.lod_min; + lod_max = regs.proctex_lut.lod_max; + lut_filter.Assign(regs.proctex_lut.filter); +} + +FSConfig::FSConfig(const Pica::Regs& regs, const UserConfig& user_, const Profile& profile) + : framebuffer{regs, profile}, texture{regs.texturing, profile}, lighting{regs.lighting}, + proctex{regs.texturing}, user{user_} {} + +} // namespace Pica::Shader diff --git a/src/video_core/shader/generator/pica_fs_config.h b/src/video_core/shader/generator/pica_fs_config.h new file mode 100644 index 0000000000..6b18b735f4 --- /dev/null 
+++ b/src/video_core/shader/generator/pica_fs_config.h @@ -0,0 +1,207 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/hash.h" +#include "video_core/regs.h" +#include "video_core/shader/generator/profile.h" + +namespace Pica::Shader { + +struct BlendConfig { + Pica::FramebufferRegs::BlendEquation eq; + Pica::FramebufferRegs::BlendFactor src_factor; + Pica::FramebufferRegs::BlendFactor dst_factor; +}; + +struct FramebufferConfig { + explicit FramebufferConfig(const Pica::Regs& regs, const Profile& profile); + + union { + u32 raw{}; + BitField<0, 3, Pica::FramebufferRegs::CompareFunc> alpha_test_func; + BitField<3, 2, Pica::RasterizerRegs::ScissorMode> scissor_test_mode; + BitField<5, 1, Pica::RasterizerRegs::DepthBuffering> depthmap_enable; + BitField<6, 4, Pica::FramebufferRegs::LogicOp> logic_op; + BitField<10, 1, u32> shadow_rendering; + }; + BlendConfig rgb_blend{}; + BlendConfig alpha_blend{}; +}; +static_assert(std::has_unique_object_representations_v<FramebufferConfig>); + +struct TevStageConfigRaw { + u32 sources_raw; + u32 modifiers_raw; + u32 ops_raw; + u32 scales_raw; + operator Pica::TexturingRegs::TevStageConfig() const noexcept { + return { + .sources_raw = sources_raw, + .modifiers_raw = modifiers_raw, + .ops_raw = ops_raw, + .const_color = 0, + .scales_raw = scales_raw, + }; + } +}; + +union TextureBorder { + BitField<0, 1, u32> enable_s; + BitField<1, 1, u32> enable_t; +}; + +struct TextureConfig { + explicit TextureConfig(const Pica::TexturingRegs& regs, const Profile& profile); + + union { + u32 raw{}; + BitField<0, 3, Pica::TexturingRegs::TextureConfig::TextureType> texture0_type; + BitField<3, 1, u32> texture2_use_coord1; + BitField<4, 8, u32> combiner_buffer_input; + BitField<12, 3, Pica::TexturingRegs::FogMode> fog_mode; + BitField<15, 1, u32> fog_flip; + BitField<16, 1, u32> shadow_texture_orthographic; + }; + std::array<TextureBorder, 3>
texture_border_color{}; + std::array<TevStageConfigRaw, 6> tev_stages{}; +}; +static_assert(std::has_unique_object_representations_v<TextureConfig>); + +union Light { + u16 raw; + BitField<0, 3, u16> num; + BitField<3, 1, u16> directional; + BitField<4, 1, u16> two_sided_diffuse; + BitField<5, 1, u16> dist_atten_enable; + BitField<6, 1, u16> spot_atten_enable; + BitField<7, 1, u16> geometric_factor_0; + BitField<8, 1, u16> geometric_factor_1; + BitField<9, 1, u16> shadow_enable; +}; +static_assert(std::has_unique_object_representations_v<Light>); + +struct LutConfig { + union { + u32 raw; + BitField<0, 1, u32> enable; + BitField<1, 1, u32> abs_input; + BitField<2, 3, Pica::LightingRegs::LightingLutInput> type; + }; + f32 scale; +}; + +struct LightConfig { + explicit LightConfig(const Pica::LightingRegs& regs); + + union { + u32 raw{}; + BitField<0, 1, u32> enable; + BitField<1, 4, u32> src_num; + BitField<5, 2, Pica::LightingRegs::LightingBumpMode> bump_mode; + BitField<7, 2, u32> bump_selector; + BitField<9, 1, u32> bump_renorm; + BitField<10, 1, u32> clamp_highlights; + BitField<11, 4, Pica::LightingRegs::LightingConfig> config; + BitField<15, 1, u32> enable_primary_alpha; + BitField<16, 1, u32> enable_secondary_alpha; + BitField<17, 1, u32> enable_shadow; + BitField<18, 1, u32> shadow_primary; + BitField<19, 1, u32> shadow_secondary; + BitField<20, 1, u32> shadow_invert; + BitField<21, 1, u32> shadow_alpha; + BitField<22, 2, u32> shadow_selector; + }; + LutConfig lut_d0{}; + LutConfig lut_d1{}; + LutConfig lut_sp{}; + LutConfig lut_fr{}; + LutConfig lut_rr{}; + LutConfig lut_rg{}; + LutConfig lut_rb{}; + std::array<Light, 8> lights{}; +}; + +struct ProcTexConfig { + explicit ProcTexConfig(const Pica::TexturingRegs& regs); + + union { + u32 raw{}; + BitField<0, 1, u32> enable; + BitField<1, 2, u32> coord; + BitField<3, 3, Pica::TexturingRegs::ProcTexClamp> u_clamp; + BitField<6, 3, Pica::TexturingRegs::ProcTexClamp> v_clamp; + BitField<9, 4, Pica::TexturingRegs::ProcTexCombiner> color_combiner; + BitField<13, 4,
Pica::TexturingRegs::ProcTexCombiner> alpha_combiner; + BitField<17, 3, Pica::TexturingRegs::ProcTexFilter> lut_filter; + BitField<20, 1, u32> separate_alpha; + BitField<21, 1, u32> noise_enable; + BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift; + BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift; + }; + s32 lut_width{}; + s32 lut_offset0{}; + s32 lut_offset1{}; + s32 lut_offset2{}; + s32 lut_offset3{}; + u16 lod_min{}; + u16 lod_max{}; +}; +static_assert(std::has_unique_object_representations_v<ProcTexConfig>); + +union UserConfig { + u32 raw{}; + BitField<0, 1, u32> use_custom_normal; +}; +static_assert(std::has_unique_object_representations_v<UserConfig>); + +struct FSConfig { + explicit FSConfig(const Pica::Regs& regs, const UserConfig& user, const Profile& profile); + + [[nodiscard]] bool TevStageUpdatesCombinerBufferColor(u32 stage_index) const { + return (stage_index < 4) && (texture.combiner_buffer_input & (1 << stage_index)); + } + + [[nodiscard]] bool TevStageUpdatesCombinerBufferAlpha(u32 stage_index) const { + return (stage_index < 4) && ((texture.combiner_buffer_input >> 4) & (1 << stage_index)); + } + + [[nodiscard]] bool EmulateBlend() const { + return framebuffer.rgb_blend.eq != Pica::FramebufferRegs::BlendEquation::Add || + framebuffer.alpha_blend.eq != Pica::FramebufferRegs::BlendEquation::Add; + } + + [[nodiscard]] bool UsesShadowPipeline() const { + const auto texture0_type = texture.texture0_type.Value(); + return texture0_type == Pica::TexturingRegs::TextureConfig::Shadow2D || + texture0_type == Pica::TexturingRegs::TextureConfig::ShadowCube || + framebuffer.shadow_rendering.Value(); + } + + bool operator==(const FSConfig& other) const noexcept { + return std::memcmp(this, &other, sizeof(FSConfig)) == 0; + } + + std::size_t Hash() const noexcept { + return Common::ComputeHash64(this, sizeof(FSConfig)); + } + + FramebufferConfig framebuffer; + TextureConfig texture; + LightConfig lighting; + ProcTexConfig proctex; + UserConfig user; +}; + +} //
namespace Pica::Shader + +namespace std { +template <> +struct hash<Pica::Shader::FSConfig> { + std::size_t operator()(const Pica::Shader::FSConfig& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/shader/generator/profile.h b/src/video_core/shader/generator/profile.h new file mode 100644 index 0000000000..ba2a38085f --- /dev/null +++ b/src/video_core/shader/generator/profile.h @@ -0,0 +1,25 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Pica::Shader { + +struct Profile { + bool has_separable_shaders{}; + bool has_clip_planes{}; + bool has_geometry_shader{}; + bool has_custom_border_color{}; + bool has_fragment_shader_interlock{}; + bool has_blend_minmax_factor{}; + bool has_minus_one_to_one_range{}; + bool has_logic_op{}; + bool has_gl_ext_framebuffer_fetch{}; + bool has_gl_arm_framebuffer_fetch{}; + bool has_gl_nv_fragment_shader_interlock{}; + bool has_gl_intel_fragment_shader_interlock{}; + bool is_vulkan{}; +}; + +} // namespace Pica::Shader diff --git a/src/video_core/shader/generator/shader_gen.cpp b/src/video_core/shader/generator/shader_gen.cpp index 2f60116667..8654a6e3cb 100644 --- a/src/video_core/shader/generator/shader_gen.cpp +++ b/src/video_core/shader/generator/shader_gen.cpp @@ -9,218 +9,6 @@ namespace Pica::Shader::Generator { -PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, bool has_fragment_shader_interlock, - bool emulate_logic_op, bool emulate_custom_border_color, - bool emulate_blend_minmax_factor, bool use_custom_normal_map) { - state.scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode); - - state.depthmap_enable.Assign(regs.rasterizer.depthmap_enable); - - state.alpha_test_func.Assign(regs.framebuffer.output_merger.alpha_test.enable - ?
regs.framebuffer.output_merger.alpha_test.func.Value() - : Pica::FramebufferRegs::CompareFunc::Always); - - state.texture0_type.Assign(regs.texturing.texture0.type); - - state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0); - - const auto pica_textures = regs.texturing.GetTextures(); - for (u32 tex_index = 0; tex_index < 3; tex_index++) { - const auto config = pica_textures[tex_index].config; - state.texture_border_color[tex_index].enable_s.Assign( - emulate_custom_border_color && - config.wrap_s == Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder); - state.texture_border_color[tex_index].enable_t.Assign( - emulate_custom_border_color && - config.wrap_t == Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder); - } - - // Emulate logic op in the shader if not supported. This is mostly for mobile GPUs - const bool needs_emulate_logic_op = - emulate_logic_op && !regs.framebuffer.output_merger.alphablend_enable; - - state.emulate_logic_op.Assign(needs_emulate_logic_op); - if (needs_emulate_logic_op) { - state.logic_op.Assign(regs.framebuffer.output_merger.logic_op); - } else { - state.logic_op.Assign(Pica::FramebufferRegs::LogicOp::NoOp); - } - - // Copy relevant tev stages fields. - // We don't sync const_color here because of the high variance, it is a - // shader uniform instead. 
- const auto& tev_stages = regs.texturing.GetTevStages(); - DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); - for (std::size_t i = 0; i < tev_stages.size(); i++) { - const auto& tev_stage = tev_stages[i]; - state.tev_stages[i].sources_raw = tev_stage.sources_raw; - state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; - state.tev_stages[i].ops_raw = tev_stage.ops_raw; - state.tev_stages[i].scales_raw = tev_stage.scales_raw; - if (tev_stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { - state.tev_stages[i].sources_raw &= 0xFFF; - state.tev_stages[i].modifiers_raw &= 0xFFF; - state.tev_stages[i].ops_raw &= 0xF; - } - } - - state.fog_mode.Assign(regs.texturing.fog_mode); - state.fog_flip.Assign(regs.texturing.fog_flip != 0); - - state.combiner_buffer_input.Assign( - regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | - regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() << 4); - - // Fragment lighting - state.lighting.enable.Assign(!regs.lighting.disable); - if (state.lighting.enable) { - state.lighting.src_num.Assign(regs.lighting.max_light_index + 1); - - for (u32 light_index = 0; light_index < state.lighting.src_num; ++light_index) { - const u32 num = regs.lighting.light_enable.GetNum(light_index); - const auto& light = regs.lighting.light[num]; - state.lighting.light[light_index].num.Assign(num); - state.lighting.light[light_index].directional.Assign(light.config.directional != 0); - state.lighting.light[light_index].two_sided_diffuse.Assign( - light.config.two_sided_diffuse != 0); - state.lighting.light[light_index].geometric_factor_0.Assign( - light.config.geometric_factor_0 != 0); - state.lighting.light[light_index].geometric_factor_1.Assign( - light.config.geometric_factor_1 != 0); - state.lighting.light[light_index].dist_atten_enable.Assign( - !regs.lighting.IsDistAttenDisabled(num)); - state.lighting.light[light_index].spot_atten_enable.Assign( - 
!regs.lighting.IsSpotAttenDisabled(num)); - state.lighting.light[light_index].shadow_enable.Assign( - !regs.lighting.IsShadowDisabled(num)); - } - - state.lighting.lut_d0.enable.Assign(regs.lighting.config1.disable_lut_d0 == 0); - if (state.lighting.lut_d0.enable) { - state.lighting.lut_d0.abs_input.Assign(regs.lighting.abs_lut_input.disable_d0 == 0); - state.lighting.lut_d0.type.Assign(regs.lighting.lut_input.d0.Value()); - state.lighting.lut_d0.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - } - - state.lighting.lut_d1.enable.Assign(regs.lighting.config1.disable_lut_d1 == 0); - if (state.lighting.lut_d1.enable) { - state.lighting.lut_d1.abs_input.Assign(regs.lighting.abs_lut_input.disable_d1 == 0); - state.lighting.lut_d1.type.Assign(regs.lighting.lut_input.d1.Value()); - state.lighting.lut_d1.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - } - - // this is a dummy field due to lack of the corresponding register - state.lighting.lut_sp.enable.Assign(1); - state.lighting.lut_sp.abs_input.Assign(regs.lighting.abs_lut_input.disable_sp == 0); - state.lighting.lut_sp.type.Assign(regs.lighting.lut_input.sp.Value()); - state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); - - state.lighting.lut_fr.enable.Assign(regs.lighting.config1.disable_lut_fr == 0); - if (state.lighting.lut_fr.enable) { - state.lighting.lut_fr.abs_input.Assign(regs.lighting.abs_lut_input.disable_fr == 0); - state.lighting.lut_fr.type.Assign(regs.lighting.lut_input.fr.Value()); - state.lighting.lut_fr.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - } - - state.lighting.lut_rr.enable.Assign(regs.lighting.config1.disable_lut_rr == 0); - if (state.lighting.lut_rr.enable) { - state.lighting.lut_rr.abs_input.Assign(regs.lighting.abs_lut_input.disable_rr == 0); - state.lighting.lut_rr.type.Assign(regs.lighting.lut_input.rr.Value()); - state.lighting.lut_rr.scale = - 
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - } - - state.lighting.lut_rg.enable.Assign(regs.lighting.config1.disable_lut_rg == 0); - if (state.lighting.lut_rg.enable) { - state.lighting.lut_rg.abs_input.Assign(regs.lighting.abs_lut_input.disable_rg == 0); - state.lighting.lut_rg.type.Assign(regs.lighting.lut_input.rg.Value()); - state.lighting.lut_rg.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - } - - state.lighting.lut_rb.enable.Assign(regs.lighting.config1.disable_lut_rb == 0); - if (state.lighting.lut_rb.enable) { - state.lighting.lut_rb.abs_input.Assign(regs.lighting.abs_lut_input.disable_rb == 0); - state.lighting.lut_rb.type.Assign(regs.lighting.lut_input.rb.Value()); - state.lighting.lut_rb.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); - } - - state.lighting.config.Assign(regs.lighting.config0.config); - state.lighting.enable_primary_alpha.Assign(regs.lighting.config0.enable_primary_alpha); - state.lighting.enable_secondary_alpha.Assign(regs.lighting.config0.enable_secondary_alpha); - state.lighting.bump_mode.Assign(regs.lighting.config0.bump_mode); - state.lighting.bump_selector.Assign(regs.lighting.config0.bump_selector); - state.lighting.bump_renorm.Assign(regs.lighting.config0.disable_bump_renorm == 0); - state.lighting.clamp_highlights.Assign(regs.lighting.config0.clamp_highlights != 0); - - state.lighting.enable_shadow.Assign(regs.lighting.config0.enable_shadow != 0); - if (state.lighting.enable_shadow) { - state.lighting.shadow_primary.Assign(regs.lighting.config0.shadow_primary != 0); - state.lighting.shadow_secondary.Assign(regs.lighting.config0.shadow_secondary != 0); - state.lighting.shadow_invert.Assign(regs.lighting.config0.shadow_invert != 0); - state.lighting.shadow_alpha.Assign(regs.lighting.config0.shadow_alpha != 0); - state.lighting.shadow_selector.Assign(regs.lighting.config0.shadow_selector); - } - } - - 
state.proctex.enable.Assign(regs.texturing.main_config.texture3_enable); - if (state.proctex.enable) { - state.proctex.coord.Assign(regs.texturing.main_config.texture3_coordinates); - state.proctex.u_clamp.Assign(regs.texturing.proctex.u_clamp); - state.proctex.v_clamp.Assign(regs.texturing.proctex.v_clamp); - state.proctex.color_combiner.Assign(regs.texturing.proctex.color_combiner); - state.proctex.alpha_combiner.Assign(regs.texturing.proctex.alpha_combiner); - state.proctex.separate_alpha.Assign(regs.texturing.proctex.separate_alpha); - state.proctex.noise_enable.Assign(regs.texturing.proctex.noise_enable); - state.proctex.u_shift.Assign(regs.texturing.proctex.u_shift); - state.proctex.v_shift.Assign(regs.texturing.proctex.v_shift); - state.proctex.lut_width = regs.texturing.proctex_lut.width; - state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; - state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1; - state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; - state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; - state.proctex.lod_min = regs.texturing.proctex_lut.lod_min; - state.proctex.lod_max = regs.texturing.proctex_lut.lod_max; - state.proctex.lut_filter.Assign(regs.texturing.proctex_lut.filter); - } - - const auto alpha_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_a.Value(); - const auto rgb_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb.Value(); - if (emulate_blend_minmax_factor && regs.framebuffer.output_merger.alphablend_enable) { - if (rgb_eq == Pica::FramebufferRegs::BlendEquation::Max || - rgb_eq == Pica::FramebufferRegs::BlendEquation::Min) { - state.rgb_blend.emulate_blending = true; - state.rgb_blend.eq = rgb_eq; - state.rgb_blend.src_factor = - regs.framebuffer.output_merger.alpha_blending.factor_source_rgb; - state.rgb_blend.dst_factor = - regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb; - } - if (alpha_eq == 
Pica::FramebufferRegs::BlendEquation::Max || - alpha_eq == Pica::FramebufferRegs::BlendEquation::Min) { - state.alpha_blend.emulate_blending = true; - state.alpha_blend.eq = alpha_eq; - state.alpha_blend.src_factor = - regs.framebuffer.output_merger.alpha_blending.factor_source_a; - state.alpha_blend.dst_factor = - regs.framebuffer.output_merger.alpha_blending.factor_dest_a; - } - } - - state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode == - Pica::FramebufferRegs::FragmentOperationMode::Shadow); - state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0); - - // We only need fragment shader interlock when shadow rendering. - state.use_fragment_shader_interlock.Assign(state.shadow_rendering && - has_fragment_shader_interlock); - state.use_custom_normal_map.Assign(use_custom_normal_map); -} - void PicaGSConfigState::Init(const Pica::Regs& regs, bool use_clip_planes_) { use_clip_planes = use_clip_planes_; diff --git a/src/video_core/shader/generator/shader_gen.h b/src/video_core/shader/generator/shader_gen.h index 4a8ffc6562..c6db249bfa 100644 --- a/src/video_core/shader/generator/shader_gen.h +++ b/src/video_core/shader/generator/shader_gen.h @@ -28,141 +28,6 @@ enum Attributes { ATTRIBUTE_VIEW, }; -// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs() -struct TevStageConfigRaw { - u32 sources_raw; - u32 modifiers_raw; - u32 ops_raw; - u32 scales_raw; - explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept { - return { - .sources_raw = sources_raw, - .modifiers_raw = modifiers_raw, - .ops_raw = ops_raw, - .const_color = 0, - .scales_raw = scales_raw, - }; - } -}; - -struct PicaFSConfigState { - union { - BitField<0, 3, Pica::FramebufferRegs::CompareFunc> alpha_test_func; - BitField<3, 2, Pica::RasterizerRegs::ScissorMode> scissor_test_mode; - BitField<5, 3, Pica::TexturingRegs::TextureConfig::TextureType> texture0_type; - BitField<8, 1, u32> 
texture2_use_coord1; - BitField<9, 8, u32> combiner_buffer_input; - BitField<17, 1, Pica::RasterizerRegs::DepthBuffering> depthmap_enable; - BitField<18, 3, Pica::TexturingRegs::FogMode> fog_mode; - BitField<21, 1, u32> fog_flip; - BitField<22, 1, u32> emulate_logic_op; - BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op; - BitField<27, 1, u32> shadow_rendering; - BitField<28, 1, u32> shadow_texture_orthographic; - BitField<29, 1, u32> use_fragment_shader_interlock; - BitField<30, 1, u32> use_custom_normal_map; - }; - - union { - BitField<0, 1, u32> enable_s; - BitField<1, 1, u32> enable_t; - } texture_border_color[3]; - - std::array tev_stages; - - struct { - union { - BitField<0, 3, u16> num; - BitField<3, 1, u16> directional; - BitField<4, 1, u16> two_sided_diffuse; - BitField<5, 1, u16> dist_atten_enable; - BitField<6, 1, u16> spot_atten_enable; - BitField<7, 1, u16> geometric_factor_0; - BitField<8, 1, u16> geometric_factor_1; - BitField<9, 1, u16> shadow_enable; - } light[8]; - - union { - BitField<0, 1, u32> enable; - BitField<1, 4, u32> src_num; - BitField<5, 2, Pica::LightingRegs::LightingBumpMode> bump_mode; - BitField<7, 2, u32> bump_selector; - BitField<9, 1, u32> bump_renorm; - BitField<10, 1, u32> clamp_highlights; - BitField<11, 4, Pica::LightingRegs::LightingConfig> config; - BitField<15, 1, u32> enable_primary_alpha; - BitField<16, 1, u32> enable_secondary_alpha; - BitField<17, 1, u32> enable_shadow; - BitField<18, 1, u32> shadow_primary; - BitField<19, 1, u32> shadow_secondary; - BitField<20, 1, u32> shadow_invert; - BitField<21, 1, u32> shadow_alpha; - BitField<22, 2, u32> shadow_selector; - }; - - struct { - union { - BitField<0, 1, u32> enable; - BitField<1, 1, u32> abs_input; - BitField<2, 3, Pica::LightingRegs::LightingLutInput> type; - }; - float scale; - } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb; - } lighting; - - struct { - union { - BitField<0, 1, u32> enable; - BitField<1, 2, u32> coord; - BitField<3, 3, 
Pica::TexturingRegs::ProcTexClamp> u_clamp; - BitField<6, 3, Pica::TexturingRegs::ProcTexClamp> v_clamp; - BitField<9, 4, Pica::TexturingRegs::ProcTexCombiner> color_combiner; - BitField<13, 4, Pica::TexturingRegs::ProcTexCombiner> alpha_combiner; - BitField<17, 3, Pica::TexturingRegs::ProcTexFilter> lut_filter; - BitField<20, 1, u32> separate_alpha; - BitField<21, 1, u32> noise_enable; - BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift; - BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift; - }; - s32 lut_width; - s32 lut_offset0; - s32 lut_offset1; - s32 lut_offset2; - s32 lut_offset3; - u8 lod_min; - u8 lod_max; - } proctex; - - struct { - bool emulate_blending; - Pica::FramebufferRegs::BlendEquation eq; - Pica::FramebufferRegs::BlendFactor src_factor; - Pica::FramebufferRegs::BlendFactor dst_factor; - } rgb_blend, alpha_blend; -}; - -/** - * This struct contains all state used to generate the GLSL fragment shader that emulates the - * current Pica register configuration. This struct is used as a cache key for generated GLSL shader - * programs. The functions in glsl_shader_gen.cpp should retrieve state from this struct only, not - * by directly accessing Pica registers. This should reduce the risk of bugs in shader generation - * where Pica state is not being captured in the shader cache key, thereby resulting in (what should - * be) two separate shaders sharing the same key. 
- */ -struct PicaFSConfig : Common::HashableStruct { - PicaFSConfig(const Pica::Regs& regs, bool has_fragment_shader_interlock, bool emulate_logic_op, - bool emulate_custom_border_color, bool emulate_blend_minmax_factor, - bool use_custom_normal_map = false); - - [[nodiscard]] bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { - return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); - } - - [[nodiscard]] bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { - return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); - } -}; - enum class AttribLoadFlags { Float = 1 << 0, Sint = 1 << 1, @@ -238,13 +103,6 @@ struct PicaFixedGSConfig : Common::HashableStruct { } // namespace Pica::Shader::Generator namespace std { -template <> -struct hash { - std::size_t operator()(const Pica::Shader::Generator::PicaFSConfig& k) const noexcept { - return k.Hash(); - } -}; - template <> struct hash { std::size_t operator()(const Pica::Shader::Generator::PicaVSConfig& k) const noexcept { diff --git a/src/video_core/shader/generator/spv_shader_gen.cpp b/src/video_core/shader/generator/spv_fs_shader_gen.cpp similarity index 93% rename from src/video_core/shader/generator/spv_shader_gen.cpp rename to src/video_core/shader/generator/spv_fs_shader_gen.cpp index 8c2743e18c..4268314d56 100644 --- a/src/video_core/shader/generator/spv_shader_gen.cpp +++ b/src/video_core/shader/generator/spv_fs_shader_gen.cpp @@ -2,9 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include "core/core.h" -#include "core/telemetry_session.h" -#include "video_core/shader/generator/spv_shader_gen.h" +#include "video_core/shader/generator/spv_fs_shader_gen.h" + +namespace Pica::Shader::Generator::SPIRV { using Pica::FramebufferRegs; using Pica::LightingRegs; @@ -12,12 +12,10 @@ using Pica::RasterizerRegs; using Pica::TexturingRegs; using TevStageConfig = TexturingRegs::TevStageConfig; -namespace Pica::Shader::Generator::SPIRV { - constexpr u32 SPIRV_VERSION_1_3 = 0x00010300; -FragmentModule::FragmentModule(Core::TelemetrySession& telemetry_, const PicaFSConfig& config_) - : Sirit::Module{SPIRV_VERSION_1_3}, telemetry{telemetry_}, config{config_} { +FragmentModule::FragmentModule(const FSConfig& config_) + : Sirit::Module{SPIRV_VERSION_1_3}, config{config_} { DefineArithmeticTypes(); DefineUniformStructs(); DefineInterface(); @@ -37,38 +35,32 @@ void FragmentModule::Generate() { secondary_fragment_color = ConstF32(0.f, 0.f, 0.f, 0.f); // Do not do any sort of processing if it's obvious we're not going to pass the alpha test - if (config.state.alpha_test_func == Pica::FramebufferRegs::CompareFunc::Never) { + if (config.framebuffer.alpha_test_func == Pica::FramebufferRegs::CompareFunc::Never) { OpKill(); OpFunctionEnd(); return; } - // Check if the fragment is outside scissor rectangle + // Append the scissor and depth tests + WriteDepth(); WriteScissor(); // Write shader bytecode to emulate all enabled PICA lights - if (config.state.lighting.enable) { - WriteLighting(); - } + WriteLighting(); combiner_buffer = ConstF32(0.f, 0.f, 0.f, 0.f); next_combiner_buffer = GetShaderDataMember(vec_ids.Get(4), ConstS32(26)); last_tex_env_out = rounded_primary_color; // Write shader bytecode to emulate PICA TEV stages - for (std::size_t index = 0; index < config.state.tev_stages.size(); ++index) { - WriteTevStage(static_cast(index)); + for (u32 index = 0; index < config.texture.tev_stages.size(); ++index) { + WriteTevStage(index); } - 
WriteAlphaTestCondition(config.state.alpha_test_func); - - // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use - // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then - // do our own transformation according to PICA specification. - WriteDepth(); + WriteAlphaTestCondition(config.framebuffer.alpha_test_func); // Emulate the fog - switch (config.state.fog_mode) { + switch (config.texture.fog_mode) { case TexturingRegs::FogMode::Fog: WriteFog(); break; @@ -80,29 +72,27 @@ void FragmentModule::Generate() { } Id color{Byteround(last_tex_env_out, 4)}; - if (config.state.emulate_logic_op) { - switch (config.state.logic_op) { - case FramebufferRegs::LogicOp::Clear: - color = ConstF32(0.f, 0.f, 0.f, 0.f); - break; - case FramebufferRegs::LogicOp::Set: - color = ConstF32(1.f, 1.f, 1.f, 1.f); - break; - case FramebufferRegs::LogicOp::Copy: - // Take the color output as-is - break; - case FramebufferRegs::LogicOp::CopyInverted: - // out += "color = ~color;\n"; - break; - case FramebufferRegs::LogicOp::NoOp: - // We need to discard the color, but not necessarily the depth. This is not possible - // with fragment shader alone, so we emulate this behavior with the color mask. - break; - default: - LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", - static_cast(config.state.logic_op.Value())); - UNIMPLEMENTED(); - } + switch (config.framebuffer.logic_op) { + case FramebufferRegs::LogicOp::Clear: + color = ConstF32(0.f, 0.f, 0.f, 0.f); + break; + case FramebufferRegs::LogicOp::Set: + color = ConstF32(1.f, 1.f, 1.f, 1.f); + break; + case FramebufferRegs::LogicOp::Copy: + // Take the color output as-is + break; + case FramebufferRegs::LogicOp::CopyInverted: + // out += "color = ~color;\n"; + break; + case FramebufferRegs::LogicOp::NoOp: + // We need to discard the color, but not necessarily the depth. 
This is not possible + // with fragment shader alone, so we emulate this behavior with the color mask. + break; + default: + LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", + static_cast(config.framebuffer.logic_op.Value())); + UNIMPLEMENTED(); } // Write output color @@ -119,7 +109,7 @@ void FragmentModule::WriteDepth() { const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))}; const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))}; depth = OpFma(f32_id, z_over_w, depth_scale, depth_offset); - if (config.state.depthmap_enable == Pica::RasterizerRegs::DepthBuffering::WBuffering) { + if (config.framebuffer.depthmap_enable == Pica::RasterizerRegs::DepthBuffering::WBuffering) { const Id gl_frag_coord_w{ OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(3u)))}; depth = OpFDiv(f32_id, depth, gl_frag_coord_w); @@ -128,7 +118,7 @@ void FragmentModule::WriteDepth() { } void FragmentModule::WriteScissor() { - if (config.state.scissor_test_mode == RasterizerRegs::ScissorMode::Disabled) { + if (config.framebuffer.scissor_test_mode == RasterizerRegs::ScissorMode::Disabled) { return; } @@ -149,7 +139,7 @@ void FragmentModule::WriteScissor() { const Id cond2{OpFOrdLessThan(bvec_ids.Get(2), gl_frag_coord_xy, scissor_2)}; Id result{OpAll(bool_id, OpCompositeConstruct(bvec_ids.Get(4), cond1, cond2))}; - if (config.state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) { + if (config.framebuffer.scissor_test_mode == RasterizerRegs::ScissorMode::Include) { result = OpLogicalNot(bool_id, result); } @@ -167,7 +157,7 @@ void FragmentModule::WriteScissor() { void FragmentModule::WriteFog() { // Get index into fog LUT Id fog_index{}; - if (config.state.fog_flip) { + if (config.texture.fog_flip) { fog_index = OpFMul(f32_id, OpFSub(f32_id, ConstF32(1.f), depth), ConstF32(128.f)); } else { fog_index = OpFMul(f32_id, depth, ConstF32(128.f)); @@ -201,14 +191,17 @@ void FragmentModule::WriteFog() { void FragmentModule::WriteGas() { // TODO: 
Implement me - telemetry.AddField(Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true); LOG_CRITICAL(Render, "Unimplemented gas mode"); OpKill(); OpFunctionEnd(); } void FragmentModule::WriteLighting() { - const auto& lighting = config.state.lighting; + if (!config.lighting.enable) { + return; + } + + const auto& lighting = config.lighting; // Define lighting globals Id diffuse_sum{ConstF32(0.f, 0.f, 0.f, 1.f)}; @@ -363,7 +356,7 @@ void FragmentModule::WriteLighting() { const Id sampler_index{ConstU32(static_cast(sampler))}; if (abs) { // LUT index is in the range of (0.0, 1.0) - index = lighting.light[light_num].two_sided_diffuse + index = lighting.lights[light_num].two_sided_diffuse ? OpFAbs(f32_id, index) : OpFMax(f32_id, index, ConstF32(0.f)); return lookup_lighting_lut_unsigned(sampler_index, index); @@ -375,11 +368,12 @@ void FragmentModule::WriteLighting() { // Write the code to emulate each enabled light for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) { - const auto& light_config = lighting.light[light_index]; + const auto& light_config = lighting.lights[light_index]; const auto GetLightMember = [&](s32 member) -> Id { const Id member_type = member < 6 ? 
vec_ids.Get(3) : f32_id; - const Id light_num{ConstS32(static_cast(lighting.light[light_index].num.Value()))}; + const Id light_num{ + ConstS32(static_cast(lighting.lights[light_index].num.Value()))}; return GetShaderDataMember(member_type, ConstS32(24), light_num, ConstS32(member)); }; @@ -595,7 +589,7 @@ void FragmentModule::WriteLighting() { void FragmentModule::WriteTevStage(s32 index) { const TexturingRegs::TevStageConfig stage = - static_cast(config.state.tev_stages[index]); + static_cast(config.texture.tev_stages[index]); // Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) const auto is_passthrough_tev_stage = [](const TevStageConfig& stage) { @@ -860,8 +854,6 @@ Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id } void FragmentModule::DefineTexSampler(u32 texture_unit) { - const PicaFSConfigState& state = config.state; - const Id func_type{TypeFunction(vec_ids.Get(4))}; sample_tex_unit_func[texture_unit] = OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type); @@ -869,14 +861,15 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) { const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)}; - if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) { + if (texture_unit == 0 && + config.texture.texture0_type == TexturingRegs::TextureConfig::Disabled) { OpReturnValue(zero_vec); OpFunctionEnd(); return; } if (texture_unit == 3) { - if (state.proctex.enable) { + if (config.proctex.enable) { OpReturnValue(ProcTexSampler()); } else { OpReturnValue(zero_vec); @@ -888,10 +881,10 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) { const Id border_label{OpLabel()}; const Id not_border_label{OpLabel()}; - u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit; + u32 texcoord_num = texture_unit == 2 && config.texture.texture2_use_coord1 ? 
1 : texture_unit; const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[texcoord_num])}; - auto& texture_border_color = state.texture_border_color[texture_unit]; + const auto& texture_border_color = config.texture.texture_border_color[texture_unit]; if (texture_border_color.enable_s || texture_border_color.enable_t) { const Id texcoord_s{OpCompositeExtract(f32_id, texcoord, 0)}; const Id texcoord_t{OpCompositeExtract(f32_id, texcoord, 1)}; @@ -960,7 +953,7 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) { switch (texture_unit) { case 0: // Only unit 0 respects the texturing type - switch (state.texture0_type) { + switch (config.texture.texture0_type) { case Pica::TexturingRegs::TextureConfig::Texture2D: ret_val = sample_lod(tex0_id); break; @@ -976,7 +969,8 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) { // return "shadowTextureCube(texcoord0, texcoord0_w)"; break; default: - LOG_CRITICAL(Render, "Unhandled texture type {:x}", state.texture0_type.Value()); + LOG_CRITICAL(Render, "Unhandled texture type {:x}", + config.texture.texture0_type.Value()); UNIMPLEMENTED(); ret_val = zero_vec; break; @@ -999,7 +993,7 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) { Id FragmentModule::ProcTexSampler() { // Define noise tables at the beginning of the function - if (config.state.proctex.noise_enable) { + if (config.proctex.noise_enable) { noise1d_table = DefineVar(TypeArray(i32_id, ConstU32(16u)), spv::StorageClass::Function); noise2d_table = @@ -1008,8 +1002,8 @@ Id FragmentModule::ProcTexSampler() { lut_offsets = DefineVar(TypeArray(i32_id, ConstU32(8u)), spv::StorageClass::Function); Id uv{}; - if (config.state.proctex.coord < 3) { - const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.state.proctex.coord.Value()])}; + if (config.proctex.coord < 3) { + const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.proctex.coord.Value()])}; uv = OpFAbs(vec_ids.Get(2), texcoord); } else { LOG_CRITICAL(Render, "Unexpected proctex.coord 
>= 3"); @@ -1027,26 +1021,24 @@ Id FragmentModule::ProcTexSampler() { // unlike normal texture, the bias is inside the log2 const Id proctex_bias{GetShaderDataMember(f32_id, ConstS32(16))}; const Id bias{ - OpFMul(f32_id, ConstF32(static_cast(config.state.proctex.lut_width)), proctex_bias)}; + OpFMul(f32_id, ConstF32(static_cast(config.proctex.lut_width)), proctex_bias)}; const Id duv_xy{ OpFAdd(f32_id, OpCompositeExtract(f32_id, duv, 0), OpCompositeExtract(f32_id, duv, 1))}; Id lod{OpLog2(f32_id, OpFMul(f32_id, OpFAbs(f32_id, bias), duv_xy))}; lod = OpSelect(f32_id, OpFOrdEqual(bool_id, proctex_bias, ConstF32(0.f)), ConstF32(0.f), lod); - lod = OpFClamp(f32_id, lod, - ConstF32(std::max(0.0f, static_cast(config.state.proctex.lod_min))), - ConstF32(std::min(7.0f, static_cast(config.state.proctex.lod_max)))); + lod = + OpFClamp(f32_id, lod, ConstF32(std::max(0.0f, static_cast(config.proctex.lod_min))), + ConstF32(std::min(7.0f, static_cast(config.proctex.lod_max)))); // Get shift offset before noise generation const Id u_shift{AppendProcTexShiftOffset(OpCompositeExtract(f32_id, uv, 1), - config.state.proctex.u_shift, - config.state.proctex.u_clamp)}; + config.proctex.u_shift, config.proctex.u_clamp)}; const Id v_shift{AppendProcTexShiftOffset(OpCompositeExtract(f32_id, uv, 0), - config.state.proctex.v_shift, - config.state.proctex.v_clamp)}; + config.proctex.v_shift, config.proctex.v_clamp)}; // Generate noise - if (config.state.proctex.noise_enable) { + if (config.proctex.noise_enable) { const Id proctex_noise_a{GetShaderDataMember(vec_ids.Get(2), ConstS32(21))}; const Id noise_coef{ProcTexNoiseCoef(uv)}; uv = OpFAdd(vec_ids.Get(2), uv, @@ -1059,16 +1051,16 @@ Id FragmentModule::ProcTexSampler() { Id v{OpFAdd(f32_id, OpCompositeExtract(f32_id, uv, 1), v_shift)}; // Clamp - u = AppendProcTexClamp(u, config.state.proctex.u_clamp); - v = AppendProcTexClamp(v, config.state.proctex.v_clamp); + u = AppendProcTexClamp(u, config.proctex.u_clamp); + v = AppendProcTexClamp(v, 
config.proctex.v_clamp); // Combine and map const Id proctex_color_map_offset{GetShaderDataMember(i32_id, ConstS32(12))}; - const Id lut_coord{AppendProcTexCombineAndMap(config.state.proctex.color_combiner, u, v, - proctex_color_map_offset)}; + const Id lut_coord{ + AppendProcTexCombineAndMap(config.proctex.color_combiner, u, v, proctex_color_map_offset)}; Id final_color{}; - switch (config.state.proctex.lut_filter) { + switch (config.proctex.lut_filter) { case ProcTexFilter::Linear: case ProcTexFilter::Nearest: { final_color = SampleProcTexColor(lut_coord, ConstS32(0)); @@ -1090,9 +1082,9 @@ Id FragmentModule::ProcTexSampler() { } } - if (config.state.proctex.separate_alpha) { + if (config.proctex.separate_alpha) { const Id proctex_alpha_map_offset{GetShaderDataMember(i32_id, ConstS32(13))}; - const Id final_alpha{AppendProcTexCombineAndMap(config.state.proctex.alpha_combiner, u, v, + const Id final_alpha{AppendProcTexCombineAndMap(config.proctex.alpha_combiner, u, v, proctex_alpha_map_offset)}; final_color = OpCompositeInsert(vec_ids.Get(4), final_alpha, final_color, 3); } @@ -1189,13 +1181,11 @@ Id FragmentModule::ProcTexNoiseCoef(Id x) { } Id FragmentModule::SampleProcTexColor(Id lut_coord, Id level) { - const Id lut_width{ - OpShiftRightArithmetic(i32_id, ConstS32(config.state.proctex.lut_width), level)}; + const Id lut_width{OpShiftRightArithmetic(i32_id, ConstS32(config.proctex.lut_width), level)}; const Id lut_ptr{TypePointer(spv::StorageClass::Function, i32_id)}; // Offsets for level 4-7 seem to be hardcoded - InitTableS32(lut_offsets, config.state.proctex.lut_offset0, config.state.proctex.lut_offset1, - config.state.proctex.lut_offset2, config.state.proctex.lut_offset3, 0xF0, 0xF8, - 0xFC, 0xFE); + InitTableS32(lut_offsets, config.proctex.lut_offset0, config.proctex.lut_offset1, + config.proctex.lut_offset2, config.proctex.lut_offset3, 0xF0, 0xF8, 0xFC, 0xFE); const Id lut_offset{OpLoad(i32_id, OpAccessChain(lut_ptr, lut_offsets, level))}; // For the 
color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] lut_coord = @@ -1209,7 +1199,7 @@ Id FragmentModule::SampleProcTexColor(Id lut_coord, Id level) { const Id proctex_lut_offset{GetShaderDataMember(i32_id, ConstS32(14))}; const Id lut_rgba{OpImage(image_buffer_id, texture_buffer_lut_rgba)}; - switch (config.state.proctex.lut_filter) { + switch (config.proctex.lut_filter) { case ProcTexFilter::Linear: case ProcTexFilter::LinearMipmapLinear: case ProcTexFilter::LinearMipmapNearest: { @@ -1549,9 +1539,8 @@ void FragmentModule::DefineInterface() { Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); } -std::vector GenerateFragmentShader(const PicaFSConfig& config) { - auto& telemetry = Core::System::GetInstance().TelemetrySession(); - FragmentModule module{telemetry, config}; +std::vector GenerateFragmentShader(const FSConfig& config) { + FragmentModule module{config}; module.Generate(); return module.Assemble(); } diff --git a/src/video_core/shader/generator/spv_shader_gen.h b/src/video_core/shader/generator/spv_fs_shader_gen.h similarity index 96% rename from src/video_core/shader/generator/spv_shader_gen.h rename to src/video_core/shader/generator/spv_fs_shader_gen.h index 4030a7a8e4..7c24d4383b 100644 --- a/src/video_core/shader/generator/spv_shader_gen.h +++ b/src/video_core/shader/generator/spv_fs_shader_gen.h @@ -7,11 +7,7 @@ #include #include -#include "video_core/shader/generator/shader_gen.h" - -namespace Core { -class TelemetrySession; -} +#include "video_core/shader/generator/pica_fs_config.h" namespace Pica::Shader::Generator::SPIRV { @@ -34,7 +30,7 @@ class FragmentModule : public Sirit::Module { static constexpr u32 NUM_NON_PROC_TEX_UNITS = 3; public: - explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config); + explicit FragmentModule(const FSConfig& config); ~FragmentModule(); /// Emits SPIR-V bytecode corresponding to the provided pica fragment configuration @@ -218,8 +214,7 
@@ private: Id CompareShadow(Id pixel, Id z); private: - Core::TelemetrySession& telemetry; - PicaFSConfig config; + const FSConfig& config; Id void_id{}; Id bool_id{}; Id f32_id{}; @@ -289,6 +284,6 @@ private: * @param separable_shader generates shader that can be used for separate shader object * @returns String of the shader source code */ -std::vector GenerateFragmentShader(const PicaFSConfig& config); +std::vector GenerateFragmentShader(const FSConfig& config); } // namespace Pica::Shader::Generator::SPIRV