From 6ddf4b241f19ecec89b867fc8d5fa30d1645fd12 Mon Sep 17 00:00:00 2001 From: Steveice10 <1269164+Steveice10@users.noreply.github.com> Date: Thu, 17 Aug 2023 13:22:25 -0700 Subject: [PATCH] renderer/vulkan: Emulate custom border colors in shaders when unavailable. (#6878) --- CMakeModules/GenerateSCMRev.cmake | 8 + src/video_core/rasterizer_accelerated.cpp | 21 ++ src/video_core/rasterizer_accelerated.h | 3 + .../renderer_vulkan/vk_instance.cpp | 4 +- .../renderer_vulkan/vk_shader_gen.cpp | 162 ++++++---- .../renderer_vulkan/vk_shader_gen.h | 5 + .../renderer_vulkan/vk_shader_gen_spv.cpp | 292 ++++++++++-------- .../renderer_vulkan/vk_shader_gen_spv.h | 18 +- src/video_core/shader/shader_uniforms.cpp | 1 + src/video_core/shader/shader_uniforms.h | 3 +- 10 files changed, 322 insertions(+), 195 deletions(-) diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index f0c8e7da63..ee077cbbd9 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -12,8 +12,16 @@ set(HASH_FILES "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_util.cpp" "${VIDEO_CORE}/renderer_opengl/gl_shader_util.h" + "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.cpp" + "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.h" + "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.cpp" + "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.h" + "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp" + "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h" "${VIDEO_CORE}/shader/shader.cpp" "${VIDEO_CORE}/shader/shader.h" + "${VIDEO_CORE}/shader/shader_uniforms.cpp" + "${VIDEO_CORE}/shader/shader_uniforms.h" "${VIDEO_CORE}/pica.cpp" "${VIDEO_CORE}/pica.h" "${VIDEO_CORE}/regs_framebuffer.h" diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 2dcae06f8a..bcd40354dd 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -599,6 +599,17 
@@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { SyncTextureLodBias(2); break; + // Texture borders + case PICA_REG_INDEX(texturing.texture0.border_color): + SyncTextureBorderColor(0); + break; + case PICA_REG_INDEX(texturing.texture1.border_color): + SyncTextureBorderColor(1); + break; + case PICA_REG_INDEX(texturing.texture2.border_color): + SyncTextureBorderColor(2); + break; + // Clipping plane case PICA_REG_INDEX(rasterizer.clip_coef[0]): case PICA_REG_INDEX(rasterizer.clip_coef[1]): @@ -821,6 +832,16 @@ void RasterizerAccelerated::SyncTextureLodBias(int tex_index) { } } +void RasterizerAccelerated::SyncTextureBorderColor(int tex_index) { + const auto pica_textures = regs.texturing.GetTextures(); + const auto params = pica_textures[tex_index].config; + const Common::Vec4f border_color = ColorRGBA8(params.border_color.raw); + if (border_color != uniform_block_data.data.tex_border_color[tex_index]) { + uniform_block_data.data.tex_border_color[tex_index] = border_color; + uniform_block_data.dirty = true; + } +} + void RasterizerAccelerated::SyncClipCoef() { const auto raw_clip_coef = regs.rasterizer.GetClipCoef(); const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index d9191226be..2ad11ee32a 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -97,6 +97,9 @@ protected: /// Syncs the texture LOD bias to match the PICA register void SyncTextureLodBias(int tex_index); + /// Syncs the texture border color to match the PICA registers + void SyncTextureBorderColor(int tex_index); + /// Syncs the clip coefficients to match the PICA register void SyncClipCoef(); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 2388032a85..9d1b1935e0 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ 
b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -409,7 +409,9 @@ bool Instance::CreateDevice() { const bool has_extended_dynamic_state = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, is_arm || is_qualcomm, "it is broken on Qualcomm and ARM drivers"); - const bool has_custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + const bool has_custom_border_color = + add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, is_qualcomm, + "it is broken on most Qualcomm driver versions"); const bool has_index_type_uint8 = add_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME); const bool has_pipeline_creation_cache_control = add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME); diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 8ad0eb4d83..bfd954b840 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -69,6 +69,17 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) { state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0); + const auto pica_textures = regs.texturing.GetTextures(); + for (u32 tex_index = 0; tex_index < 3; tex_index++) { + const auto config = pica_textures[tex_index].config; + state.texture_border_color[tex_index].enable_s.Assign( + !instance.IsCustomBorderColorSupported() && + config.wrap_s == TexturingRegs::TextureConfig::WrapMode::ClampToBorder); + state.texture_border_color[tex_index].enable_t.Assign( + !instance.IsCustomBorderColorSupported() && + config.wrap_t == TexturingRegs::TextureConfig::WrapMode::ClampToBorder); + } + // Emulate logic op in the shader if not supported. 
This is mostly for mobile GPUs const bool emulate_logic_op = instance.NeedsLogicOpEmulation() && !Pica::g_state.regs.framebuffer.output_merger.alphablend_enable; @@ -284,54 +295,6 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) { stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); } -static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) { - const auto& state = config.state; - switch (texture_unit) { - case 0: - // Only unit 0 respects the texturing type - switch (state.texture0_type) { - case TexturingRegs::TextureConfig::Texture2D: - return "textureLod(tex0, texcoord0, getLod(texcoord0 * " - "vec2(textureSize(tex0, 0))) + tex_lod_bias[0])"; - case TexturingRegs::TextureConfig::Projection2D: - // TODO (wwylele): find the exact LOD formula for projection texture - return "textureProj(tex0, vec3(texcoord0, texcoord0_w))"; - case TexturingRegs::TextureConfig::TextureCube: - return "texture(tex_cube, vec3(texcoord0, texcoord0_w))"; - case TexturingRegs::TextureConfig::Shadow2D: - return "shadowTexture(texcoord0, texcoord0_w)"; - case TexturingRegs::TextureConfig::ShadowCube: - return "shadowTextureCube(texcoord0, texcoord0_w)"; - case TexturingRegs::TextureConfig::Disabled: - return "vec4(0.0)"; - default: - LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type); - UNIMPLEMENTED(); - return "texture(tex0, texcoord0)"; - } - case 1: - return "textureLod(tex1, texcoord1, getLod(texcoord1 * " - "vec2(textureSize(tex1, 0))) + tex_lod_bias[1])"; - case 2: - if (state.texture2_use_coord1) - return "textureLod(tex2, texcoord1, getLod(texcoord1 * " - "vec2(textureSize(tex2, 0))) + tex_lod_bias[2])"; - else - return "textureLod(tex2, texcoord2, getLod(texcoord2 * " - "vec2(textureSize(tex2, 0))) + tex_lod_bias[2])"; - case 3: - if (state.proctex.enable) { - return "ProcTex()"; - } else { - LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it"); - return "vec4(0.0)"; - } - default: - 
UNREACHABLE(); - return ""; - } -} - /// Writes the specified TEV stage source component(s) static void AppendSource(std::string& out, const PicaFSConfig& config, TevStageConfig::Source source, std::string_view index_name) { @@ -347,16 +310,16 @@ static void AppendSource(std::string& out, const PicaFSConfig& config, out += "secondary_fragment_color"; break; case Source::Texture0: - out += SampleTexture(config, 0); + out += "sampleTexUnit0()"; break; case Source::Texture1: - out += SampleTexture(config, 1); + out += "sampleTexUnit1()"; break; case Source::Texture2: - out += SampleTexture(config, 2); + out += "sampleTexUnit2()"; break; case Source::Texture3: - out += SampleTexture(config, 3); + out += "sampleTexUnit3()"; break; case Source::PreviousBuffer: out += "combiner_buffer"; @@ -656,7 +619,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) { // Compute fragment normals and tangents const auto perturbation = [&] { - return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector)); + return fmt::format("2.0 * (sampleTexUnit{}()).rgb - 1.0", lighting.bump_selector); }; switch (lighting.bump_mode) { @@ -700,7 +663,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) { "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; if (lighting.enable_shadow) { - std::string shadow_texture = SampleTexture(config, lighting.shadow_selector); + std::string shadow_texture = fmt::format("sampleTexUnit{}()", lighting.shadow_selector); if (lighting.shadow_invert) { out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); } else { @@ -1310,6 +1273,7 @@ float mix2(vec4 s, vec2 a) { vec4 shadowTexture(vec2 uv, float w) { )"; + if (!config.state.shadow_texture_orthographic) { out += "uv /= w;"; } @@ -1344,9 +1308,7 @@ vec4 shadowTextureCube(vec2 uv, float w) { uv = -c.xy; if (c.z > 0.0) uv.x = -uv.x; } -)"; - out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - 
shadow_texture_bias));"; - out += R"( + uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); vec2 coord_floor = floor(coord); vec2 f = coord - coord_floor; @@ -1409,10 +1371,94 @@ vec4 shadowTextureCube(vec2 uv, float w) { CompareShadow(pixels.w, z)); return vec4(mix2(s, f)); } -)"; + )"; - if (config.state.proctex.enable) + if (config.state.proctex.enable) { AppendProcTexSampler(out, config); + } + + for (u32 texture_unit = 0; texture_unit < 4; texture_unit++) { + out += fmt::format("vec4 sampleTexUnit{}() {{", texture_unit); + if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) { + out += "return vec4(0.0);}"; + continue; + } else if (texture_unit == 3) { + if (state.proctex.enable) { + out += "return ProcTex();}"; + } else { + out += "return vec4(0.0);}"; + } + continue; + } + + u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit; + if (config.state.texture_border_color[texture_unit].enable_s) { + out += fmt::format(R"( + if (texcoord{}.x < 0 || texcoord{}.x > 1) {{ + return tex_border_color[{}]; + }} + )", + texcoord_num, texcoord_num, texture_unit); + } + if (config.state.texture_border_color[texture_unit].enable_t) { + out += fmt::format(R"( + if (texcoord{}.y < 0 || texcoord{}.y > 1) {{ + return tex_border_color[{}]; + }} + )", + texcoord_num, texcoord_num, texture_unit); + } + // TODO: 3D border? 
+ + switch (texture_unit) { + case 0: + // Only unit 0 respects the texturing type + switch (state.texture0_type) { + case TexturingRegs::TextureConfig::Texture2D: + out += "return textureLod(tex0, texcoord0, getLod(texcoord0 * " + "vec2(textureSize(tex0, 0))) + tex_lod_bias[0]);"; + break; + case TexturingRegs::TextureConfig::Projection2D: + // TODO (wwylele): find the exact LOD formula for projection texture + out += "return textureProj(tex0, vec3(texcoord0, texcoord0_w));"; + break; + case TexturingRegs::TextureConfig::TextureCube: + out += "return texture(tex_cube, vec3(texcoord0, texcoord0_w));"; + break; + case TexturingRegs::TextureConfig::Shadow2D: + out += "return shadowTexture(texcoord0, texcoord0_w);"; + break; + case TexturingRegs::TextureConfig::ShadowCube: + out += "return shadowTextureCube(texcoord0, texcoord0_w);"; + break; + default: + LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type); + UNIMPLEMENTED(); + out += "return texture(tex0, texcoord0);"; + break; + } + break; // keep unit 0 from falling through into the unit 1 sampler + case 1: + out += "return textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, " + "0))) + tex_lod_bias[1]);"; + break; + case 2: + if (state.texture2_use_coord1) { + // The LOD bias is indexed by the sampled texture unit, not by the texcoord set in use + out += "return textureLod(tex2, texcoord1, getLod(texcoord1 * " + "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);"; + } else { + out += "return textureLod(tex2, texcoord2, getLod(texcoord2 * " + "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);"; + } + break; + default: + UNREACHABLE(); + break; + } + + out += "}"; + } // We round the interpolated primary color to the nearest 1/255th // This maintains the PICA's 8 bits of precision diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index bd901b0060..52fa832ff6 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -57,6 +57,11 @@ struct PicaFSConfigState { BitField<28, 1, u32> shadow_texture_orthographic; }; + union { + 
BitField<0, 1, u32> enable_s; + BitField<1, 1, u32> enable_t; + } texture_border_color[3]; + std::array tev_stages; struct { diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp index cf26a927f3..3320219448 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp @@ -21,8 +21,8 @@ FragmentModule::FragmentModule(Core::TelemetrySession& telemetry_, const PicaFSC DefineArithmeticTypes(); DefineUniformStructs(); DefineInterface(); - if (config.state.proctex.enable) { - DefineProcTexSampler(); + for (u32 i = 0; i < NUM_TEX_UNITS; i++) { + DefineTexSampler(i); } DefineEntryPoint(); } @@ -225,7 +225,8 @@ void FragmentModule::WriteLighting() { // Compute fragment normals and tangents const auto perturbation = [&]() -> Id { - const Id texel{SampleTexture(lighting.bump_selector)}; + const Id texel{ + OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.bump_selector])}; const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)}; const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))}; return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f)); @@ -284,7 +285,7 @@ void FragmentModule::WriteLighting() { Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)}; if (lighting.enable_shadow) { - shadow = SampleTexture(lighting.shadow_selector); + shadow = OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.shadow_selector]); if (lighting.shadow_invert) { shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow); } @@ -710,89 +711,6 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) } } -Id FragmentModule::SampleTexture(u32 texture_unit) { - const PicaFSConfigState& state = config.state; - const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)}; - - // PICA's LOD formula for 2D textures. 
- // This LOD formula is the same as the LOD lower limit defined in OpenGL. - // f(x, y) >= max{m_u, m_v, m_w} - // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) - const auto sample_lod = [this, texture_unit](Id tex_id, Id texcoord_id) { - const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)}; - const Id tex_image{OpImage(image2d_id, sampled_image)}; - const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))}; - const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)}; - const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))}; - const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))}; - const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))}; - const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)}; - const Id dx_dy_max{ - OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))}; - const Id lod{OpLog2(f32_id, dx_dy_max)}; - const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))}; - const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)}; - return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord, - spv::ImageOperandsMask::Lod, biased_lod); - }; - - const auto sample = [this](Id tex_id, bool projection) { - const Id image_type = tex_id.value == tex_cube_id.value ? 
image_cube_id : image2d_id; - const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)}; - const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)}; - const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)}; - const Id coord{OpCompositeConstruct(vec_ids.Get(3), - OpCompositeExtract(f32_id, texcoord0, 0), - OpCompositeExtract(f32_id, texcoord0, 1), texcoord0_w)}; - if (projection) { - return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord); - } else { - return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord); - } - }; - - switch (texture_unit) { - case 0: - // Only unit 0 respects the texturing type - switch (state.texture0_type) { - case Pica::TexturingRegs::TextureConfig::Texture2D: - return sample_lod(tex0_id, texcoord0_id); - case Pica::TexturingRegs::TextureConfig::Projection2D: - return sample(tex0_id, true); - case Pica::TexturingRegs::TextureConfig::TextureCube: - return sample(tex_cube_id, false); - case Pica::TexturingRegs::TextureConfig::Shadow2D: - return SampleShadow(); - // case Pica::TexturingRegs::TextureConfig::ShadowCube: - // return "shadowTextureCube(texcoord0, texcoord0_w)"; - case Pica::TexturingRegs::TextureConfig::Disabled: - return zero_vec; - default: - LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type); - UNIMPLEMENTED(); - return zero_vec; - } - case 1: - return sample_lod(tex1_id, texcoord1_id); - case 2: - if (state.texture2_use_coord1) { - return sample_lod(tex2_id, texcoord1_id); - } else { - return sample_lod(tex2_id, texcoord2_id); - } - case 3: - if (state.proctex.enable) { - return OpFunctionCall(vec_ids.Get(4), proctex_func); - } else { - LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it"); - return zero_vec; - } - default: - UNREACHABLE(); - return void_id; - } -} - Id FragmentModule::CompareShadow(Id pixel, Id z) { const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))}; const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, 
ConstU32(255u)))}; @@ -802,7 +720,7 @@ Id FragmentModule::CompareShadow(Id pixel, Id z) { } Id FragmentModule::SampleShadow() { - const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)}; + const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord_id[0])}; const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)}; const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w), ConstF32(1.f)), ConstF32(16777215.f))}; @@ -941,11 +859,145 @@ Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id return ProcTexLookupLUT(offset, combined); } -void FragmentModule::DefineProcTexSampler() { +void FragmentModule::DefineTexSampler(u32 texture_unit) { + const PicaFSConfigState& state = config.state; + const Id func_type{TypeFunction(vec_ids.Get(4))}; - proctex_func = OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type); + sample_tex_unit_func[texture_unit] = + OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type); AddLabel(OpLabel()); + const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)}; + + if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) { + OpReturnValue(zero_vec); + OpFunctionEnd(); + return; + } + + if (texture_unit == 3) { + if (state.proctex.enable) { + OpReturnValue(ProcTexSampler()); + } else { + OpReturnValue(zero_vec); + } + OpFunctionEnd(); + return; + } + + const Id border_label{OpLabel()}; + const Id not_border_label{OpLabel()}; + + u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 
1 : texture_unit; + const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[texcoord_num])}; + + auto& texture_border_color = state.texture_border_color[texture_unit]; + if (texture_border_color.enable_s || texture_border_color.enable_t) { + const Id texcoord_s{OpCompositeExtract(f32_id, texcoord, 0)}; + const Id texcoord_t{OpCompositeExtract(f32_id, texcoord, 1)}; + + const Id s_lt_zero{OpFOrdLessThan(bool_id, texcoord_s, ConstF32(0.0f))}; + const Id s_gt_one{OpFOrdGreaterThan(bool_id, texcoord_s, ConstF32(1.0f))}; + const Id t_lt_zero{OpFOrdLessThan(bool_id, texcoord_t, ConstF32(0.0f))}; + const Id t_gt_one{OpFOrdGreaterThan(bool_id, texcoord_t, ConstF32(1.0f))}; + + Id cond{}; + if (texture_border_color.enable_s && texture_border_color.enable_t) { + cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(4), s_lt_zero, s_gt_one, + t_lt_zero, t_gt_one)); + } else if (texture_border_color.enable_s) { + cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), s_lt_zero, s_gt_one)); + } else if (texture_border_color.enable_t) { + cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), t_lt_zero, t_gt_one)); + } + + OpSelectionMerge(not_border_label, spv::SelectionControlMask::MaskNone); + OpBranchConditional(cond, border_label, not_border_label); + + AddLabel(border_label); + const Id border_color{ + GetShaderDataMember(vec_ids.Get(4), ConstS32(29), ConstU32(texture_unit))}; + OpReturnValue(border_color); + + AddLabel(not_border_label); + } + + // PICA's LOD formula for 2D textures. + // This LOD formula is the same as the LOD lower limit defined in OpenGL. 
+ // f(x, y) >= max{m_u, m_v, m_w} + // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) + const auto sample_lod = [&](Id tex_id) { + const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)}; + const Id tex_image{OpImage(image2d_id, sampled_image)}; + const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))}; + const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))}; + const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))}; + const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))}; + const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)}; + const Id dx_dy_max{ + OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))}; + const Id lod{OpLog2(f32_id, dx_dy_max)}; + const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))}; + const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)}; + return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord, + spv::ImageOperandsMask::Lod, biased_lod); + }; + + const auto sample_3d = [&](Id tex_id, bool projection) { + const Id image_type = tex_id.value == tex_cube_id.value ? 
image_cube_id : image2d_id; + const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)}; + const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)}; + const Id coord{OpCompositeConstruct(vec_ids.Get(3), OpCompositeExtract(f32_id, texcoord, 0), + OpCompositeExtract(f32_id, texcoord, 1), texcoord0_w)}; + if (projection) { + return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord); + } else { + return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord); + } + }; + + Id ret_val{void_id}; + switch (texture_unit) { + case 0: + // Only unit 0 respects the texturing type + switch (state.texture0_type) { + case Pica::TexturingRegs::TextureConfig::Texture2D: + ret_val = sample_lod(tex0_id); + break; + case Pica::TexturingRegs::TextureConfig::Projection2D: + ret_val = sample_3d(tex0_id, true); + break; + case Pica::TexturingRegs::TextureConfig::TextureCube: + ret_val = sample_3d(tex_cube_id, false); + break; + case Pica::TexturingRegs::TextureConfig::Shadow2D: + ret_val = SampleShadow(); + // case Pica::TexturingRegs::TextureConfig::ShadowCube: + // return "shadowTextureCube(texcoord0, texcoord0_w)"; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type); + UNIMPLEMENTED(); + ret_val = zero_vec; + break; + } + break; + case 1: + ret_val = sample_lod(tex1_id); + break; + case 2: + ret_val = sample_lod(tex2_id); + break; + default: + UNREACHABLE(); + break; + } + + OpReturnValue(ret_val); + OpFunctionEnd(); +} + +Id FragmentModule::ProcTexSampler() { // Define noise tables at the beginning of the function if (config.state.proctex.noise_enable) { noise1d_table = @@ -957,24 +1009,11 @@ void FragmentModule::DefineProcTexSampler() { Id uv{}; if (config.state.proctex.coord < 3) { - Id texcoord_id{}; - switch (config.state.proctex.coord.Value()) { - case 0: - texcoord_id = texcoord0_id; - break; - case 1: - texcoord_id = texcoord1_id; - break; - case 2: - texcoord_id = texcoord2_id; - break; - } - - 
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)}; + const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.state.proctex.coord.Value()])}; uv = OpFAbs(vec_ids.Get(2), texcoord); } else { LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3"); - uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord0_id)); + uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord_id[0])); } // This LOD formula is the same as the LOD upper limit defined in OpenGL. @@ -1058,8 +1097,7 @@ void FragmentModule::DefineProcTexSampler() { final_color = OpCompositeInsert(vec_ids.Get(4), final_alpha, final_color, 3); } - OpReturnValue(final_color); - OpFunctionEnd(); + return final_color; } Id FragmentModule::Byteround(Id variable_id, u32 size) { @@ -1226,13 +1264,13 @@ Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) { case Source::SecondaryFragmentColor: return secondary_fragment_color; case Source::Texture0: - return SampleTexture(0); + return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[0]); case Source::Texture1: - return SampleTexture(1); + return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[1]); case Source::Texture2: - return SampleTexture(2); + return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[2]); case Source::Texture3: - return SampleTexture(3); + return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[3]); case Source::PreviousBuffer: return combiner_buffer; case Source::Constant: @@ -1428,9 +1466,9 @@ void FragmentModule::DefineEntryPoint() { const Id main_type{TypeFunction(TypeVoid())}; const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)}; - AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, texcoord0_id, - texcoord1_id, texcoord2_id, texcoord0_w_id, normquat_id, view_id, color_id, - gl_frag_coord_id, gl_frag_depth_id); + AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, + texcoord_id[0], texcoord_id[1], 
texcoord_id[2], texcoord0_w_id, normquat_id, + view_id, color_id, gl_frag_coord_id, gl_frag_depth_id); AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft); AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing); } @@ -1443,21 +1481,25 @@ void FragmentModule::DefineUniformStructs() { const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))}; const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))}; const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))}; + const Id border_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_NON_PROC_TEX_UNITS))}; - const Id shader_data_struct_id{TypeStruct( - i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id, i32_id, - i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id, lighting_lut_array_id, - vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3), - light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(3), vec_ids.Get(4))}; + const Id shader_data_struct_id{ + TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id, + i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id, + lighting_lut_array_id, vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2), + vec_ids.Get(2), vec_ids.Get(3), light_src_array_id, const_color_array_id, + vec_ids.Get(4), vec_ids.Get(3), border_color_array_id, vec_ids.Get(4))}; constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u}; - constexpr std::array shader_data_offsets{ - 0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, 32u, 36u, 40u, 44u, 48u, 52u, 56u, - 60u, 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u, 224u, 240u, 1136u, 1232u, 1248u, 1264u}; + constexpr std::array shader_data_offsets{0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, + 32u, 36u, 40u, 44u, 48u, 52u, 56u, 60u, + 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u, + 224u, 240u, 1136u, 1232u, 1248u, 1264u, 1312u}; 
Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u); Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u); Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u); + Decorate(border_color_array_id, spv::Decoration::ArrayStride, 16u); for (u32 i = 0; i < static_cast(light_src_offsets.size()); i++) { MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]); } @@ -1475,9 +1517,9 @@ void FragmentModule::DefineUniformStructs() { void FragmentModule::DefineInterface() { // Define interface block primary_color_id = DefineInput(vec_ids.Get(4), 1); - texcoord0_id = DefineInput(vec_ids.Get(2), 2); - texcoord1_id = DefineInput(vec_ids.Get(2), 3); - texcoord2_id = DefineInput(vec_ids.Get(2), 4); + texcoord_id[0] = DefineInput(vec_ids.Get(2), 2); + texcoord_id[1] = DefineInput(vec_ids.Get(2), 3); + texcoord_id[2] = DefineInput(vec_ids.Get(2), 4); texcoord0_w_id = DefineInput(f32_id, 5); normquat_id = DefineInput(vec_ids.Get(4), 6); view_id = DefineInput(vec_ids.Get(3), 7); diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.h b/src/video_core/renderer_vulkan/vk_shader_gen_spv.h index 98631423f5..32bac11c28 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.h @@ -30,6 +30,8 @@ class FragmentModule : public Sirit::Module { static constexpr u32 NUM_TEV_STAGES = 6; static constexpr u32 NUM_LIGHTS = 8; static constexpr u32 NUM_LIGHTING_SAMPLERS = 24; + static constexpr u32 NUM_TEX_UNITS = 4; + static constexpr u32 NUM_NON_PROC_TEX_UNITS = 3; public: explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config); @@ -57,15 +59,15 @@ private: /// Writes the code to emulate the specified TEV stage void WriteTevStage(s32 index); - /// Defines the tex3 proctex sampling function - void DefineProcTexSampler(); + /// Defines the basic texture sampling functions for a unit + void DefineTexSampler(u32 texture_unit); + + /// 
Function for sampling the procedurally generated texture unit. + Id ProcTexSampler(); /// Writes the if-statement condition used to evaluate alpha testing. void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func); - /// Samples the current fragment texel from the provided texture unit - [[nodiscard]] Id SampleTexture(u32 texture_unit); - /// Samples the current fragment texel from shadow plane [[nodiscard]] Id SampleShadow(); @@ -237,9 +239,7 @@ private: Id shader_data_id{}; Id primary_color_id{}; - Id texcoord0_id{}; - Id texcoord1_id{}; - Id texcoord2_id{}; + Id texcoord_id[NUM_NON_PROC_TEX_UNITS]{}; Id texcoord0_w_id{}; Id normquat_id{}; Id view_id{}; @@ -276,7 +276,7 @@ private: Id alpha_results_2{}; Id alpha_results_3{}; - Id proctex_func{}; + Id sample_tex_unit_func[NUM_TEX_UNITS]{}; Id noise1d_table{}; Id noise2d_table{}; Id lut_offsets{}; diff --git a/src/video_core/shader/shader_uniforms.cpp b/src/video_core/shader/shader_uniforms.cpp index 8247d697b3..baa4964744 100644 --- a/src/video_core/shader/shader_uniforms.cpp +++ b/src/video_core/shader/shader_uniforms.cpp @@ -67,6 +67,7 @@ layout ({}std140) uniform shader_data {{ vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; vec3 tex_lod_bias; + vec4 tex_border_color[3]; vec4 clip_coef; }}; )"; diff --git a/src/video_core/shader/shader_uniforms.h b/src/video_core/shader/shader_uniforms.h index 1cc8e574f8..168f3f14c4 100644 --- a/src/video_core/shader/shader_uniforms.h +++ b/src/video_core/shader/shader_uniforms.h @@ -64,10 +64,11 @@ struct UniformData { alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages alignas(16) Common::Vec4f tev_combiner_buffer_color; alignas(16) Common::Vec3f tex_lod_bias; + alignas(16) Common::Vec4f tex_border_color[3]; alignas(16) Common::Vec4f clip_coef; }; -static_assert(sizeof(UniformData) == 0x500, +static_assert(sizeof(UniformData) == 0x530, "The size of the UniformData does not match the structure in the shader"); 
static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");