Shader_IR: Fix TLD4 and add Bindless Variant.
This commit fixes several issues with TLD4: not all four results of the instruction were being written, and the color component always defaulted to red, among other things. It also implements the bindless variant (TLD4_B).
This commit is contained in:
		
							parent
							
								
									9f93ad08a5
								
							
						
					
					
						commit
						9293c3a0f2
					
				| @ -1237,6 +1237,32 @@ union Instruction { | |||||||
|         } |         } | ||||||
|     } tld4; |     } tld4; | ||||||
| 
 | 
 | ||||||
|  |     union { | ||||||
|  |         BitField<35, 1, u64> ndv_flag; | ||||||
|  |         BitField<49, 1, u64> nodep_flag; | ||||||
|  |         BitField<50, 1, u64> dc_flag; | ||||||
|  |         BitField<33, 2, u64> info; | ||||||
|  |         BitField<37, 2, u64> component; | ||||||
|  | 
 | ||||||
|  |         bool UsesMiscMode(TextureMiscMode mode) const { | ||||||
|  |             switch (mode) { | ||||||
|  |             case TextureMiscMode::NDV: | ||||||
|  |                 return ndv_flag != 0; | ||||||
|  |             case TextureMiscMode::NODEP: | ||||||
|  |                 return nodep_flag != 0; | ||||||
|  |             case TextureMiscMode::DC: | ||||||
|  |                 return dc_flag != 0; | ||||||
|  |             case TextureMiscMode::AOFFI: | ||||||
|  |                 return info == 1; | ||||||
|  |             case TextureMiscMode::PTP: | ||||||
|  |                 return info == 2; | ||||||
|  |             default: | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } tld4_b; | ||||||
|  | 
 | ||||||
|     union { |     union { | ||||||
|         BitField<49, 1, u64> nodep_flag; |         BitField<49, 1, u64> nodep_flag; | ||||||
|         BitField<50, 1, u64> dc_flag; |         BitField<50, 1, u64> dc_flag; | ||||||
| @ -1590,7 +1616,8 @@ public: | |||||||
|         TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
 |         TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
 | ||||||
|         TLD,    // Texture Load
 |         TLD,    // Texture Load
 | ||||||
|         TLDS,   // Texture Load with scalar/non-vec4 source/destinations
 |         TLDS,   // Texture Load with scalar/non-vec4 source/destinations
 | ||||||
|         TLD4,   // Texture Load 4
 |         TLD4,   // Texture Gather 4
 | ||||||
|  |         TLD4_B, // Texture Gather 4 Bindless
 | ||||||
|         TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
 |         TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
 | ||||||
|         TMML_B, // Texture Mip Map Level
 |         TMML_B, // Texture Mip Map Level
 | ||||||
|         TMML,   // Texture Mip Map Level
 |         TMML,   // Texture Mip Map Level
 | ||||||
| @ -1881,6 +1908,7 @@ private: | |||||||
|             INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), |             INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), | ||||||
|             INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), |             INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), | ||||||
|             INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |             INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | ||||||
|  |             INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), | ||||||
|             INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), |             INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), | ||||||
|             INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |             INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | ||||||
|             INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |             INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | ||||||
|  | |||||||
| @ -96,6 +96,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |     case OpCode::Id::TLD4_B: { | ||||||
|  |         is_bindless = true; | ||||||
|  |         [[fallthrough]]; | ||||||
|  |     } | ||||||
|     case OpCode::Id::TLD4: { |     case OpCode::Id::TLD4: { | ||||||
|         ASSERT(instr.tld4.array == 0); |         ASSERT(instr.tld4.array == 0); | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), |         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||||||
| @ -108,11 +112,14 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         const auto texture_type = instr.tld4.texture_type.Value(); |         const auto texture_type = instr.tld4.texture_type.Value(); | ||||||
|         const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); |         const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) | ||||||
|  |                                                : instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||||||
|         const bool is_array = instr.tld4.array != 0; |         const bool is_array = instr.tld4.array != 0; | ||||||
|         const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); |         const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) | ||||||
|  |                                           : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); | ||||||
|         WriteTexInstructionFloat( |         WriteTexInstructionFloat( | ||||||
|             bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); |             bb, instr, | ||||||
|  |             GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless), true); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case OpCode::Id::TLD4S: { |     case OpCode::Id::TLD4S: { | ||||||
| @ -359,10 +366,11 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, | |||||||
|     return *used_samplers.emplace(entry).first; |     return *used_samplers.emplace(entry).first; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, | ||||||
|  |                                         bool is_tld4) { | ||||||
|     u32 dest_elem = 0; |     u32 dest_elem = 0; | ||||||
|     for (u32 elem = 0; elem < 4; ++elem) { |     for (u32 elem = 0; elem < 4; ++elem) { | ||||||
|         if (!instr.tex.IsComponentEnabled(elem)) { |         if (!is_tld4 && !instr.tex.IsComponentEnabled(elem)) { | ||||||
|             // Skip disabled components
 |             // Skip disabled components
 | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
| @ -583,7 +591,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||||||
|                             bool is_array, bool is_aoffi) { |                             bool is_array, bool is_aoffi, bool is_bindless) { | ||||||
|     const std::size_t coord_count = GetCoordCount(texture_type); |     const std::size_t coord_count = GetCoordCount(texture_type); | ||||||
| 
 | 
 | ||||||
|     // If enabled arrays index is always stored in the gpr8 field
 |     // If enabled arrays index is always stored in the gpr8 field
 | ||||||
| @ -597,6 +605,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     u64 parameter_register = instr.gpr20.Value(); |     u64 parameter_register = instr.gpr20.Value(); | ||||||
|  | 
 | ||||||
|  |     const auto& sampler = | ||||||
|  |         is_bindless | ||||||
|  |             ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}}) | ||||||
|  |             : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); | ||||||
|  | 
 | ||||||
|     std::vector<Node> aoffi; |     std::vector<Node> aoffi; | ||||||
|     if (is_aoffi) { |     if (is_aoffi) { | ||||||
|         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); |         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||||||
| @ -607,12 +621,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||||||
|         dc = GetRegister(parameter_register++); |         dc = GetRegister(parameter_register++); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); |     const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) | ||||||
|  |                                        : Immediate(static_cast<u32>(instr.tld4.component)); | ||||||
| 
 | 
 | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|         auto coords_copy = coords; |         auto coords_copy = coords; | ||||||
|         MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; |         MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component, | ||||||
|  |                          element}; | ||||||
|         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -326,7 +326,7 @@ private: | |||||||
|     Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); |     Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); | ||||||
| 
 | 
 | ||||||
|     void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |     void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||||||
|                                   const Node4& components); |                                   const Node4& components, bool is_tld4 = false); | ||||||
| 
 | 
 | ||||||
|     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||||||
|                                    const Node4& components, bool ignore_mask = false); |                                    const Node4& components, bool ignore_mask = false); | ||||||
| @ -343,7 +343,7 @@ private: | |||||||
|                       bool is_array); |                       bool is_array); | ||||||
| 
 | 
 | ||||||
|     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||||
|                       bool depth_compare, bool is_array, bool is_aoffi); |                       bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless); | ||||||
| 
 | 
 | ||||||
|     Node4 GetTldCode(Tegra::Shader::Instruction instr); |     Node4 GetTldCode(Tegra::Shader::Instruction instr); | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Fernando Sahmkow
						Fernando Sahmkow