Merge pull request #4807 from ReinUsesLisp/glasm-robust-ssbo
gl_arb_decompiler: Implement robust buffer operations
This commit is contained in:
		
						commit
						536c51912d
					
				| @ -376,9 +376,11 @@ private: | ||||
|         std::string temporary = AllocTemporary(); | ||||
|         std::string address; | ||||
|         std::string_view opname; | ||||
|         bool robust = false; | ||||
|         if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||||
|             address = GlobalMemoryPointer(*gmem); | ||||
|             opname = "ATOM"; | ||||
|             robust = true; | ||||
|         } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||||
|             address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||||
|             opname = "ATOMS"; | ||||
| @ -386,7 +388,15 @@ private: | ||||
|             UNREACHABLE(); | ||||
|             return "{0, 0, 0, 0}"; | ||||
|         } | ||||
|         if (robust) { | ||||
|             AddLine("IF NE.x;"); | ||||
|         } | ||||
|         AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||||
|         if (robust) { | ||||
|             AddLine("ELSE;"); | ||||
|             AddLine("MOV.S {}, 0;", temporary); | ||||
|             AddLine("ENDIF;"); | ||||
|         } | ||||
|         return temporary; | ||||
|     } | ||||
| 
 | ||||
| @ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() { | ||||
| } | ||||
| 
 | ||||
| void ARBDecompiler::DeclareGlobalMemory() { | ||||
|     const std::size_t num_entries = ir.GetGlobalMemory().size(); | ||||
|     const size_t num_entries = ir.GetGlobalMemory().size(); | ||||
|     if (num_entries > 0) { | ||||
|         const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; | ||||
|         AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); | ||||
|         AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| @ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) { | ||||
| 
 | ||||
|     if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||||
|         std::string temporary = AllocTemporary(); | ||||
|         AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); | ||||
|         AddLine("MOV {}, 0;", temporary); | ||||
|         AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); | ||||
|         return temporary; | ||||
|     } | ||||
| 
 | ||||
| @ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||||
| } | ||||
| 
 | ||||
| std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { | ||||
|     // Read a bindless SSBO, return its address and set CC accordingly
 | ||||
|     // address = c[binding].xy
 | ||||
|     // length  = c[binding].z
 | ||||
|     const u32 binding = global_memory_names.at(gmem.GetDescriptor()); | ||||
|     const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; | ||||
| 
 | ||||
|     const std::string pointer = AllocLongVectorTemporary(); | ||||
|     std::string temporary = AllocTemporary(); | ||||
| 
 | ||||
|     const u32 local_index = binding / 2; | ||||
|     AddLine("PK64.U {}, c[{}];", pointer, local_index); | ||||
|     AddLine("PK64.U {}, c[{}];", pointer, binding); | ||||
|     AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), | ||||
|             Visit(gmem.GetBaseAddress())); | ||||
|     AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); | ||||
|     AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); | ||||
|     AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); | ||||
|     // Compare offset to length and set CC
 | ||||
|     AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); | ||||
|     return fmt::format("{}.x", pointer); | ||||
| } | ||||
| 
 | ||||
| @ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) { | ||||
|         ResetTemporaries(); | ||||
|         return {}; | ||||
|     } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||||
|         AddLine("IF NE.x;"); | ||||
|         AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); | ||||
|         AddLine("ENDIF;"); | ||||
|         ResetTemporaries(); | ||||
|         return {}; | ||||
|     } else { | ||||
|  | ||||
| @ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) { | ||||
|     (state ? glEnable : glDisable)(cap); | ||||
| } | ||||
| 
 | ||||
| void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { | ||||
|     if (num_entries == 0) { | ||||
| void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { | ||||
|     if (num_ssbos == 0) { | ||||
|         return; | ||||
|     } | ||||
|     if (num_entries % 2 == 1) { | ||||
|         pointers[num_entries] = 0; | ||||
|     } | ||||
|     const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); | ||||
|     glProgramLocalParametersI4uivNV(target, 0, num_vectors, | ||||
|                                     reinterpret_cast<const GLuint*>(pointers)); | ||||
|     glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos), | ||||
|                                     reinterpret_cast<const GLuint*>(ssbos)); | ||||
| } | ||||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| @ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | ||||
|     static constexpr std::array PARAMETER_LUT = { | ||||
|         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|     static constexpr std::array PARAMETER_LUT{ | ||||
|         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|         GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|         GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; | ||||
| 
 | ||||
|         GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|     }; | ||||
|     MICROPROFILE_SCOPE(OpenGL_UBO); | ||||
|     const auto& stages = maxwell3d.state.shader_stages; | ||||
|     const auto& shader_stage = stages[stage_index]; | ||||
| @ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh | ||||
|     const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; | ||||
|     const auto& entries{shader->GetEntries().global_memory_entries}; | ||||
| 
 | ||||
|     std::array<GLuint64EXT, 32> pointers; | ||||
|     ASSERT(entries.size() < pointers.size()); | ||||
|     std::array<BindlessSSBO, 32> ssbos; | ||||
|     ASSERT(entries.size() < ssbos.size()); | ||||
| 
 | ||||
|     const bool assembly_shaders = device.UseAssemblyShaders(); | ||||
|     u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | ||||
| @ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh | ||||
|         const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | ||||
|         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||||
|         const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||||
|         SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); | ||||
|         SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||||
|         ++binding; | ||||
|     } | ||||
|     if (assembly_shaders) { | ||||
|         UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); | ||||
|         UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| @ -1028,29 +1024,33 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | ||||
|     const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||||
|     const auto& entries{kernel->GetEntries().global_memory_entries}; | ||||
| 
 | ||||
|     std::array<GLuint64EXT, 32> pointers; | ||||
|     ASSERT(entries.size() < pointers.size()); | ||||
|     std::array<BindlessSSBO, 32> ssbos; | ||||
|     ASSERT(entries.size() < ssbos.size()); | ||||
| 
 | ||||
|     u32 binding = 0; | ||||
|     for (const auto& entry : entries) { | ||||
|         const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; | ||||
|         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||||
|         const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||||
|         SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); | ||||
|         SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||||
|         ++binding; | ||||
|     } | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); | ||||
|         // Upload only the entries written by the loop above; the rest of ssbos is uninitialized
 | ||||
|         UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), entries.size()); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | ||||
|                                          GPUVAddr gpu_addr, std::size_t size, | ||||
|                                          GLuint64EXT* pointer) { | ||||
|     const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; | ||||
|                                          GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { | ||||
|     const size_t alignment{device.GetShaderStorageBufferAlignment()}; | ||||
|     const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         *pointer = info.address + info.offset; | ||||
|         *ssbo = BindlessSSBO{ | ||||
|             .address = static_cast<GLuint64EXT>(info.address + info.offset), | ||||
|             .length = static_cast<GLsizei>(size), | ||||
|             .padding = 0, | ||||
|         }; | ||||
|     } else { | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, | ||||
|                           static_cast<GLsizeiptr>(size)); | ||||
|  | ||||
| @ -53,6 +53,13 @@ namespace OpenGL { | ||||
| struct ScreenInfo; | ||||
| struct DrawParameters; | ||||
| 
 | ||||
| struct BindlessSSBO { | ||||
|     GLuint64EXT address; | ||||
|     GLsizei length; | ||||
|     GLsizei padding; | ||||
| }; | ||||
| static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); | ||||
| 
 | ||||
| class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | ||||
| public: | ||||
|     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | ||||
| @ -126,7 +133,7 @@ private: | ||||
| 
 | ||||
|     /// Configures a global memory buffer.
 | ||||
|     void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||||
|                            std::size_t size, GLuint64EXT* pointer); | ||||
|                            size_t size, BindlessSSBO* ssbo); | ||||
| 
 | ||||
|     /// Configures the current textures to use for the draw command.
 | ||||
|     void SetupDrawTextures(std::size_t stage_index, Shader* shader); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 LC
						LC