Merge pull request #4807 from ReinUsesLisp/glasm-robust-ssbo
gl_arb_decompiler: Implement robust buffer operations
This commit is contained in:
		
						commit
						536c51912d
					
				| @ -376,9 +376,11 @@ private: | |||||||
|         std::string temporary = AllocTemporary(); |         std::string temporary = AllocTemporary(); | ||||||
|         std::string address; |         std::string address; | ||||||
|         std::string_view opname; |         std::string_view opname; | ||||||
|  |         bool robust = false; | ||||||
|         if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { |         if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||||||
|             address = GlobalMemoryPointer(*gmem); |             address = GlobalMemoryPointer(*gmem); | ||||||
|             opname = "ATOM"; |             opname = "ATOM"; | ||||||
|  |             robust = true; | ||||||
|         } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { |         } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||||||
|             address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); |             address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||||||
|             opname = "ATOMS"; |             opname = "ATOMS"; | ||||||
| @ -386,7 +388,15 @@ private: | |||||||
|             UNREACHABLE(); |             UNREACHABLE(); | ||||||
|             return "{0, 0, 0, 0}"; |             return "{0, 0, 0, 0}"; | ||||||
|         } |         } | ||||||
|  |         if (robust) { | ||||||
|  |             AddLine("IF NE.x;"); | ||||||
|  |         } | ||||||
|         AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); |         AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||||||
|  |         if (robust) { | ||||||
|  |             AddLine("ELSE;"); | ||||||
|  |             AddLine("MOV.S {}, 0;", temporary); | ||||||
|  |             AddLine("ENDIF;"); | ||||||
|  |         } | ||||||
|         return temporary; |         return temporary; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() { | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
// Emits the declaration of the PARAM array c, which aliases program.local[0..N-1] and is the
// array GlobalMemoryPointer reads from. Nothing is emitted when the IR reports no global
// memory entries.
// Left column (before): N is num_entries aligned up to 2 and then halved, i.e. two global
// memory entries share one vector.
// Right column (after): N is num_entries, i.e. one vector per global memory entry.
| void ARBDecompiler::DeclareGlobalMemory() { | void ARBDecompiler::DeclareGlobalMemory() { | ||||||
|     const std::size_t num_entries = ir.GetGlobalMemory().size(); |     const size_t num_entries = ir.GetGlobalMemory().size(); | ||||||
|     if (num_entries > 0) { |     if (num_entries > 0) { | ||||||
|         const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; |         AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); | ||||||
|         AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) { | |||||||
| 
 | 
 | ||||||
|     if (const auto gmem = std::get_if<GmemNode>(&*node)) { |     if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||||||
|         std::string temporary = AllocTemporary(); |         std::string temporary = AllocTemporary(); | ||||||
|         AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); |         AddLine("MOV {}, 0;", temporary); | ||||||
|  |         AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); | ||||||
|         return temporary; |         return temporary; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
// Emits the instruction sequence that computes the 64-bit address of a global memory access
// and returns the operand that holds it (the .x component of a long vector temporary).
// Common to both columns: PK64.U fills the long vector temporary from c[...], the 32-bit
// offset is computed as real address minus base address (SUB.U), widened into .z with
// CVT.U64.U32 and added to the base pointer, with the sum written to .x.
// Left column (before): two pointers share one vector, so c[binding / 2] is read and the base
// pointer is .x for even bindings and .y for odd bindings.
// Right column (after): every binding has its own vector c[binding]; the base pointer is
// always .x and a final SLT.U.CC compares the offset against c[binding].z, updating the
// condition code that the other hunks on this page test with IF NE.x and (NE.x).
// NOTE(review): the condition code is only meaningful after this sequence has been emitted.
// The Assign hunk on this page emits IF NE.x before it calls this function (the call is an
// argument of the following AddLine), so that IF appears to test a stale condition code.
// Confirm against the full file.
| std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { | std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { | ||||||
|  |     // Read a bindless SSBO, return its address and set CC accordingly
 | ||||||
|  |     // address = c[binding].xy
 | ||||||
|  |     // length  = c[binding].z
 | ||||||
|     const u32 binding = global_memory_names.at(gmem.GetDescriptor()); |     const u32 binding = global_memory_names.at(gmem.GetDescriptor()); | ||||||
|     const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; |  | ||||||
| 
 | 
 | ||||||
|     const std::string pointer = AllocLongVectorTemporary(); |     const std::string pointer = AllocLongVectorTemporary(); | ||||||
|     std::string temporary = AllocTemporary(); |     std::string temporary = AllocTemporary(); | ||||||
| 
 | 
 | ||||||
|     const u32 local_index = binding / 2; |     AddLine("PK64.U {}, c[{}];", pointer, binding); | ||||||
|     AddLine("PK64.U {}, c[{}];", pointer, local_index); |  | ||||||
|     AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), |     AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), | ||||||
|             Visit(gmem.GetBaseAddress())); |             Visit(gmem.GetBaseAddress())); | ||||||
|     AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); |     AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); | ||||||
|     AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); |     AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); | ||||||
|  |     // Compare offset to length and set CC
 | ||||||
|  |     AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); | ||||||
|     return fmt::format("{}.x", pointer); |     return fmt::format("{}.x", pointer); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) { | |||||||
|         ResetTemporaries(); |         ResetTemporaries(); | ||||||
|         return {}; |         return {}; | ||||||
|     } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |     } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||||||
|  |         AddLine("IF NE.x;"); | ||||||
|         AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); |         AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); | ||||||
|  |         AddLine("ENDIF;"); | ||||||
|         ResetTemporaries(); |         ResetTemporaries(); | ||||||
|         return {}; |         return {}; | ||||||
|     } else { |     } else { | ||||||
|  | |||||||
| @ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) { | |||||||
|     (state ? glEnable : glDisable)(cap); |     (state ? glEnable : glDisable)(cap); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
// Uploads the bindless storage buffer data to the program local parameters of the given
// program target, starting at parameter index 0. Returns without calling GL when the count
// is zero.
// Left column (before, UpdateBindlessPointers): the input is an array of 64-bit pointers and
// two of them are uploaded per 4-component parameter. For an odd count a zero is written to
// pointers[num_entries], so the caller's array must have room for that extra element.
// Right column (after, UpdateBindlessSSBOs): the input is an array of BindlessSSBO, which the
// static_assert on this page pins to 128 bits, so one element is uploaded per parameter and
// the input can be const.
| void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { | void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { | ||||||
|     if (num_entries == 0) { |     if (num_ssbos == 0) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     if (num_entries % 2 == 1) { |     glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos), | ||||||
|         pointers[num_entries] = 0; |                                     reinterpret_cast<const GLuint*>(ssbos)); | ||||||
|     } |  | ||||||
|     const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); |  | ||||||
|     glProgramLocalParametersI4uivNV(target, 0, num_vectors, |  | ||||||
|                                     reinterpret_cast<const GLuint*>(pointers)); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| @ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | ||||||
|     static constexpr std::array PARAMETER_LUT = { |     static constexpr std::array PARAMETER_LUT{ | ||||||
|         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||||||
|         GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, |         GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||||||
|         GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; |         GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, | ||||||
| 
 |     }; | ||||||
|     MICROPROFILE_SCOPE(OpenGL_UBO); |     MICROPROFILE_SCOPE(OpenGL_UBO); | ||||||
|     const auto& stages = maxwell3d.state.shader_stages; |     const auto& stages = maxwell3d.state.shader_stages; | ||||||
|     const auto& shader_stage = stages[stage_index]; |     const auto& shader_stage = stages[stage_index]; | ||||||
| @ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh | |||||||
|     const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; |     const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; | ||||||
|     const auto& entries{shader->GetEntries().global_memory_entries}; |     const auto& entries{shader->GetEntries().global_memory_entries}; | ||||||
| 
 | 
 | ||||||
|     std::array<GLuint64EXT, 32> pointers; |     std::array<BindlessSSBO, 32> ssbos; | ||||||
|     ASSERT(entries.size() < pointers.size()); |     ASSERT(entries.size() < ssbos.size()); | ||||||
| 
 | 
 | ||||||
|     const bool assembly_shaders = device.UseAssemblyShaders(); |     const bool assembly_shaders = device.UseAssemblyShaders(); | ||||||
|     u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; |     u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | ||||||
| @ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh | |||||||
|         const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; |         const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | ||||||
|         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; |         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||||||
|         const u32 size{gpu_memory.Read<u32>(addr + 8)}; |         const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||||||
|         SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); |         SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||||||
|         ++binding; |         ++binding; | ||||||
|     } |     } | ||||||
|     if (assembly_shaders) { |     if (assembly_shaders) { | ||||||
|         UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); |         UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
// Sets up every global memory entry used by a compute kernel.
// For each entry a descriptor is read from GPU memory at the const buffer address plus
// entry.cbuf_offset: a 64-bit GPU address followed by a 32-bit size at byte offset 8.
// SetupGlobalMemory is called with the running binding index and a pointer to that slot of the
// local array. When assembly shaders are in use the array is then uploaded to
// GL_COMPUTE_PROGRAM_NV.
// NOTE(review): the right column passes ssbos.size() (the fixed array size, 32) as the upload
// count, while the left column and the draw-path hunk on this page pass entries.size(). Slots
// past entries.size() are never written in this function, so the right column appears to
// upload indeterminate values; entries.size() is presumably what was intended. Confirm.
// NOTE(review): the ASSERT uses a strict less-than, so exactly 32 entries would trip it even
// though the right-column array has room for 32. Confirm whether that is intentional.
| @ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | |||||||
|     const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; |     const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||||||
|     const auto& entries{kernel->GetEntries().global_memory_entries}; |     const auto& entries{kernel->GetEntries().global_memory_entries}; | ||||||
| 
 | 
 | ||||||
|     std::array<GLuint64EXT, 32> pointers; |     std::array<BindlessSSBO, 32> ssbos; | ||||||
|     ASSERT(entries.size() < pointers.size()); |     ASSERT(entries.size() < ssbos.size()); | ||||||
| 
 | 
 | ||||||
|     u32 binding = 0; |     u32 binding = 0; | ||||||
|     for (const auto& entry : entries) { |     for (const auto& entry : entries) { | ||||||
|         const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; |         const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; | ||||||
|         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; |         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||||||
|         const u32 size{gpu_memory.Read<u32>(addr + 8)}; |         const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||||||
|         SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); |         SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||||||
|         ++binding; |         ++binding; | ||||||
|     } |     } | ||||||
|     if (device.UseAssemblyShaders()) { |     if (device.UseAssemblyShaders()) { | ||||||
|         UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); |         UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size()); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | ||||||
|                                          GPUVAddr gpu_addr, std::size_t size, |                                          GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { | ||||||
|                                          GLuint64EXT* pointer) { |     const size_t alignment{device.GetShaderStorageBufferAlignment()}; | ||||||
|     const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; |  | ||||||
|     const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); |     const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | ||||||
|     if (device.UseAssemblyShaders()) { |     if (device.UseAssemblyShaders()) { | ||||||
|         *pointer = info.address + info.offset; |         *ssbo = BindlessSSBO{ | ||||||
|  |             .address = static_cast<GLuint64EXT>(info.address + info.offset), | ||||||
|  |             .length = static_cast<GLsizei>(size), | ||||||
|  |             .padding = 0, | ||||||
|  |         }; | ||||||
|     } else { |     } else { | ||||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, |         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, | ||||||
|                           static_cast<GLsizeiptr>(size)); |                           static_cast<GLsizeiptr>(size)); | ||||||
|  | |||||||
| @ -53,6 +53,13 @@ namespace OpenGL { | |||||||
| struct ScreenInfo; | struct ScreenInfo; | ||||||
| struct DrawParameters; | struct DrawParameters; | ||||||
| 
 | 
 | ||||||
// Describes one bindless storage buffer for the assembly shader path (added by this commit).
// SetupGlobalMemory on this page fills it with address = info.address + info.offset, length =
// the size passed in, and padding = 0. The decompiler hunks read it back as c[binding].xy
// (address) and c[binding].z (length).
// The static_assert pins the struct to 128 bits, matching one 4-component 32-bit program local
// parameter as uploaded by UpdateBindlessSSBOs.
// NOTE(review): length is a signed GLsizei, while the shader-side bounds check on this page is
// an unsigned compare (SLT.U). Presumably only the bit pattern matters; confirm.
|  | struct BindlessSSBO { | ||||||
|  |     GLuint64EXT address; | ||||||
|  |     GLsizei length; | ||||||
|  |     GLsizei padding; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); | ||||||
|  | 
 | ||||||
| class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | ||||||
| public: | public: | ||||||
|     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | ||||||
| @ -126,7 +133,7 @@ private: | |||||||
| 
 | 
 | ||||||
|     /// Configures a global memory buffer.
 |     /// Configures a global memory buffer.
 | ||||||
|     void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |     void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||||||
|                            std::size_t size, GLuint64EXT* pointer); |                            size_t size, BindlessSSBO* ssbo); | ||||||
| 
 | 
 | ||||||
|     /// Configures the current textures to use for the draw command.
 |     /// Configures the current textures to use for the draw command.
 | ||||||
|     void SetupDrawTextures(std::size_t stage_index, Shader* shader); |     void SetupDrawTextures(std::size_t stage_index, Shader* shader); | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 LC
						LC