gl_shader_cache: Specialize shared memory size
Shared memory was being declared with an undefined size. Specialize the compute shader's shared memory size from the guest GPU's launch parameters.
This commit is contained in:
		
							parent
							
								
									4f5d8e4342
								
							
						
					
					
						commit
						dbeb523879
					
				@ -731,7 +731,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
 | 
			
		||||
 | 
			
		||||
    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
 | 
			
		||||
    const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
 | 
			
		||||
                                 launch_desc.block_dim_z);
 | 
			
		||||
                                 launch_desc.block_dim_z, launch_desc.shared_alloc);
 | 
			
		||||
    std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
 | 
			
		||||
    state.draw.program_pipeline = 0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -8,7 +8,9 @@
 | 
			
		||||
#include <thread>
 | 
			
		||||
#include <unordered_set>
 | 
			
		||||
#include <boost/functional/hash.hpp>
 | 
			
		||||
#include "common/alignment.h"
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "common/scope_exit.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/frontend/emu_window.h"
 | 
			
		||||
@ -322,6 +324,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
 | 
			
		||||
        source +=
 | 
			
		||||
            fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
 | 
			
		||||
                        variant.block_x, variant.block_y, variant.block_z);
 | 
			
		||||
 | 
			
		||||
        if (variant.shared_memory_size > 0) {
 | 
			
		||||
            source += fmt::format("shared uint smem[{}];",
 | 
			
		||||
                                  Common::AlignUp(variant.shared_memory_size, 4) / 4);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    source += '\n';
 | 
			
		||||
 | 
			
		||||
@ -223,7 +223,7 @@ private:
 | 
			
		||||
    Type type{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
constexpr const char* GetTypeString(Type type) {
 | 
			
		||||
const char* GetTypeString(Type type) {
 | 
			
		||||
    switch (type) {
 | 
			
		||||
    case Type::Bool:
 | 
			
		||||
        return "bool";
 | 
			
		||||
@ -243,7 +243,7 @@ constexpr const char* GetTypeString(Type type) {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
 | 
			
		||||
const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
 | 
			
		||||
    switch (image_type) {
 | 
			
		||||
    case Tegra::Shader::ImageType::Texture1D:
 | 
			
		||||
        return "1D";
 | 
			
		||||
@ -522,13 +522,6 @@ private:
 | 
			
		||||
        code.AddNewLine();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void DeclareSharedMemory() {
 | 
			
		||||
        if (stage != ProgramType::Compute) {
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
        code.AddLine("shared uint {}[];", GetSharedMemory());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void DeclareInternalFlags() {
 | 
			
		||||
        for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
 | 
			
		||||
            const auto flag_code = static_cast<InternalFlag>(flag);
 | 
			
		||||
@ -867,9 +860,7 @@ private:
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (const auto smem = std::get_if<SmemNode>(&*node)) {
 | 
			
		||||
            return {
 | 
			
		||||
                fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
 | 
			
		||||
                Type::Uint};
 | 
			
		||||
            return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
 | 
			
		||||
@ -1245,9 +1236,7 @@ private:
 | 
			
		||||
                Type::Uint};
 | 
			
		||||
        } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
 | 
			
		||||
            ASSERT(stage == ProgramType::Compute);
 | 
			
		||||
            target = {
 | 
			
		||||
                fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
 | 
			
		||||
                Type::Uint};
 | 
			
		||||
            target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
 | 
			
		||||
        } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
 | 
			
		||||
            const std::string real = Visit(gmem->GetRealAddress()).AsUint();
 | 
			
		||||
            const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
 | 
			
		||||
@ -2170,10 +2159,6 @@ private:
 | 
			
		||||
        return "lmem_" + suffix;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string GetSharedMemory() const {
 | 
			
		||||
        return fmt::format("smem_{}", suffix);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string GetInternalFlag(InternalFlag flag) const {
 | 
			
		||||
        constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
 | 
			
		||||
                                                  "overflow_flag"};
 | 
			
		||||
 | 
			
		||||
@ -52,11 +52,11 @@ struct BindlessSamplerKey {
 | 
			
		||||
    Tegra::Engines::SamplerDescriptor sampler{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
constexpr u32 NativeVersion = 7;
 | 
			
		||||
constexpr u32 NativeVersion = 8;
 | 
			
		||||
 | 
			
		||||
// Make sure sizes don't change by accident
 | 
			
		||||
static_assert(sizeof(BaseBindings) == 16);
 | 
			
		||||
static_assert(sizeof(ProgramVariant) == 28);
 | 
			
		||||
static_assert(sizeof(ProgramVariant) == 32);
 | 
			
		||||
 | 
			
		||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
 | 
			
		||||
    ShaderCacheVersionHash hash{};
 | 
			
		||||
 | 
			
		||||
@ -64,9 +64,10 @@ struct ProgramVariant final {
 | 
			
		||||
        : base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
 | 
			
		||||
 | 
			
		||||
    /// Compute constructor.
 | 
			
		||||
    explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z) noexcept
 | 
			
		||||
        : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)} {
 | 
			
		||||
    }
 | 
			
		||||
    explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z,
 | 
			
		||||
                                      u32 shared_memory_size) noexcept
 | 
			
		||||
        : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
 | 
			
		||||
          shared_memory_size{shared_memory_size} {}
 | 
			
		||||
 | 
			
		||||
    // Graphics specific parameters.
 | 
			
		||||
    BaseBindings base_bindings{};
 | 
			
		||||
@ -76,11 +77,13 @@ struct ProgramVariant final {
 | 
			
		||||
    u32 block_x{};
 | 
			
		||||
    u16 block_y{};
 | 
			
		||||
    u16 block_z{};
 | 
			
		||||
    u32 shared_memory_size{};
 | 
			
		||||
 | 
			
		||||
    bool operator==(const ProgramVariant& rhs) const noexcept {
 | 
			
		||||
        return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z) ==
 | 
			
		||||
               std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
 | 
			
		||||
                        rhs.block_z);
 | 
			
		||||
        return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
 | 
			
		||||
                        shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode,
 | 
			
		||||
                                                        rhs.block_x, rhs.block_y, rhs.block_z,
 | 
			
		||||
                                                        rhs.shared_memory_size);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    bool operator!=(const ProgramVariant& rhs) const noexcept {
 | 
			
		||||
@ -129,7 +132,8 @@ struct hash<OpenGL::ProgramVariant> {
 | 
			
		||||
               (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
 | 
			
		||||
               static_cast<std::size_t>(variant.block_x) ^
 | 
			
		||||
               (static_cast<std::size_t>(variant.block_y) << 32) ^
 | 
			
		||||
               (static_cast<std::size_t>(variant.block_z) << 48);
 | 
			
		||||
               (static_cast<std::size_t>(variant.block_z) << 48) ^
 | 
			
		||||
               (static_cast<std::size_t>(variant.shared_memory_size) << 16);
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user