Merge pull request #1960 from ReinUsesLisp/shader-ir-ldg
video_core: Implement LDG through heuristics based on IR
This commit is contained in:
		
						commit
						52bb524526
					
				@ -87,6 +87,7 @@ add_library(video_core STATIC
 | 
			
		||||
    shader/decode.cpp
 | 
			
		||||
    shader/shader_ir.cpp
 | 
			
		||||
    shader/shader_ir.h
 | 
			
		||||
    shader/track.cpp
 | 
			
		||||
    surface.cpp
 | 
			
		||||
    surface.h
 | 
			
		||||
    textures/astc.cpp
 | 
			
		||||
 | 
			
		||||
@ -208,6 +208,8 @@ enum class UniformType : u64 {
 | 
			
		||||
    SignedShort = 3,
 | 
			
		||||
    Single = 4,
 | 
			
		||||
    Double = 5,
 | 
			
		||||
    Quad = 6,
 | 
			
		||||
    UnsignedQuad = 7,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
enum class StoreType : u64 {
 | 
			
		||||
@ -784,6 +786,12 @@ union Instruction {
 | 
			
		||||
        BitField<44, 2, u64> unknown;
 | 
			
		||||
    } st_l;
 | 
			
		||||
 | 
			
		||||
    union {
 | 
			
		||||
        BitField<48, 3, UniformType> type;
 | 
			
		||||
        BitField<46, 2, u64> cache_mode;
 | 
			
		||||
        BitField<20, 24, s64> immediate_offset;
 | 
			
		||||
    } ldg;
 | 
			
		||||
 | 
			
		||||
    union {
 | 
			
		||||
        BitField<0, 3, u64> pred0;
 | 
			
		||||
        BitField<3, 3, u64> pred3;
 | 
			
		||||
 | 
			
		||||
@ -4,8 +4,13 @@
 | 
			
		||||
 | 
			
		||||
#include <glad/glad.h>
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/memory.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_global_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 | 
			
		||||
#include "video_core/renderer_opengl/utils.h"
 | 
			
		||||
 | 
			
		||||
namespace OpenGL {
 | 
			
		||||
@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{
 | 
			
		||||
    LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Re-uploads this region's guest memory into its OpenGL buffer.
/// @param size_ Requested size in bytes; values above
///              RasterizerOpenGL::MaxGlobalMemorySize are clamped and reported.
void CachedGlobalRegion::Reload(u32 size_) {
    constexpr auto size_limit = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);

    if (size_ > size_limit) {
        // Keep the upload bounded, but make the truncation visible in the log.
        size = size_limit;
        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
                     size_limit);
    } else {
        size = size_;
    }

    // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
    glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
}
 | 
			
		||||
 | 
			
		||||
/// Looks up a reserved region by its address.
/// @param addr Address used as the key in the reserve map.
/// @param size Currently unused by the lookup.
/// @returns The reserved region, or a default-constructed GlobalRegion when none is reserved.
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
    const auto reserved = reserve.find(addr);
    if (reserved != reserve.end()) {
        return reserved->second;
    }
    return {};
}
 | 
			
		||||
 | 
			
		||||
/// Returns a freshly reloaded region for the given address, reusing a reserved one if present.
/// @param addr Address of the region.
/// @param size Size in bytes passed to the region's constructor and to Reload.
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
    GlobalRegion region = TryGetReservedGlobalRegion(addr, size);
    if (!region) {
        // Nothing reserved for this address yet: create the region and keep it for reuse.
        region = std::make_shared<CachedGlobalRegion>(addr, size);
        ReserveGlobalRegion(region);
    }

    // Reserved or new, the contents are always refreshed before being handed out.
    region->Reload(size);
    return region;
}
 | 
			
		||||
 | 
			
		||||
/// Stores the region in the reserve map under its own address, replacing any previous entry.
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
    const auto key = region->GetAddr();
    reserve[key] = region;
}
 | 
			
		||||
 | 
			
		||||
/// Constructs the cache, forwarding the rasterizer to the RasterizerCache base.
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache{rasterizer} {}
 | 
			
		||||
 | 
			
		||||
/// Resolves the global memory region a shader entry refers to, creating and registering it
/// in the cache when it is not present yet.
///
/// The entry names a location inside a const buffer (index + offset). The code reads a
/// 64-bit GPU address at that location and a 32-bit size 8 bytes after it.
///
/// @param global_region Shader entry holding the const buffer index and offset.
/// @param stage Shader stage whose const buffer bindings are consulted.
/// @returns The cached (or newly created) region for the resolved address.
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
    const GLShader::GlobalMemoryEntry& global_region,
    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {

    auto& gpu{Core::System::GetInstance().GPU()};
    // Bind by reference: a plain `auto` would copy the whole per-stage state on every lookup.
    const auto& cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
    const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
        cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
    ASSERT(cbuf_addr);

    // Descriptor read from the const buffer: address first, size right after it.
    const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
    const auto size = Memory::Read32(*cbuf_addr + 8);
    const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
    ASSERT(actual_addr);

    // Look up global region in the cache based on address
    GlobalRegion region = TryGet(*actual_addr);

    if (!region) {
        // No global region found - create a new one
        region = GetUncachedGlobalRegion(*actual_addr, size);
        Register(region);
    }

    return region;
}
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 | 
			
		||||
@ -5,9 +5,13 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <unordered_map>
 | 
			
		||||
 | 
			
		||||
#include <glad/glad.h>
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/rasterizer_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
			
		||||
 | 
			
		||||
@ -40,6 +44,9 @@ public:
 | 
			
		||||
        return buffer.handle;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Reloads the global region from guest memory
 | 
			
		||||
    void Reload(u32 size_);
 | 
			
		||||
 | 
			
		||||
    // TODO(Rodrigo): When global memory is written (STG), implement flushing
 | 
			
		||||
    void Flush() override {
 | 
			
		||||
        UNIMPLEMENTED();
 | 
			
		||||
@ -55,6 +62,17 @@ private:
 | 
			
		||||
/// Cache of global memory regions, keyed by address, built on top of RasterizerCache.
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
    explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);

    /// Gets the global memory region referenced by the given shader entry for the given stage
    GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& global_region,
                                 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);

private:
    /// Returns the region reserved for addr, or an empty GlobalRegion when none is reserved
    GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;

    /// Returns a reloaded region for addr, reusing a reserved one or creating and reserving it
    GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);

    /// Stores the region in the reserve map under its address
    void ReserveGlobalRegion(const GlobalRegion& region);

    /// Regions kept for reuse, keyed by address
    std::unordered_map<VAddr, GlobalRegion> reserve;
};
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 | 
			
		||||
@ -300,6 +300,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
 | 
			
		||||
    // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
 | 
			
		||||
    // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
 | 
			
		||||
    u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
 | 
			
		||||
    u32 current_gmem_bindpoint = 0;
 | 
			
		||||
    u32 current_texture_bindpoint = 0;
 | 
			
		||||
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
 | 
			
		||||
 | 
			
		||||
@ -358,6 +359,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
 | 
			
		||||
            SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
 | 
			
		||||
                              current_constbuffer_bindpoint);
 | 
			
		||||
 | 
			
		||||
        // Configure global memory regions for this shader stage.
 | 
			
		||||
        current_gmem_bindpoint = SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage),
 | 
			
		||||
                                                    shader, primitive_mode, current_gmem_bindpoint);
 | 
			
		||||
 | 
			
		||||
        // Configure the textures for this shader stage.
 | 
			
		||||
        current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
 | 
			
		||||
                                                  primitive_mode, current_texture_bindpoint);
 | 
			
		||||
@ -993,6 +998,23 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
 | 
			
		||||
    return current_bindpoint + static_cast<u32>(entries.size());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Binds every global memory region used by the shader to consecutive shader storage
/// bindpoints, starting at current_bindpoint.
/// @returns The first bindpoint not consumed by this stage.
u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader,
                                         GLenum primitive_mode, u32 current_bindpoint) {
    u32 next_bindpoint = current_bindpoint;

    for (const auto& gmem_entry : shader->GetShaderEntries().global_memory_entries) {
        const auto region = global_cache.GetGlobalRegion(gmem_entry, stage);

        const GLuint storage_block{shader->GetProgramResourceIndex(gmem_entry)};
        ASSERT(storage_block != GL_INVALID_INDEX);

        // Attach the buffer to the bindpoint, then point the shader's storage block at it.
        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, next_bindpoint, region->GetBufferHandle());
        glShaderStorageBlockBinding(shader->GetProgramHandle(primitive_mode), storage_block,
                                    next_bindpoint);
        ++next_bindpoint;
    }

    return next_bindpoint;
}
 | 
			
		||||
 | 
			
		||||
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
 | 
			
		||||
                                    GLenum primitive_mode, u32 current_unit) {
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_Texture);
 | 
			
		||||
 | 
			
		||||
@ -137,6 +137,16 @@ private:
 | 
			
		||||
    u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
 | 
			
		||||
                          GLenum primitive_mode, u32 current_bindpoint);
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Configures the current global memory regions to use for the draw command.
 | 
			
		||||
     * @param stage The shader stage to configure buffers for.
 | 
			
		||||
     * @param shader The shader object that contains the specified stage.
 | 
			
		||||
     * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
 | 
			
		||||
     * @returns The next available bindpoint for use in the next shader stage.
 | 
			
		||||
     */
 | 
			
		||||
    u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
 | 
			
		||||
                           GLenum primitive_mode, u32 current_bindpoint);
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Configures the current textures to use for the draw command.
 | 
			
		||||
     * @param stage The shader stage to configure textures for.
 | 
			
		||||
 | 
			
		||||
@ -108,11 +108,23 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
 | 
			
		||||
    const auto search{resource_cache.find(buffer.GetHash())};
 | 
			
		||||
    if (search == resource_cache.end()) {
 | 
			
		||||
    const auto search{cbuf_resource_cache.find(buffer.GetHash())};
 | 
			
		||||
    if (search == cbuf_resource_cache.end()) {
 | 
			
		||||
        const GLuint index{
 | 
			
		||||
            glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
 | 
			
		||||
        resource_cache[buffer.GetHash()] = index;
 | 
			
		||||
        cbuf_resource_cache[buffer.GetHash()] = index;
 | 
			
		||||
        return index;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return search->second;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
GLuint CachedShader::GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem) {
 | 
			
		||||
    const auto search{gmem_resource_cache.find(global_mem.GetHash())};
 | 
			
		||||
    if (search == gmem_resource_cache.end()) {
 | 
			
		||||
        const GLuint index{glGetProgramResourceIndex(program.handle, GL_SHADER_STORAGE_BLOCK,
 | 
			
		||||
                                                     global_mem.GetName().c_str())};
 | 
			
		||||
        gmem_resource_cache[global_mem.GetHash()] = index;
 | 
			
		||||
        return index;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -76,6 +76,9 @@ public:
 | 
			
		||||
    /// Gets the GL program resource location for the specified resource, caching as needed
 | 
			
		||||
    GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
 | 
			
		||||
 | 
			
		||||
    /// Gets the GL program resource location for the specified resource, caching as needed
 | 
			
		||||
    GLuint GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem);
 | 
			
		||||
 | 
			
		||||
    /// Gets the GL uniform location for the specified resource, caching as needed
 | 
			
		||||
    GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
 | 
			
		||||
 | 
			
		||||
@ -107,7 +110,8 @@ private:
 | 
			
		||||
        OGLProgram triangles_adjacency;
 | 
			
		||||
    } geometry_programs;
 | 
			
		||||
 | 
			
		||||
    std::map<u32, GLuint> resource_cache;
 | 
			
		||||
    std::map<u32, GLuint> cbuf_resource_cache;
 | 
			
		||||
    std::map<u32, GLuint> gmem_resource_cache;
 | 
			
		||||
    std::map<u32, GLint> uniform_cache;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -34,6 +34,8 @@ using Operation = const OperationNode&;
 | 
			
		||||
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
 | 
			
		||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
 | 
			
		||||
    static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
 | 
			
		||||
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
 | 
			
		||||
    static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
 | 
			
		||||
 | 
			
		||||
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
 | 
			
		||||
 | 
			
		||||
@ -143,6 +145,7 @@ public:
 | 
			
		||||
        DeclareInputAttributes();
 | 
			
		||||
        DeclareOutputAttributes();
 | 
			
		||||
        DeclareConstantBuffers();
 | 
			
		||||
        DeclareGlobalMemory();
 | 
			
		||||
        DeclareSamplers();
 | 
			
		||||
 | 
			
		||||
        code.AddLine("void execute_" + suffix + "() {");
 | 
			
		||||
@ -190,12 +193,15 @@ public:
 | 
			
		||||
    ShaderEntries GetShaderEntries() const {
 | 
			
		||||
        ShaderEntries entries;
 | 
			
		||||
        for (const auto& cbuf : ir.GetConstantBuffers()) {
 | 
			
		||||
            ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first);
 | 
			
		||||
            entries.const_buffers.push_back(desc);
 | 
			
		||||
            entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
 | 
			
		||||
                                               cbuf.first);
 | 
			
		||||
        }
 | 
			
		||||
        for (const auto& sampler : ir.GetSamplers()) {
 | 
			
		||||
            SamplerEntry desc(sampler, stage, GetSampler(sampler));
 | 
			
		||||
            entries.samplers.push_back(desc);
 | 
			
		||||
            entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
 | 
			
		||||
        }
 | 
			
		||||
        for (const auto& gmem : ir.GetGlobalMemoryBases()) {
 | 
			
		||||
            entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
 | 
			
		||||
                                                       GetGlobalMemoryBlock(gmem));
 | 
			
		||||
        }
 | 
			
		||||
        entries.clip_distances = ir.GetClipDistances();
 | 
			
		||||
        entries.shader_length = ir.GetLength();
 | 
			
		||||
@ -375,6 +381,15 @@ private:
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void DeclareGlobalMemory() {
 | 
			
		||||
        for (const auto& entry : ir.GetGlobalMemoryBases()) {
 | 
			
		||||
            code.AddLine("layout (std430) buffer " + GetGlobalMemoryBlock(entry) + " {");
 | 
			
		||||
            code.AddLine("    float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
 | 
			
		||||
            code.AddLine("};");
 | 
			
		||||
            code.AddNewLine();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void DeclareSamplers() {
 | 
			
		||||
        const auto& samplers = ir.GetSamplers();
 | 
			
		||||
        for (const auto& sampler : samplers) {
 | 
			
		||||
@ -538,6 +553,12 @@ private:
 | 
			
		||||
                UNREACHABLE_MSG("Unmanaged offset node type");
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        } else if (const auto gmem = std::get_if<GmemNode>(node)) {
 | 
			
		||||
            const std::string real = Visit(gmem->GetRealAddress());
 | 
			
		||||
            const std::string base = Visit(gmem->GetBaseAddress());
 | 
			
		||||
            const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
 | 
			
		||||
            return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
 | 
			
		||||
 | 
			
		||||
        } else if (const auto lmem = std::get_if<LmemNode>(node)) {
 | 
			
		||||
            return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
 | 
			
		||||
 | 
			
		||||
@ -1471,6 +1492,15 @@ private:
 | 
			
		||||
        return GetDeclarationWithSuffix(index, "cbuf");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
 | 
			
		||||
        return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
 | 
			
		||||
        return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
 | 
			
		||||
                           suffix);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Builds the declaration name of the const buffer block with the given index.
    std::string GetConstBufferBlock(u32 index) const {
        return GetDeclarationWithSuffix(index, "cbuf_block");
    }
 | 
			
		||||
@ -1505,8 +1535,10 @@ private:
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
std::string GetCommonDeclarations() {
 | 
			
		||||
    return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) +
 | 
			
		||||
           "\n"
 | 
			
		||||
    const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
 | 
			
		||||
    const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
 | 
			
		||||
    return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
 | 
			
		||||
           "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
 | 
			
		||||
           "#define ftoi floatBitsToInt\n"
 | 
			
		||||
           "#define ftou floatBitsToUint\n"
 | 
			
		||||
           "#define itof intBitsToFloat\n"
 | 
			
		||||
 | 
			
		||||
@ -71,9 +71,43 @@ private:
 | 
			
		||||
    Maxwell::ShaderStage stage{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class GlobalMemoryEntry {
 | 
			
		||||
public:
 | 
			
		||||
    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
 | 
			
		||||
                               std::string name)
 | 
			
		||||
        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
 | 
			
		||||
 | 
			
		||||
    u32 GetCbufIndex() const {
 | 
			
		||||
        return cbuf_index;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    u32 GetCbufOffset() const {
 | 
			
		||||
        return cbuf_offset;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const std::string& GetName() const {
 | 
			
		||||
        return name;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Maxwell::ShaderStage GetStage() const {
 | 
			
		||||
        return stage;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    u32 GetHash() const {
 | 
			
		||||
        return (static_cast<u32>(stage) << 24) | (cbuf_index << 16) | cbuf_offset;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    u32 cbuf_index{};
 | 
			
		||||
    u32 cbuf_offset{};
 | 
			
		||||
    Maxwell::ShaderStage stage{};
 | 
			
		||||
    std::string name;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct ShaderEntries {
 | 
			
		||||
    std::vector<ConstBufferEntry> const_buffers;
 | 
			
		||||
    std::vector<SamplerEntry> samplers;
 | 
			
		||||
    std::vector<GlobalMemoryEntry> global_memory_entries;
 | 
			
		||||
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
 | 
			
		||||
    std::size_t shader_length{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -4,6 +4,7 @@
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <fmt/format.h>
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
@ -119,6 +120,54 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case OpCode::Id::LDG: {
 | 
			
		||||
        const u32 count = [&]() {
 | 
			
		||||
            switch (instr.ldg.type) {
 | 
			
		||||
            case Tegra::Shader::UniformType::Single:
 | 
			
		||||
                return 1;
 | 
			
		||||
            case Tegra::Shader::UniformType::Double:
 | 
			
		||||
                return 2;
 | 
			
		||||
            case Tegra::Shader::UniformType::Quad:
 | 
			
		||||
            case Tegra::Shader::UniformType::UnsignedQuad:
 | 
			
		||||
                return 4;
 | 
			
		||||
            default:
 | 
			
		||||
                UNIMPLEMENTED_MSG("Unimplemented LDG size!");
 | 
			
		||||
                return 1;
 | 
			
		||||
            }
 | 
			
		||||
        }();
 | 
			
		||||
 | 
			
		||||
        const Node addr_register = GetRegister(instr.gpr8);
 | 
			
		||||
        const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
 | 
			
		||||
        const auto cbuf = std::get_if<CbufNode>(base_address);
 | 
			
		||||
        ASSERT(cbuf != nullptr);
 | 
			
		||||
        const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
 | 
			
		||||
        ASSERT(cbuf_offset_imm != nullptr);
 | 
			
		||||
        const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
 | 
			
		||||
 | 
			
		||||
        bb.push_back(Comment(
 | 
			
		||||
            fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
 | 
			
		||||
 | 
			
		||||
        const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
 | 
			
		||||
        used_global_memory_bases.insert(descriptor);
 | 
			
		||||
 | 
			
		||||
        const Node immediate_offset =
 | 
			
		||||
            Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
 | 
			
		||||
        const Node base_real_address =
 | 
			
		||||
            Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
 | 
			
		||||
 | 
			
		||||
        for (u32 i = 0; i < count; ++i) {
 | 
			
		||||
            const Node it_offset = Immediate(i * 4);
 | 
			
		||||
            const Node real_address =
 | 
			
		||||
                Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
 | 
			
		||||
            const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
 | 
			
		||||
 | 
			
		||||
            SetTemporal(bb, i, gmem);
 | 
			
		||||
        }
 | 
			
		||||
        for (u32 i = 0; i < count; ++i) {
 | 
			
		||||
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case OpCode::Id::ST_A: {
 | 
			
		||||
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
 | 
			
		||||
                             "Indirect attribute loads are not supported");
 | 
			
		||||
 | 
			
		||||
@ -257,6 +257,15 @@ private:
 | 
			
		||||
    bool is_indirect{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct GlobalMemoryBase {
 | 
			
		||||
    u32 cbuf_index{};
 | 
			
		||||
    u32 cbuf_offset{};
 | 
			
		||||
 | 
			
		||||
    bool operator<(const GlobalMemoryBase& rhs) const {
 | 
			
		||||
        return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Metadata carrying a single "precise" flag, false by default.
struct MetaArithmetic {
    bool precise = false;
};
 | 
			
		||||
@ -478,14 +487,26 @@ private:
 | 
			
		||||
/// Global memory node
 | 
			
		||||
class GmemNode final {
 | 
			
		||||
public:
 | 
			
		||||
    explicit constexpr GmemNode(Node address) : address{address} {}
 | 
			
		||||
    explicit constexpr GmemNode(Node real_address, Node base_address,
 | 
			
		||||
                                const GlobalMemoryBase& descriptor)
 | 
			
		||||
        : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}
 | 
			
		||||
 | 
			
		||||
    Node GetAddress() const {
 | 
			
		||||
        return address;
 | 
			
		||||
    Node GetRealAddress() const {
 | 
			
		||||
        return real_address;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Node GetBaseAddress() const {
 | 
			
		||||
        return base_address;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const GlobalMemoryBase& GetDescriptor() const {
 | 
			
		||||
        return descriptor;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    const Node address;
 | 
			
		||||
    const Node real_address;
 | 
			
		||||
    const Node base_address;
 | 
			
		||||
    const GlobalMemoryBase descriptor;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Commentary, can be dropped
 | 
			
		||||
@ -543,6 +564,10 @@ public:
 | 
			
		||||
        return used_clip_distances;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
 | 
			
		||||
        return used_global_memory_bases;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::size_t GetLength() const {
 | 
			
		||||
        return static_cast<std::size_t>(coverage_end * sizeof(u64));
 | 
			
		||||
    }
 | 
			
		||||
@ -734,6 +759,10 @@ private:
 | 
			
		||||
    void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
 | 
			
		||||
                              Node op_c, Node imm_lut, bool sets_cc);
 | 
			
		||||
 | 
			
		||||
    Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor);
 | 
			
		||||
 | 
			
		||||
    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor);
 | 
			
		||||
 | 
			
		||||
    template <typename... T>
 | 
			
		||||
    Node Operation(OperationCode code, const T*... operands) {
 | 
			
		||||
        return StoreNode(OperationNode(code, operands...));
 | 
			
		||||
@ -786,6 +815,7 @@ private:
 | 
			
		||||
    std::map<u32, ConstBuffer> used_cbufs;
 | 
			
		||||
    std::set<Sampler> used_samplers;
 | 
			
		||||
    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
 | 
			
		||||
    std::set<GlobalMemoryBase> used_global_memory_bases;
 | 
			
		||||
 | 
			
		||||
    Tegra::Shader::Header header;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										76
									
								
								src/video_core/shader/track.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								src/video_core/shader/track.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,76 @@
 | 
			
		||||
// Copyright 2018 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <utility>
 | 
			
		||||
#include <variant>
 | 
			
		||||
 | 
			
		||||
#include "video_core/shader/shader_ir.h"
 | 
			
		||||
 | 
			
		||||
namespace VideoCommon::Shader {
 | 
			
		||||
 | 
			
		||||
namespace {
 | 
			
		||||
std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
 | 
			
		||||
                                   OperationCode operation_code) {
 | 
			
		||||
    for (; cursor >= 0; --cursor) {
 | 
			
		||||
        const Node node = code[cursor];
 | 
			
		||||
        if (const auto operation = std::get_if<OperationNode>(node)) {
 | 
			
		||||
            if (operation->GetCode() == operation_code)
 | 
			
		||||
                return {node, cursor};
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return {};
 | 
			
		||||
}
 | 
			
		||||
} // namespace
 | 
			
		||||
 | 
			
		||||
/// Follows a node backwards through the block until it reaches a const buffer read with an
/// immediate offset.
/// @param tracked Node to resolve (const buffer, register or operation).
/// @param code Block searched for register assignments.
/// @param cursor Index in the block the backwards search starts from.
/// @returns The const buffer node, or nullptr when tracking fails.
Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
    if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
        // Only const buffer reads with an immediate offset are accepted.
        if (std::holds_alternative<ImmediateNode>(*cbuf->GetOffset())) {
            return tracked;
        }
        return nullptr;
    }

    if (const auto gpr = std::get_if<GprNode>(tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return nullptr;
        }
        // Start one entry earlier so an instruction that writes the register it also reads
        // does not make the search loop forever.
        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
        return source ? TrackCbuf(source, code, new_cursor) : nullptr;
    }

    if (const auto operation = std::get_if<OperationNode>(tracked)) {
        // The first operand that resolves to a const buffer wins.
        const std::size_t operand_count = operation->GetOperandsCount();
        for (std::size_t index = 0; index < operand_count; ++index) {
            const auto found = TrackCbuf((*operation)[index], code, cursor);
            if (found) {
                return found;
            }
        }
    }

    return nullptr;
}
 | 
			
		||||
 | 
			
		||||
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code,
 | 
			
		||||
                                             s64 cursor) {
 | 
			
		||||
    for (; cursor >= 0; --cursor) {
 | 
			
		||||
        const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
 | 
			
		||||
        if (!found_node) {
 | 
			
		||||
            return {};
 | 
			
		||||
        }
 | 
			
		||||
        const auto operation = std::get_if<OperationNode>(found_node);
 | 
			
		||||
        ASSERT(operation);
 | 
			
		||||
 | 
			
		||||
        const auto& target = (*operation)[0];
 | 
			
		||||
        if (const auto gpr_target = std::get_if<GprNode>(target)) {
 | 
			
		||||
            if (gpr_target->GetIndex() == tracked->GetIndex()) {
 | 
			
		||||
                return {(*operation)[1], new_cursor};
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return {};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace VideoCommon::Shader
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user