diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 2e1e96c813..acf475289c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -791,6 +791,12 @@ union Instruction {
         BitField<20, 24, s64> immediate_offset;
     } ldg;
 
+    union {
+        BitField<48, 3, UniformType> type;      // Store size; selects how many registers to write
+        BitField<46, 2, u64> cache_mode;        // NOTE(review): unused by the decoder here
+        BitField<20, 24, s64> immediate_offset; // Signed offset added to the address register
+    } stg;
+
     union {
         BitField<0, 3, u64> pred0;
         BitField<3, 3, u64> pred3;
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 8d9ee81f18..ea4a593afb 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -14,28 +14,28 @@
 
 namespace OpenGL {
 
-CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
-    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} {
+CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
+      max_size{max_size} {
     buffer.Create();
-    // Bind and unbind the buffer so it gets allocated by the driver
-    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
-    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
     LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
 }
 
-void CachedGlobalRegion::Reload(u32 size_) {
-    constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
+CachedGlobalRegion::~CachedGlobalRegion() = default;
 
+void CachedGlobalRegion::Reload(u32 size_) {
     size = size_;
     if (size > max_size) {
         size = max_size;
-        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
+        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
                      max_size);
     }
+    glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
+}
 
-    // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
-    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
-    glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
+void CachedGlobalRegion::Flush() { // Copies the GL buffer contents back to host_ptr
+    LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:016x}", size, cpu_addr);
+    glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
 }
 
 GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
@@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr,
     return search->second;
 }
 
-GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
-                                                              u8* host_ptr) {
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
+                                                              u32 size) {
     GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
     if (!region) {
         // No reserved surface available, create a new one and reserve it
         auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-        const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
-        region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
+        const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
+        ASSERT(cpu_addr);
+
+        region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
         ReserveGlobalRegion(region);
     }
     region->Reload(size);
@@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
 }
 
 GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
-    : RasterizerCache{rasterizer} {}
+    : RasterizerCache{rasterizer} {
+    GLint max_ssbo_size_{}; // Stays 0 if the query fails, instead of being indeterminate
+    glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
+    max_ssbo_size = static_cast<u32>(max_ssbo_size_);
+}
 
 GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
     const GLShader::GlobalMemoryEntry& global_region,
@@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
 
     auto& gpu{Core::System::GetInstance().GPU()};
     auto& memory_manager{gpu.MemoryManager()};
-    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
+    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
     const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
                     global_region.GetCbufOffset()};
     const auto actual_addr{memory_manager.Read<u64>(addr)};
@@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
 
     if (!region) {
         // No global region found - create a new one
-        region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
+        region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
         Register(region);
     }
 
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 5a21ab66f8..196e6e278f 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -19,7 +19,7 @@ namespace OpenGL {
 
 namespace GLShader {
 class GlobalMemoryEntry;
-} // namespace GLShader
+} // namespace GLShader
 
 class RasterizerOpenGL;
 class CachedGlobalRegion;
@@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
 
 class CachedGlobalRegion final : public RasterizerCacheObject {
 public:
-    explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
+    explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
+    ~CachedGlobalRegion();
 
     VAddr GetCpuAddr() const override {
         return cpu_addr;
@@ -45,14 +46,14 @@ public:
     /// Reloads the global region from guest memory
     void Reload(u32 size_);
 
-    // TODO(Rodrigo): When global memory is written (STG), implement flushing
-    void Flush() override {
-        UNIMPLEMENTED();
-    }
+    void Flush() override;
 
 private:
     VAddr cpu_addr{};
+    u8* host_ptr{};
     u32 size{};
+    u32 max_size{};
+
     OGLBuffer buffer;
 };
 
@@ -66,10 +67,11 @@ public:
 
 private:
     GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
-    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
+    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
     void ReserveGlobalRegion(GlobalRegion region);
 
     std::unordered_map<CacheAddr, GlobalRegion> reserve;
+    u32 max_ssbo_size{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d250d5cbb9..ea42fd0609 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -756,6 +756,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
         return;
     }
     res_cache.FlushRegion(addr, size);
+    global_cache.FlushRegion(addr, size);
 }
 
 void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -953,6 +954,9 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
     for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& entry{entries[bindpoint]};
         const auto& region{global_cache.GetGlobalRegion(entry, stage)};
+        if (entry.IsWritten()) {
+            region->MarkAsModified(true, global_cache);
+        }
         bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
                                   static_cast<GLsizeiptr>(region->GetSizeInBytes()));
     }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e4c64ae71e..d4c2cf80ef 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -71,10 +71,6 @@ public:
     static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
                   "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
 
-    static constexpr std::size_t MaxGlobalMemorySize = 0x10000;
-    static_assert(MaxGlobalMemorySize % sizeof(float) == 0,
-                  "The maximum size of a global memory must be a multiple of the size of float");
-
 private:
     class SamplerInfo {
     public:
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 28e490b3cb..445048daf9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -45,8 +45,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>;
 enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
     static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
-constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
-    static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
 
 class ShaderWriter {
 public:
@@ -208,8 +206,10 @@ public:
         for (const auto& sampler : ir.GetSamplers()) {
             entries.samplers.emplace_back(sampler);
         }
-        for (const auto& gmem : ir.GetGlobalMemoryBases()) {
-            entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
+        for (const auto& gmem_pair : ir.GetGlobalMemory()) {
+            const auto& [base, usage] = gmem_pair;
+            entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
+                                                       usage.is_read, usage.is_written);
         }
         entries.clip_distances = ir.GetClipDistances();
         entries.shader_length = ir.GetLength();
@@ -380,12 +380,22 @@ private:
     }
 
     void DeclareGlobalMemory() {
-        for (const auto& entry : ir.GetGlobalMemoryBases()) {
+        for (const auto& gmem : ir.GetGlobalMemory()) {
+            const auto& [base, usage] = gmem;
+
+            // Since we don't know how the shader uses global memory, hint the driver to disable as
+            // many optimizations as possible
+            std::string qualifier = "coherent volatile";
+            if (usage.is_read && !usage.is_written)
+                qualifier += " readonly";
+            else if (usage.is_written && !usage.is_read)
+                qualifier += " writeonly";
+
             const std::string binding =
-                fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
-            code.AddLine("layout (std430, binding = " + binding + ") buffer " +
-                         GetGlobalMemoryBlock(entry) + " {");
-            code.AddLine("    float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
+                fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset);
+            code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " +
+                         GetGlobalMemoryBlock(base) + " {");
+            code.AddLine("    float " + GetGlobalMemory(base) + "[];");
             code.AddLine("};");
             code.AddNewLine();
         }
@@ -868,6 +878,12 @@ private:
         } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
             target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
 
+        } else if (const auto gmem = std::get_if<GmemNode>(dest)) {
+            const std::string real = Visit(gmem->GetRealAddress());
+            const std::string base = Visit(gmem->GetBaseAddress());
+            const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+            target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+
         } else {
             UNREACHABLE_MSG("Assign called without a proper target");
         }
@@ -1621,9 +1637,7 @@ private:
 
 std::string GetCommonDeclarations() {
     const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
-    const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
     return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
-           "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
            "#define ftoi floatBitsToInt\n"
            "#define ftou floatBitsToUint\n"
            "#define itof intBitsToFloat\n"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 4e04ab2f8e..55b3d4d7b6 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -39,8 +39,9 @@ private:
 
 class GlobalMemoryEntry {
 public:
-    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
-        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{
+                                                                                  is_written} {}
 
     u32 GetCbufIndex() const {
         return cbuf_index;
@@ -50,9 +51,19 @@ public:
         return cbuf_offset;
     }
 
+    bool IsRead() const {
+        return is_read;
+    }
+
+    bool IsWritten() const {
+        return is_written;
+    }
+
 private:
     u32 cbuf_index{};
     u32 cbuf_offset{};
+    bool is_read{};
+    bool is_written{};
 };
 
 struct ShaderEntries {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 8a43eb1576..d5890a3752 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -337,11 +337,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
     for (u32 i = 0; i < global_memory_count; ++i) {
         u32 cbuf_index{};
         u32 cbuf_offset{};
+        u8 is_read{};
+        u8 is_written{};
         if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
-            file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
+            file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
+            file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
             return {};
         }
-        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
+        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
+                                                         is_written != 0);
     }
 
     for (auto& clip_distance : entry.entries.clip_distances) {
@@ -397,7 +402,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
         return false;
     for (const auto& gmem : entries.global_memory_entries) {
         if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
-            file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
+            file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
+            file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
+            file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
             return false;
         }
     }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index e0a6f5e876..25500f9a36 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -191,8 +191,9 @@ public:
         for (const auto& cbuf : ir.GetConstantBuffers()) {
             entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
         }
-        for (const auto& gmem : ir.GetGlobalMemoryBases()) {
-            entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
+        for (const auto& gmem_pair : ir.GetGlobalMemory()) {
+            const auto& [base, usage] = gmem_pair;
+            entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset);
         }
         for (const auto& sampler : ir.GetSamplers()) {
             entries.samplers.emplace_back(sampler);
@@ -225,7 +226,7 @@ private:
             return current_binding;
         };
         const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size());
-        global_buffers_base_binding = Allocate(ir.GetGlobalMemoryBases().size());
+        global_buffers_base_binding = Allocate(ir.GetGlobalMemory().size());
         samplers_base_binding = Allocate(ir.GetSamplers().size());
 
         ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE,
@@ -390,14 +391,15 @@ private:
 
     void DeclareGlobalBuffers() {
         u32 binding = global_buffers_base_binding;
-        for (const auto& entry : ir.GetGlobalMemoryBases()) {
+        for (const auto& entry : ir.GetGlobalMemory()) {
+            const auto& [base, usage] = entry; // By reference: no per-iteration pair copy
             const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
             AddGlobalVariable(
-                Name(id, fmt::format("gmem_{}_{}", entry.cbuf_index, entry.cbuf_offset)));
+                Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset)));
 
             Decorate(id, spv::Decoration::Binding, binding++);
             Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
-            global_buffers.emplace(entry, id);
+            global_buffers.emplace(base, id);
         }
     }
 
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ea3c71eedd..ff19ada55c 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -18,6 +18,23 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 
+namespace {
+u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
+    switch (uniform_type) { // Maps an LDG/STG access size to its count of 4-byte elements
+    case Tegra::Shader::UniformType::Single:
+        return 1;
+    case Tegra::Shader::UniformType::Double:
+        return 2;
+    case Tegra::Shader::UniformType::Quad:
+    case Tegra::Shader::UniformType::UnsignedQuad:
+        return 4;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
+        return 1; // Unknown sizes are reported above, then treated as a single element
+    }
+}
+} // namespace
+
 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
@@ -126,45 +143,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::LDG: {
-        const u32 count = [&]() {
-            switch (instr.ldg.type) {
-            case Tegra::Shader::UniformType::Single:
-                return 1;
-            case Tegra::Shader::UniformType::Double:
-                return 2;
-            case Tegra::Shader::UniformType::Quad:
-            case Tegra::Shader::UniformType::UnsignedQuad:
-                return 4;
-            default:
-                UNIMPLEMENTED_MSG("Unimplemented LDG size!");
-                return 1;
-            }
-        }();
-
-        const Node addr_register = GetRegister(instr.gpr8);
-        const Node base_address =
-            TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
-        const auto cbuf = std::get_if<CbufNode>(base_address);
-        ASSERT(cbuf != nullptr);
-        const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
-        ASSERT(cbuf_offset_imm != nullptr);
-        const auto cbuf_offset = cbuf_offset_imm->GetValue();
-
-        bb.push_back(Comment(
-            fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
-
-        const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
-        used_global_memory_bases.insert(descriptor);
-
-        const Node immediate_offset =
-            Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
-        const Node base_real_address =
-            Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
+        const auto [real_address_base, base_address, descriptor] =
+            TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
+                                    static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
 
+        const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
         for (u32 i = 0; i < count; ++i) {
             const Node it_offset = Immediate(i * 4);
             const Node real_address =
-                Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
+                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
             const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
 
             SetTemporal(bb, i, gmem);
@@ -174,6 +161,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::STG: {
+        const auto [real_address_base, base_address, descriptor] =
+            TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
+                                    static_cast<u32>(instr.stg.immediate_offset.Value()), true);
+
+        // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
+        SetTemporal(bb, 0, real_address_base);
+
+        const u32 count = GetUniformTypeElementsCount(instr.stg.type);
+        for (u32 i = 0; i < count; ++i) {
+            SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
+        }
+        for (u32 i = 0; i < count; ++i) {
+            const Node it_offset = Immediate(i * 4);
+            const Node real_address =
+                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
+            const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
+
+            bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
+        }
+        break;
+    }
     case OpCode::Id::ST_A: {
         UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                              "Indirect attribute loads are not supported");
@@ -236,4 +245,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
+std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
+                                                                           Node addr_register,
+                                                                           u32 immediate_offset,
+                                                                           bool is_write) {
+    const Node base_address{ // Presumably the cbuf entry the address was loaded from
+        TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
+    const auto cbuf = std::get_if<CbufNode>(base_address);
+    ASSERT(cbuf != nullptr); // The tracked base must be a constant buffer node
+    const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+    ASSERT(cbuf_offset_imm != nullptr); // Only immediate cbuf offsets are handled
+    const auto cbuf_offset = cbuf_offset_imm->GetValue();
+
+    bb.push_back(
+        Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
+
+    const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
+    const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
+    auto& usage = entry->second; // Flags accumulate over every access to this base
+    if (is_write) {
+        usage.is_written = true; // STG callers pass is_write = true
+    } else {
+        usage.is_read = true; // LDG callers pass is_write = false
+    }
+
+    const auto real_address =
+        Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
+
+    return {real_address, base_address, descriptor}; // {register + immediate, cbuf base, key}
+}
+
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 4888998d34..1afab08c0a 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -276,6 +276,11 @@ struct GlobalMemoryBase {
     }
 };
 
+struct GlobalMemoryUsage {
+    bool is_read{};    // Set when a load (LDG) goes through this base
+    bool is_written{}; // Set when a store (STG) goes through this base
+};
+
 struct MetaArithmetic {
     bool precise{};
 };
@@ -578,8 +583,8 @@ public:
         return used_clip_distances;
     }
 
-    const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
-        return used_global_memory_bases;
+    const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
+        return used_global_memory;
     }
 
     std::size_t GetLength() const {
@@ -781,6 +786,11 @@ private:
 
     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
 
+    std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
+                                                                     Node addr_register,
+                                                                     u32 immediate_offset,
+                                                                     bool is_write);
+
     template <typename... T>
     Node Operation(OperationCode code, const T*... operands) {
         return StoreNode(OperationNode(code, operands...));
@@ -834,7 +844,7 @@ private:
     std::map<u32, ConstBuffer> used_cbufs;
     std::set<Sampler> used_samplers;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
-    std::set<GlobalMemoryBase> used_global_memory_bases;
+    std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
 
     Tegra::Shader::Header header;
 };