Merge pull request #329 from bunnei/shader-gen-part-1
OpenGL shader generation part 1
This commit is contained in:
		
						commit
						fdca7b5f7a
					
				@ -32,6 +32,8 @@ add_library(common STATIC
 | 
			
		||||
    break_points.cpp
 | 
			
		||||
    break_points.h
 | 
			
		||||
    chunk_file.h
 | 
			
		||||
    cityhash.cpp
 | 
			
		||||
    cityhash.h
 | 
			
		||||
    code_block.h
 | 
			
		||||
    color.h
 | 
			
		||||
    common_funcs.h
 | 
			
		||||
@ -39,7 +41,6 @@ add_library(common STATIC
 | 
			
		||||
    common_types.h
 | 
			
		||||
    file_util.cpp
 | 
			
		||||
    file_util.h
 | 
			
		||||
    hash.cpp
 | 
			
		||||
    hash.h
 | 
			
		||||
    linear_disk_cache.h
 | 
			
		||||
    logging/backend.cpp
 | 
			
		||||
 | 
			
		||||
@ -115,7 +115,7 @@ private:
 | 
			
		||||
    // assignment would copy the full storage value, rather than just the bits
 | 
			
		||||
    // relevant to this particular bit field.
 | 
			
		||||
    // We don't delete it because we want BitField to be trivially copyable.
 | 
			
		||||
    // Defaulted rather than deleted; declared constexpr so the assignment can
    // also be used in constant expressions.
    constexpr BitField& operator=(const BitField&) = default;
 | 
			
		||||
 | 
			
		||||
    // StorageType is T for non-enum types and the underlying type of T if
 | 
			
		||||
    // T is an enumeration. Note that T is wrapped within an enable_if in the
 | 
			
		||||
@ -166,20 +166,20 @@ public:
 | 
			
		||||
    // so that we can use this within unions
 | 
			
		||||
    constexpr BitField() = default;
 | 
			
		||||
 | 
			
		||||
    /// Implicit conversion to T; yields the same result as Value().
    constexpr FORCE_INLINE operator T() const {
        return Value();
    }
 | 
			
		||||
 | 
			
		||||
    /// Stores `value` into this field: clears the bits selected by `mask` in
    /// `storage`, then ORs in the result of FormatValue(value).
    constexpr FORCE_INLINE void Assign(const T& value) {
        storage = (storage & ~mask) | FormatValue(value);
    }
 | 
			
		||||
 | 
			
		||||
    FORCE_INLINE T Value() const {
 | 
			
		||||
    constexpr T Value() const {
 | 
			
		||||
        return ExtractValue(storage);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015
 | 
			
		||||
    /// Returns true when the field's value is non-zero.
    constexpr FORCE_INLINE bool ToBool() const {
        return Value() != 0;
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										340
									
								
								src/common/cityhash.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										340
									
								
								src/common/cityhash.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,340 @@
 | 
			
		||||
// Copyright (c) 2011 Google, Inc.
 | 
			
		||||
//
 | 
			
		||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
 | 
			
		||||
// of this software and associated documentation files (the "Software"), to deal
 | 
			
		||||
// in the Software without restriction, including without limitation the rights
 | 
			
		||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | 
			
		||||
// copies of the Software, and to permit persons to whom the Software is
 | 
			
		||||
// furnished to do so, subject to the following conditions:
 | 
			
		||||
//
 | 
			
		||||
// The above copyright notice and this permission notice shall be included in
 | 
			
		||||
// all copies or substantial portions of the Software.
 | 
			
		||||
//
 | 
			
		||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | 
			
		||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
			
		||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | 
			
		||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
			
		||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | 
			
		||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 | 
			
		||||
// THE SOFTWARE.
 | 
			
		||||
//
 | 
			
		||||
// CityHash, by Geoff Pike and Jyrki Alakuijala
 | 
			
		||||
//
 | 
			
		||||
// This file provides CityHash64() and related functions.
 | 
			
		||||
//
 | 
			
		||||
// It's probably possible to create even faster hash functions by
 | 
			
		||||
// writing a program that systematically explores some of the space of
 | 
			
		||||
// possible hash functions, by using SIMD instructions, or by
 | 
			
		||||
// compromising on hash quality.
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <string.h> // for memcpy and memset
 | 
			
		||||
#include "cityhash.h"
 | 
			
		||||
#include "common/swap.h"
 | 
			
		||||
 | 
			
		||||
// #include "config.h"
 | 
			
		||||
#ifdef __GNUC__
 | 
			
		||||
#define HAVE_BUILTIN_EXPECT 1
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef COMMON_BIG_ENDIAN
 | 
			
		||||
#define WORDS_BIGENDIAN 1
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
// Short integer aliases used throughout the CityHash implementation.
using uint8 = uint8_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
 | 
			
		||||
 | 
			
		||||
namespace Common {
 | 
			
		||||
 | 
			
		||||
// Copies sizeof(uint64) bytes starting at p into a uint64 and returns it.
// memcpy is used so that p does not have to be suitably aligned.
static uint64 UNALIGNED_LOAD64(const char* p) {
    uint64 value;
    memcpy(&value, p, sizeof value);
    return value;
}
 | 
			
		||||
 | 
			
		||||
// Copies sizeof(uint32) bytes starting at p into a uint32 and returns it.
// memcpy is used so that p does not have to be suitably aligned.
static uint32 UNALIGNED_LOAD32(const char* p) {
    uint32 value;
    memcpy(&value, p, sizeof value);
    return value;
}
 | 
			
		||||
 | 
			
		||||
#ifdef WORDS_BIGENDIAN
 | 
			
		||||
#define uint32_in_expected_order(x) (swap32(x))
 | 
			
		||||
#define uint64_in_expected_order(x) (swap64(x))
 | 
			
		||||
#else
 | 
			
		||||
#define uint32_in_expected_order(x) (x)
 | 
			
		||||
#define uint64_in_expected_order(x) (x)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if !defined(LIKELY)
 | 
			
		||||
#if HAVE_BUILTIN_EXPECT
 | 
			
		||||
#define LIKELY(x) (__builtin_expect(!!(x), 1))
 | 
			
		||||
#else
 | 
			
		||||
#define LIKELY(x) (x)
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// Loads 8 bytes from p and passes them through uint64_in_expected_order,
// which byte-swaps when WORDS_BIGENDIAN is defined and is a no-op otherwise.
static uint64 Fetch64(const char* p) {
    const uint64 raw = UNALIGNED_LOAD64(p);
    return uint64_in_expected_order(raw);
}
 | 
			
		||||
 | 
			
		||||
// Loads 4 bytes from p and passes them through uint32_in_expected_order,
// which byte-swaps when WORDS_BIGENDIAN is defined and is a no-op otherwise.
static uint32 Fetch32(const char* p) {
    const uint32 raw = UNALIGNED_LOAD32(p);
    return uint32_in_expected_order(raw);
}
 | 
			
		||||
 | 
			
		||||
// Some primes between 2^63 and 2^64 for various uses.
 | 
			
		||||
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
 | 
			
		||||
static const uint64 k1 = 0xb492b66fbe98f273ULL;
 | 
			
		||||
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
 | 
			
		||||
 | 
			
		||||
// Bitwise right rotate.  Normally this will compile to a single
 | 
			
		||||
// instruction, especially if the shift is a manifest constant.
 | 
			
		||||
// Rotates val right by `shift` bits.
static uint64 Rotate(uint64 val, int shift) {
    if (shift == 0) {
        // A zero rotation would require shifting left by 64, which is undefined.
        return val;
    }
    return (val >> shift) | (val << (64 - shift));
}
 | 
			
		||||
 | 
			
		||||
// XORs val with itself shifted right by 47 bits.
static uint64 ShiftMix(uint64 val) {
    const uint64 shifted = val >> 47;
    return val ^ shifted;
}
 | 
			
		||||
 | 
			
		||||
// Packs u (low half) and v (high half) into a uint128 and reduces it to
// 64 bits with Hash128to64().
static uint64 HashLen16(uint64 u, uint64 v) {
    const uint128 packed(u, v);
    return Hash128to64(packed);
}
 | 
			
		||||
 | 
			
		||||
// Mixes u and v into a single 64-bit value using the caller-supplied
// multiplier `mul` (Murmur-inspired multiply / xor-shift rounds).
static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
    uint64 first = (u ^ v) * mul;
    first ^= first >> 47;

    uint64 second = (v ^ first) * mul;
    second ^= second >> 47;

    return second * mul;
}
 | 
			
		||||
 | 
			
		||||
// Hashes the first `len` bytes of s. The branches cover len >= 8, len >= 4,
// len > 0 and the empty input; the 8-byte branch reads s[0..7] and
// s[len-8..len-1], so callers must not pass len > 16 expecting every byte to
// be covered.
static uint64 HashLen0to16(const char* s, size_t len) {
    if (len >= 8) {
        // Two (possibly overlapping) 8-byte reads: the head and the tail.
        const uint64 mul = k2 + len * 2;
        const uint64 head = Fetch64(s) + k2;
        const uint64 tail = Fetch64(s + len - 8);
        const uint64 u = Rotate(tail, 37) * mul + head;
        const uint64 v = (Rotate(head, 25) + tail) * mul;
        return HashLen16(u, v, mul);
    }

    if (len >= 4) {
        // Two (possibly overlapping) 4-byte reads: the head and the tail.
        const uint64 mul = k2 + len * 2;
        const uint64 head = Fetch32(s);
        const uint64 tail = Fetch32(s + len - 4);
        return HashLen16(len + (head << 3), tail, mul);
    }

    if (len > 0) {
        // 1 to 3 bytes: combine the first, middle and last byte with the length.
        const uint8 first = s[0];
        const uint8 middle = s[len >> 1];
        const uint8 last = s[len - 1];
        const uint32 y = static_cast<uint32>(first) + (static_cast<uint32>(middle) << 8);
        const uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(last) << 2);
        return ShiftMix((y * k2) ^ (z * k0)) * k2;
    }

    // Empty input.
    return k2;
}
 | 
			
		||||
 | 
			
		||||
// This probably works well for 16-byte strings as well, but it may be overkill
 | 
			
		||||
// in that case.
 | 
			
		||||
// Hashes s using four 8-byte reads: s[0..7], s[8..15], s[len-16..len-9] and
// s[len-8..len-1]. Requires len >= 16 for those reads to stay inside the buffer.
static uint64 HashLen17to32(const char* s, size_t len) {
    const uint64 mul = k2 + len * 2;
    const uint64 a = Fetch64(s) * k1;
    const uint64 b = Fetch64(s + 8);
    const uint64 c = Fetch64(s + len - 8) * mul;
    const uint64 d = Fetch64(s + len - 16) * k2;

    const uint64 u = Rotate(a + b, 43) + Rotate(c, 30) + d;
    const uint64 v = a + Rotate(b + k2, 18) + c;
    return HashLen16(u, v, mul);
}
 | 
			
		||||
 | 
			
		||||
// Return a 16-byte hash for 48 bytes.  Quick and dirty.
 | 
			
		||||
// Callers do best to use "random-looking" values for a and b.
 | 
			
		||||
static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a,
 | 
			
		||||
                                                   uint64 b) {
 | 
			
		||||
    a += w;
 | 
			
		||||
    b = Rotate(b + a + z, 21);
 | 
			
		||||
    uint64 c = a;
 | 
			
		||||
    a += x;
 | 
			
		||||
    a += y;
 | 
			
		||||
    b += Rotate(a, 44);
 | 
			
		||||
    return make_pair(a + z, b + c);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
 | 
			
		||||
static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) {
 | 
			
		||||
    return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a,
 | 
			
		||||
                                  b);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Return an 8-byte hash for 33 to 64 bytes.
 | 
			
		||||
// Hashes s using eight 8-byte reads: four from the front (offsets 0, 8, 16,
// 24) and four from the back (len-32, len-24, len-16, len-8). Requires
// len >= 32 for those reads to stay inside the buffer.
static uint64 HashLen33to64(const char* s, size_t len) {
    const uint64 mul = k2 + len * 2;

    // Input words.
    uint64 a = Fetch64(s) * k2;
    uint64 b = Fetch64(s + 8);
    const uint64 c = Fetch64(s + len - 24);
    const uint64 d = Fetch64(s + len - 32);
    const uint64 e = Fetch64(s + 16) * k2;
    const uint64 f = Fetch64(s + 24) * 9;
    const uint64 g = Fetch64(s + len - 8);
    const uint64 h = Fetch64(s + len - 16) * mul;

    // Mixing rounds; swap64 reverses byte order as part of the mix.
    const uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
    const uint64 v = ((a + g) ^ d) + f + 1;
    const uint64 w = swap64((u + v) * mul) + h;
    const uint64 x = Rotate(e + f, 42) + c;
    const uint64 y = (swap64((v + w) * mul) + g) * mul;
    const uint64 z = e + f + c;

    a = swap64((x + z) * mul + y) + b;
    b = ShiftMix((z + a) * mul + d + h) * mul;
    return b + x;
}
 | 
			
		||||
 | 
			
		||||
// Computes a 64-bit hash of the `len` bytes starting at s.
uint64 CityHash64(const char* s, size_t len) {
    // Inputs of up to 64 bytes are dispatched to the fixed-range helpers.
    if (len <= 16) {
        return HashLen0to16(s, len);
    }
    if (len <= 32) {
        return HashLen17to32(s, len);
    }
    if (len <= 64) {
        return HashLen33to64(s, len);
    }

    // For strings over 64 bytes we hash the end first, and then as we
    // loop we keep 56 bytes of state: v, w, x, y, and z.
    uint64 x = Fetch64(s + len - 40);
    uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
    uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
    std::pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
    std::pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
    x = x * k1 + Fetch64(s);

    // Round (len - 1) down to a multiple of 64 and consume that many bytes in
    // 64-byte chunks. Since len > 64 here, at least one chunk is processed.
    size_t remaining = (len - 1) & ~static_cast<size_t>(63);
    do {
        x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
        y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
        x ^= w.second;
        y += v.first + Fetch64(s + 40);
        z = Rotate(z + w.first, 33) * k1;
        v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
        w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
        std::swap(z, x);
        s += 64;
        remaining -= 64;
    } while (remaining != 0);

    // Fold the five state words down to 64 bits.
    const uint64 low = HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z;
    const uint64 high = HashLen16(v.second, w.second) + x;
    return HashLen16(low, high);
}
 | 
			
		||||
 | 
			
		||||
// Seeded 64-bit hash: forwards to CityHash64WithSeeds with k2 as the first
// seed and `seed` as the second.
uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) {
    const uint64 first_seed = k2;
    return CityHash64WithSeeds(s, len, first_seed, seed);
}
 | 
			
		||||
 | 
			
		||||
// Two-seed 64-bit hash: computes CityHash64 of the data, subtracts seed0 and
// mixes the result with seed1 through HashLen16.
uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) {
    const uint64 unseeded = CityHash64(s, len);
    return HashLen16(unseeded - seed0, seed1);
}
 | 
			
		||||
 | 
			
		||||
// A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
 | 
			
		||||
// of any length representable in signed long.  Based on City and Murmur.
 | 
			
		||||
// Produces a 128-bit hash of s[0..len-1] mixed with `seed`. The length is
// converted to a signed long, so it must be representable in that type.
static uint128 CityMurmur(const char* s, size_t len, uint128 seed) {
    uint64 a = Uint128Low64(seed);
    uint64 b = Uint128High64(seed);
    uint64 c = 0;
    uint64 d = 0;

    // Signed so the loop below can count down past zero.
    signed long remaining = static_cast<long>(len) - 16;
    if (remaining <= 0) {
        // len <= 16: a single short-string hash is enough.
        a = ShiftMix(a * k1) * k1;
        c = b * k1 + HashLen0to16(s, len);
        d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
    } else {
        // len > 16: seed c and d from the last 16 bytes, then walk the input
        // from the front in 16-byte steps.
        c = HashLen16(Fetch64(s + len - 8) + k1, a);
        d = HashLen16(b + len, c + Fetch64(s + len - 16));
        a += d;
        do {
            a ^= ShiftMix(Fetch64(s) * k1) * k1;
            a *= k1;
            b ^= a;
            c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
            c *= k1;
            d ^= c;
            s += 16;
            remaining -= 16;
        } while (remaining > 0);
    }

    a = HashLen16(a, c);
    b = HashLen16(d, b);
    return uint128(a ^ b, HashLen16(b, a));
}
 | 
			
		||||
 | 
			
		||||
// Computes a 128-bit hash of the `len` bytes starting at s, mixed with `seed`.
uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) {
    // Inputs shorter than 128 bytes are delegated to CityMurmur.
    if (len < 128) {
        return CityMurmur(s, len, seed);
    }

    // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
    // v, w, x, y, and z.
    std::pair<uint64, uint64> v;
    std::pair<uint64, uint64> w;
    uint64 x = Uint128Low64(seed);
    uint64 y = Uint128High64(seed);
    uint64 z = len * k1;
    v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
    v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
    w.first = Rotate(y + z, 35) * k1 + x;
    w.second = Rotate(x + Fetch64(s + 88), 53) * k1;

    // Each iteration consumes 128 bytes as two identical 64-byte rounds
    // (the same round CityHash64() uses, written out twice).
    do {
        // First 64-byte round.
        x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
        y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
        x ^= w.second;
        y += v.first + Fetch64(s + 40);
        z = Rotate(z + w.first, 33) * k1;
        v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
        w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
        std::swap(z, x);
        s += 64;

        // Second 64-byte round.
        x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
        y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
        x ^= w.second;
        y += v.first + Fetch64(s + 40);
        z = Rotate(z + w.first, 33) * k1;
        v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
        w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
        std::swap(z, x);
        s += 64;

        len -= 128;
    } while (LIKELY(len >= 128));

    x += Rotate(v.first + z, 49) * k0;
    y = y * k0 + Rotate(w.second, 37);
    z = z * k0 + Rotate(w.first, 27);
    w.first *= 9;
    v.first *= k0;

    // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
    // tail_done is advanced at the top of the body, so the loop header has no
    // increment expression.
    for (size_t tail_done = 0; tail_done < len;) {
        tail_done += 32;
        y = Rotate(x + y, 42) * k0 + v.second;
        w.first += Fetch64(s + len - tail_done + 16);
        x = x * k0 + w.first;
        z += w.second + Fetch64(s + len - tail_done);
        w.second += v.first;
        v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
        v.first *= k0;
    }

    // At this point our 56 bytes of state should contain more than
    // enough information for a strong 128-bit hash.  We use two
    // different 56-byte-to-8-byte hashes to get a 16-byte final result.
    x = HashLen16(x, v.first);
    y = HashLen16(y + z, w.first);
    const uint64 low = HashLen16(x + v.second, w.second) + y;
    const uint64 high = HashLen16(x + w.second, y + v.second);
    return uint128(low, high);
}
 | 
			
		||||
 | 
			
		||||
// Computes a 128-bit hash of the `len` bytes starting at s.
uint128 CityHash128(const char* s, size_t len) {
    if (len >= 16) {
        // Use the first 16 bytes as the seed and hash the remainder.
        const uint128 seed(Fetch64(s), Fetch64(s + 8) + k0);
        return CityHash128WithSeed(s + 16, len - 16, seed);
    }
    // Too short to derive a seed from the data; use the fixed (k0, k1) seed.
    return CityHash128WithSeed(s, len, uint128(k0, k1));
}
 | 
			
		||||
 | 
			
		||||
} // namespace Common
 | 
			
		||||
							
								
								
									
										110
									
								
								src/common/cityhash.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								src/common/cityhash.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,110 @@
 | 
			
		||||
// Copyright (c) 2011 Google, Inc.
 | 
			
		||||
//
 | 
			
		||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
 | 
			
		||||
// of this software and associated documentation files (the "Software"), to deal
 | 
			
		||||
// in the Software without restriction, including without limitation the rights
 | 
			
		||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | 
			
		||||
// copies of the Software, and to permit persons to whom the Software is
 | 
			
		||||
// furnished to do so, subject to the following conditions:
 | 
			
		||||
//
 | 
			
		||||
// The above copyright notice and this permission notice shall be included in
 | 
			
		||||
// all copies or substantial portions of the Software.
 | 
			
		||||
//
 | 
			
		||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | 
			
		||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
			
		||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | 
			
		||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
			
		||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | 
			
		||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 | 
			
		||||
// THE SOFTWARE.
 | 
			
		||||
//
 | 
			
		||||
// CityHash, by Geoff Pike and Jyrki Alakuijala
 | 
			
		||||
//
 | 
			
		||||
// http://code.google.com/p/cityhash/
 | 
			
		||||
//
 | 
			
		||||
// This file provides a few functions for hashing strings.  All of them are
 | 
			
		||||
// high-quality functions in the sense that they pass standard tests such
 | 
			
		||||
// as Austin Appleby's SMHasher.  They are also fast.
 | 
			
		||||
//
 | 
			
		||||
// For 64-bit x86 code, on short strings, we don't know of anything faster than
 | 
			
		||||
// CityHash64 that is of comparable quality.  We believe our nearest competitor
 | 
			
		||||
// is Murmur3.  For 64-bit x86 code, CityHash64 is an excellent choice for hash
 | 
			
		||||
// tables and most other hashing (excluding cryptography).
 | 
			
		||||
//
 | 
			
		||||
// For 64-bit x86 code, on long strings, the picture is more complicated.
 | 
			
		||||
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
 | 
			
		||||
// CityHashCrc128 appears to be faster than all competitors of comparable
 | 
			
		||||
// quality.  CityHash128 is also good but not quite as fast.  We believe our
 | 
			
		||||
// nearest competitor is Bob Jenkins' Spooky.  We don't have great data for
 | 
			
		||||
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
 | 
			
		||||
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
 | 
			
		||||
// Note that CityHashCrc128 is declared in citycrc.h.
 | 
			
		||||
//
 | 
			
		||||
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
 | 
			
		||||
// is of comparable quality.  We believe our nearest competitor is Murmur3A.
 | 
			
		||||
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
 | 
			
		||||
//
 | 
			
		||||
// Functions in the CityHash family are not suitable for cryptography.
 | 
			
		||||
//
 | 
			
		||||
// Please see CityHash's README file for more details on our performance
 | 
			
		||||
// measurements and so on.
 | 
			
		||||
//
 | 
			
		||||
// WARNING: This code has been only lightly tested on big-endian platforms!
 | 
			
		||||
// It is known to work well on little-endian platforms that have a small penalty
 | 
			
		||||
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
 | 
			
		||||
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
 | 
			
		||||
// bug reports are welcome.
 | 
			
		||||
//
 | 
			
		||||
// By the way, for some hash functions, given strings a and b, the hash
 | 
			
		||||
// of a+b is easily derived from the hashes of a and b.  This property
 | 
			
		||||
// doesn't hold for any hash functions in this file.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <utility>
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
#include <stdlib.h> // for size_t.
 | 
			
		||||
 | 
			
		||||
namespace Common {
 | 
			
		||||
 | 
			
		||||
// A 128-bit value stored as two 64-bit halves: `first` is the low half and
// `second` is the high half.
using uint128 = std::pair<uint64_t, uint64_t>;

// Returns the low 64 bits (the pair's `first` member).
inline uint64_t Uint128Low64(const uint128& x) {
    const uint64_t low = x.first;
    return low;
}

// Returns the high 64 bits (the pair's `second` member).
inline uint64_t Uint128High64(const uint128& x) {
    const uint64_t high = x.second;
    return high;
}
 | 
			
		||||
 | 
			
		||||
// Hash function for a byte array.
 | 
			
		||||
uint64_t CityHash64(const char* buf, size_t len);
 | 
			
		||||
 | 
			
		||||
// Hash function for a byte array.  For convenience, a 64-bit seed is also
 | 
			
		||||
// hashed into the result.
 | 
			
		||||
uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed);
 | 
			
		||||
 | 
			
		||||
// Hash function for a byte array.  For convenience, two seeds are also
 | 
			
		||||
// hashed into the result.
 | 
			
		||||
uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1);
 | 
			
		||||
 | 
			
		||||
// Hash function for a byte array.
 | 
			
		||||
uint128 CityHash128(const char* s, size_t len);
 | 
			
		||||
 | 
			
		||||
// Hash function for a byte array.  For convenience, a 128-bit seed is also
 | 
			
		||||
// hashed into the result.
 | 
			
		||||
uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed);
 | 
			
		||||
 | 
			
		||||
// Hash 128 input bits down to 64 bits of output.
 | 
			
		||||
// This is intended to be a reasonably good hash function.
 | 
			
		||||
inline uint64_t Hash128to64(const uint128& x) {
 | 
			
		||||
    // Murmur-inspired hashing.
 | 
			
		||||
    const uint64_t kMul = 0x9ddfea08eb382d69ULL;
 | 
			
		||||
    uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
 | 
			
		||||
    a ^= (a >> 47);
 | 
			
		||||
    uint64_t b = (Uint128High64(x) ^ a) * kMul;
 | 
			
		||||
    b ^= (b >> 47);
 | 
			
		||||
    b *= kMul;
 | 
			
		||||
    return b;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace Common
 | 
			
		||||
@ -1,141 +0,0 @@
 | 
			
		||||
// Copyright 2015 Citra Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#if defined(_MSC_VER)
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#endif
 | 
			
		||||
#include "common/common_funcs.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "common/hash.h"
 | 
			
		||||
 | 
			
		||||
namespace Common {
 | 
			
		||||
 | 
			
		||||
// MurmurHash3 was written by Austin Appleby, and is placed in the public
 | 
			
		||||
// domain. The author hereby disclaims copyright to this source code.
 | 
			
		||||
 | 
			
		||||
// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
 | 
			
		||||
// the conversion here
 | 
			
		||||
// Returns the i-th 64-bit word of the array p, read as-is (no byte swapping
// and no alignment handling).
static FORCE_INLINE u64 getblock64(const u64* p, size_t i) {
    const u64 word = *(p + i);
    return word;
}
 | 
			
		||||
 | 
			
		||||
// Finalization mix - force all bits of a hash block to avalanche
 | 
			
		||||
// Finalization mix: three xor-shift steps by 33 bits, separated by two
// multiplications with fixed constants.
static FORCE_INLINE u64 fmix64(u64 k) {
    k ^= (k >> 33);
    k *= 0xff51afd7ed558ccdllu;

    k ^= (k >> 33);
    k *= 0xc4ceb9fe1a85ec53llu;

    k ^= (k >> 33);
    return k;
}
 | 
			
		||||
 | 
			
		||||
// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit
 | 
			
		||||
// platforms (MurmurHash3_x64_128). It was taken from:
 | 
			
		||||
// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
 | 
			
		||||
void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) {
 | 
			
		||||
    const u8* data = (const u8*)key;
 | 
			
		||||
    const size_t nblocks = len / 16;
 | 
			
		||||
 | 
			
		||||
    u64 h1 = seed;
 | 
			
		||||
    u64 h2 = seed;
 | 
			
		||||
 | 
			
		||||
    const u64 c1 = 0x87c37b91114253d5llu;
 | 
			
		||||
    const u64 c2 = 0x4cf5ad432745937fllu;
 | 
			
		||||
 | 
			
		||||
    // Body
 | 
			
		||||
 | 
			
		||||
    const u64* blocks = (const u64*)(data);
 | 
			
		||||
 | 
			
		||||
    for (size_t i = 0; i < nblocks; i++) {
 | 
			
		||||
        u64 k1 = getblock64(blocks, i * 2 + 0);
 | 
			
		||||
        u64 k2 = getblock64(blocks, i * 2 + 1);
 | 
			
		||||
 | 
			
		||||
        k1 *= c1;
 | 
			
		||||
        k1 = _rotl64(k1, 31);
 | 
			
		||||
        k1 *= c2;
 | 
			
		||||
        h1 ^= k1;
 | 
			
		||||
 | 
			
		||||
        h1 = _rotl64(h1, 27);
 | 
			
		||||
        h1 += h2;
 | 
			
		||||
        h1 = h1 * 5 + 0x52dce729;
 | 
			
		||||
 | 
			
		||||
        k2 *= c2;
 | 
			
		||||
        k2 = _rotl64(k2, 33);
 | 
			
		||||
        k2 *= c1;
 | 
			
		||||
        h2 ^= k2;
 | 
			
		||||
 | 
			
		||||
        h2 = _rotl64(h2, 31);
 | 
			
		||||
        h2 += h1;
 | 
			
		||||
        h2 = h2 * 5 + 0x38495ab5;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Tail
 | 
			
		||||
 | 
			
		||||
    const u8* tail = (const u8*)(data + nblocks * 16);
 | 
			
		||||
 | 
			
		||||
    u64 k1 = 0;
 | 
			
		||||
    u64 k2 = 0;
 | 
			
		||||
 | 
			
		||||
    switch (len & 15) {
 | 
			
		||||
    case 15:
 | 
			
		||||
        k2 ^= ((u64)tail[14]) << 48;
 | 
			
		||||
    case 14:
 | 
			
		||||
        k2 ^= ((u64)tail[13]) << 40;
 | 
			
		||||
    case 13:
 | 
			
		||||
        k2 ^= ((u64)tail[12]) << 32;
 | 
			
		||||
    case 12:
 | 
			
		||||
        k2 ^= ((u64)tail[11]) << 24;
 | 
			
		||||
    case 11:
 | 
			
		||||
        k2 ^= ((u64)tail[10]) << 16;
 | 
			
		||||
    case 10:
 | 
			
		||||
        k2 ^= ((u64)tail[9]) << 8;
 | 
			
		||||
    case 9:
 | 
			
		||||
        k2 ^= ((u64)tail[8]) << 0;
 | 
			
		||||
        k2 *= c2;
 | 
			
		||||
        k2 = _rotl64(k2, 33);
 | 
			
		||||
        k2 *= c1;
 | 
			
		||||
        h2 ^= k2;
 | 
			
		||||
 | 
			
		||||
    case 8:
 | 
			
		||||
        k1 ^= ((u64)tail[7]) << 56;
 | 
			
		||||
    case 7:
 | 
			
		||||
        k1 ^= ((u64)tail[6]) << 48;
 | 
			
		||||
    case 6:
 | 
			
		||||
        k1 ^= ((u64)tail[5]) << 40;
 | 
			
		||||
    case 5:
 | 
			
		||||
        k1 ^= ((u64)tail[4]) << 32;
 | 
			
		||||
    case 4:
 | 
			
		||||
        k1 ^= ((u64)tail[3]) << 24;
 | 
			
		||||
    case 3:
 | 
			
		||||
        k1 ^= ((u64)tail[2]) << 16;
 | 
			
		||||
    case 2:
 | 
			
		||||
        k1 ^= ((u64)tail[1]) << 8;
 | 
			
		||||
    case 1:
 | 
			
		||||
        k1 ^= ((u64)tail[0]) << 0;
 | 
			
		||||
        k1 *= c1;
 | 
			
		||||
        k1 = _rotl64(k1, 31);
 | 
			
		||||
        k1 *= c2;
 | 
			
		||||
        h1 ^= k1;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // Finalization
 | 
			
		||||
 | 
			
		||||
    h1 ^= len;
 | 
			
		||||
    h2 ^= len;
 | 
			
		||||
 | 
			
		||||
    h1 += h2;
 | 
			
		||||
    h2 += h1;
 | 
			
		||||
 | 
			
		||||
    h1 = fmix64(h1);
 | 
			
		||||
    h2 = fmix64(h2);
 | 
			
		||||
 | 
			
		||||
    h1 += h2;
 | 
			
		||||
    h2 += h1;
 | 
			
		||||
 | 
			
		||||
    ((u64*)out)[0] = h1;
 | 
			
		||||
    ((u64*)out)[1] = h2;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace Common
 | 
			
		||||
@ -5,12 +5,12 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <cstddef>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include "common/cityhash.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
 | 
			
		||||
namespace Common {
 | 
			
		||||
 | 
			
		||||
void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Computes a 64-bit hash over the specified block of data
 | 
			
		||||
 * @param data Block of data to compute hash over
 | 
			
		||||
@ -18,9 +18,54 @@ void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
 | 
			
		||||
 * @returns 64-bit hash value that was computed over the data block
 | 
			
		||||
 */
 | 
			
		||||
static inline u64 ComputeHash64(const void* data, size_t len) {
    // The block is hashed with CityHash64, which is called with a const char* buffer, so the
    // opaque pointer is reinterpreted as bytes. The former MurmurHash3_128 path has been dropped:
    // it returned before this call could run, which left the CityHash64 line unreachable.
    return CityHash64(static_cast<const char*>(data), len);
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Computes a 64-bit hash of a struct. In addition to being trivially copyable, it is also critical
 | 
			
		||||
 * that either the struct includes no padding, or that any padding is initialized to a known value
 | 
			
		||||
 * by memsetting the struct to 0 before filling it in.
 | 
			
		||||
 */
 | 
			
		||||
template <typename T>
 | 
			
		||||
static inline u64 ComputeStructHash64(const T& data) {
 | 
			
		||||
    static_assert(std::is_trivially_copyable<T>(),
 | 
			
		||||
                  "Type passed to ComputeStructHash64 must be trivially copyable");
 | 
			
		||||
    return ComputeHash64(&data, sizeof(data));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
 | 
			
		||||
template <typename T>
 | 
			
		||||
struct HashableStruct {
 | 
			
		||||
    // In addition to being trivially copyable, T must also have a trivial default constructor,
 | 
			
		||||
    // because any member initialization would be overridden by memset
 | 
			
		||||
    static_assert(std::is_trivial<T>(), "Type passed to HashableStruct must be trivial");
 | 
			
		||||
    /*
 | 
			
		||||
     * We use a union because "implicitly-defined copy/move constructor for a union X copies the
 | 
			
		||||
     * object representation of X." and "implicitly-defined copy assignment operator for a union X
 | 
			
		||||
     * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy.
 | 
			
		||||
     * This is important because the padding bytes are included in the hash and comparison between
 | 
			
		||||
     * objects.
 | 
			
		||||
     */
 | 
			
		||||
    union {
 | 
			
		||||
        T state;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    HashableStruct() {
 | 
			
		||||
        // Memset structure to zero padding bits, so that they will be deterministic when hashing
 | 
			
		||||
        std::memset(&state, 0, sizeof(T));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    bool operator==(const HashableStruct<T>& o) const {
 | 
			
		||||
        return std::memcmp(&state, &o.state, sizeof(T)) == 0;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    bool operator!=(const HashableStruct<T>& o) const {
 | 
			
		||||
        return !(*this == o);
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    size_t Hash() const {
 | 
			
		||||
        return Common::ComputeStructHash64(state);
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace Common
 | 
			
		||||
 | 
			
		||||
@ -9,6 +9,7 @@ add_library(video_core STATIC
 | 
			
		||||
    engines/maxwell_3d.h
 | 
			
		||||
    engines/maxwell_compute.cpp
 | 
			
		||||
    engines/maxwell_compute.h
 | 
			
		||||
    engines/shader_bytecode.h
 | 
			
		||||
    gpu.cpp
 | 
			
		||||
    gpu.h
 | 
			
		||||
    macro_interpreter.cpp
 | 
			
		||||
@ -27,6 +28,8 @@ add_library(video_core STATIC
 | 
			
		||||
    renderer_opengl/gl_shader_decompiler.h
 | 
			
		||||
    renderer_opengl/gl_shader_gen.cpp
 | 
			
		||||
    renderer_opengl/gl_shader_gen.h
 | 
			
		||||
    renderer_opengl/gl_shader_manager.cpp
 | 
			
		||||
    renderer_opengl/gl_shader_manager.h
 | 
			
		||||
    renderer_opengl/gl_shader_util.cpp
 | 
			
		||||
    renderer_opengl/gl_shader_util.h
 | 
			
		||||
    renderer_opengl/gl_state.cpp
 | 
			
		||||
 | 
			
		||||
@ -427,14 +427,11 @@ public:
 | 
			
		||||
                        BitField<0, 1, u32> enable;
 | 
			
		||||
                        BitField<4, 4, ShaderProgram> program;
 | 
			
		||||
                    };
 | 
			
		||||
                    u32 start_id;
 | 
			
		||||
                    INSERT_PADDING_WORDS(1);
 | 
			
		||||
                    u32 gpr_alloc;
 | 
			
		||||
                    ShaderStage type;
 | 
			
		||||
                    INSERT_PADDING_WORDS(9);
 | 
			
		||||
                    u32 offset;
 | 
			
		||||
                    INSERT_PADDING_WORDS(14);
 | 
			
		||||
                } shader_config[MaxShaderProgram];
 | 
			
		||||
 | 
			
		||||
                INSERT_PADDING_WORDS(0x8C);
 | 
			
		||||
                INSERT_PADDING_WORDS(0x80);
 | 
			
		||||
 | 
			
		||||
                struct {
 | 
			
		||||
                    u32 cb_size;
 | 
			
		||||
@ -507,6 +504,7 @@ public:
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    State state{};
 | 
			
		||||
    MemoryManager& memory_manager;
 | 
			
		||||
 | 
			
		||||
    /// Reads a register value located at the input method address
 | 
			
		||||
    u32 GetRegisterValue(u32 method) const;
 | 
			
		||||
@ -521,8 +519,6 @@ public:
 | 
			
		||||
    std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    MemoryManager& memory_manager;
 | 
			
		||||
 | 
			
		||||
    std::unordered_map<u32, std::vector<u32>> uploaded_macros;
 | 
			
		||||
 | 
			
		||||
    /// Macro method that is currently being executed / being fed parameters.
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										327
									
								
								src/video_core/engines/shader_bytecode.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										327
									
								
								src/video_core/engines/shader_bytecode.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,327 @@
 | 
			
		||||
// Copyright 2018 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <map>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "common/bit_field.h"
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
namespace Shader {
 | 
			
		||||
 | 
			
		||||
struct Register {
 | 
			
		||||
    Register() = default;
 | 
			
		||||
 | 
			
		||||
    constexpr Register(u64 value) : value(value) {}
 | 
			
		||||
 | 
			
		||||
    constexpr u64 GetIndex() const {
 | 
			
		||||
        return value;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    constexpr operator u64() const {
 | 
			
		||||
        return value;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template <typename T>
 | 
			
		||||
    constexpr u64 operator-(const T& oth) const {
 | 
			
		||||
        return value - oth;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template <typename T>
 | 
			
		||||
    constexpr u64 operator&(const T& oth) const {
 | 
			
		||||
        return value & oth;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    constexpr u64 operator&(const Register& oth) const {
 | 
			
		||||
        return value & oth.value;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    constexpr u64 operator~() const {
 | 
			
		||||
        return ~value;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    u64 value;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
union Attribute {
 | 
			
		||||
    Attribute() = default;
 | 
			
		||||
 | 
			
		||||
    constexpr Attribute(u64 value) : value(value) {}
 | 
			
		||||
 | 
			
		||||
    enum class Index : u64 {
 | 
			
		||||
        Position = 7,
 | 
			
		||||
        Attribute_0 = 8,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    union {
 | 
			
		||||
        BitField<22, 2, u64> element;
 | 
			
		||||
        BitField<24, 6, Index> index;
 | 
			
		||||
        BitField<47, 3, u64> size;
 | 
			
		||||
    } fmt20;
 | 
			
		||||
 | 
			
		||||
    union {
 | 
			
		||||
        BitField<30, 2, u64> element;
 | 
			
		||||
        BitField<32, 6, Index> index;
 | 
			
		||||
    } fmt28;
 | 
			
		||||
 | 
			
		||||
    BitField<39, 8, u64> reg;
 | 
			
		||||
    u64 value;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
union Uniform {
 | 
			
		||||
    BitField<20, 14, u64> offset;
 | 
			
		||||
    BitField<34, 5, u64> index;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
union OpCode {
 | 
			
		||||
    enum class Id : u64 {
 | 
			
		||||
        TEXS = 0x6C,
 | 
			
		||||
        IPA = 0xE0,
 | 
			
		||||
        FFMA_IMM = 0x65,
 | 
			
		||||
        FFMA_CR = 0x93,
 | 
			
		||||
        FFMA_RC = 0xA3,
 | 
			
		||||
        FFMA_RR = 0xB3,
 | 
			
		||||
 | 
			
		||||
        FADD_C = 0x98B,
 | 
			
		||||
        FMUL_C = 0x98D,
 | 
			
		||||
        MUFU = 0xA10,
 | 
			
		||||
        FADD_R = 0xB8B,
 | 
			
		||||
        FMUL_R = 0xB8D,
 | 
			
		||||
        LD_A = 0x1DFB,
 | 
			
		||||
        ST_A = 0x1DFE,
 | 
			
		||||
 | 
			
		||||
        FSETP_R = 0x5BB,
 | 
			
		||||
        FSETP_C = 0x4BB,
 | 
			
		||||
        EXIT = 0xE30,
 | 
			
		||||
        KIL = 0xE33,
 | 
			
		||||
 | 
			
		||||
        FMUL_IMM = 0x70D,
 | 
			
		||||
        FMUL_IMM_x = 0x72D,
 | 
			
		||||
        FADD_IMM = 0x70B,
 | 
			
		||||
        FADD_IMM_x = 0x72B,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    enum class Type {
 | 
			
		||||
        Trivial,
 | 
			
		||||
        Arithmetic,
 | 
			
		||||
        Ffma,
 | 
			
		||||
        Flow,
 | 
			
		||||
        Memory,
 | 
			
		||||
        Unknown,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    struct Info {
 | 
			
		||||
        Type type;
 | 
			
		||||
        std::string name;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    OpCode() = default;
 | 
			
		||||
 | 
			
		||||
    constexpr OpCode(Id value) : value(static_cast<u64>(value)) {}
 | 
			
		||||
 | 
			
		||||
    constexpr OpCode(u64 value) : value{value} {}
 | 
			
		||||
 | 
			
		||||
    constexpr Id EffectiveOpCode() const {
 | 
			
		||||
        switch (op1) {
 | 
			
		||||
        case Id::TEXS:
 | 
			
		||||
            return op1;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        switch (op2) {
 | 
			
		||||
        case Id::IPA:
 | 
			
		||||
            return op2;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        switch (op3) {
 | 
			
		||||
        case Id::FFMA_IMM:
 | 
			
		||||
        case Id::FFMA_CR:
 | 
			
		||||
        case Id::FFMA_RC:
 | 
			
		||||
        case Id::FFMA_RR:
 | 
			
		||||
            return op3;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        switch (op4) {
 | 
			
		||||
        case Id::EXIT:
 | 
			
		||||
        case Id::FSETP_R:
 | 
			
		||||
        case Id::FSETP_C:
 | 
			
		||||
        case Id::KIL:
 | 
			
		||||
            return op4;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        switch (op5) {
 | 
			
		||||
        case Id::MUFU:
 | 
			
		||||
        case Id::LD_A:
 | 
			
		||||
        case Id::ST_A:
 | 
			
		||||
        case Id::FADD_R:
 | 
			
		||||
        case Id::FADD_C:
 | 
			
		||||
        case Id::FMUL_R:
 | 
			
		||||
        case Id::FMUL_C:
 | 
			
		||||
            return op5;
 | 
			
		||||
 | 
			
		||||
        case Id::FMUL_IMM:
 | 
			
		||||
        case Id::FMUL_IMM_x:
 | 
			
		||||
            return Id::FMUL_IMM;
 | 
			
		||||
 | 
			
		||||
        case Id::FADD_IMM:
 | 
			
		||||
        case Id::FADD_IMM_x:
 | 
			
		||||
            return Id::FADD_IMM;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return static_cast<Id>(value);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static const Info& GetInfo(const OpCode& opcode) {
 | 
			
		||||
        static const std::map<Id, Info> info_table{BuildInfoTable()};
 | 
			
		||||
        const auto& search{info_table.find(opcode.EffectiveOpCode())};
 | 
			
		||||
        if (search != info_table.end()) {
 | 
			
		||||
            return search->second;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        static const Info unknown{Type::Unknown, "UNK"};
 | 
			
		||||
        return unknown;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    constexpr operator Id() const {
 | 
			
		||||
        return static_cast<Id>(value);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    constexpr OpCode operator<<(size_t bits) const {
 | 
			
		||||
        return value << bits;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    constexpr OpCode operator>>(size_t bits) const {
 | 
			
		||||
        return value >> bits;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template <typename T>
 | 
			
		||||
    constexpr u64 operator-(const T& oth) const {
 | 
			
		||||
        return value - oth;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    constexpr u64 operator&(const OpCode& oth) const {
 | 
			
		||||
        return value & oth.value;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    constexpr u64 operator~() const {
 | 
			
		||||
        return ~value;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    static std::map<Id, Info> BuildInfoTable() {
 | 
			
		||||
        std::map<Id, Info> info_table;
 | 
			
		||||
        info_table[Id::TEXS] = {Type::Memory, "texs"};
 | 
			
		||||
        info_table[Id::LD_A] = {Type::Memory, "ld_a"};
 | 
			
		||||
        info_table[Id::ST_A] = {Type::Memory, "st_a"};
 | 
			
		||||
        info_table[Id::MUFU] = {Type::Arithmetic, "mufu"};
 | 
			
		||||
        info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"};
 | 
			
		||||
        info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"};
 | 
			
		||||
        info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"};
 | 
			
		||||
        info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"};
 | 
			
		||||
        info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"};
 | 
			
		||||
        info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"};
 | 
			
		||||
        info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"};
 | 
			
		||||
        info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"};
 | 
			
		||||
        info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
 | 
			
		||||
        info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
 | 
			
		||||
        info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
 | 
			
		||||
        info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
 | 
			
		||||
        info_table[Id::EXIT] = {Type::Trivial, "exit"};
 | 
			
		||||
        info_table[Id::IPA] = {Type::Trivial, "ipa"};
 | 
			
		||||
        info_table[Id::KIL] = {Type::Flow, "kil"};
 | 
			
		||||
        return info_table;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    BitField<57, 7, Id> op1;
 | 
			
		||||
    BitField<56, 8, Id> op2;
 | 
			
		||||
    BitField<55, 9, Id> op3;
 | 
			
		||||
    BitField<52, 12, Id> op4;
 | 
			
		||||
    BitField<51, 13, Id> op5;
 | 
			
		||||
    u64 value;
 | 
			
		||||
};
 | 
			
		||||
static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");
 | 
			
		||||
 | 
			
		||||
} // namespace Shader
 | 
			
		||||
} // namespace Tegra
 | 
			
		||||
 | 
			
		||||
namespace std {
 | 
			
		||||
 | 
			
		||||
// TODO(bunne): The below is forbidden by the C++ standard, but works fine. See #330.
 | 
			
		||||
template <>
 | 
			
		||||
struct make_unsigned<Tegra::Shader::Attribute> {
 | 
			
		||||
    using type = Tegra::Shader::Attribute;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
struct make_unsigned<Tegra::Shader::Register> {
 | 
			
		||||
    using type = Tegra::Shader::Register;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
struct make_unsigned<Tegra::Shader::OpCode> {
 | 
			
		||||
    using type = Tegra::Shader::OpCode;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace std
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
namespace Shader {
 | 
			
		||||
 | 
			
		||||
/// Named values of an instruction's 4-bit predicate field.
/// NOTE(review): meanings are inferred from the enumerator names only - confirm against the ISA.
enum class Pred : u64 {
    UnusedIndex = 0x7,
    NeverExecute = 0xF,
};
 | 
			
		||||
 | 
			
		||||
/// Sub-operation selector read from an instruction's sub_op field.
/// NOTE(review): presumably cosine, sine, 2^x, log2(x), reciprocal and reciprocal square root,
/// judging by the names - confirm against the ISA.
enum class SubOp : u64 {
    Cos = 0x0,
    Sin = 0x1,
    Ex2 = 0x2,
    Lg2 = 0x3,
    Rcp = 0x4,
    Rsq = 0x5,
};
 | 
			
		||||
 | 
			
		||||
union Instruction {
 | 
			
		||||
    Instruction& operator=(const Instruction& instr) {
 | 
			
		||||
        hex = instr.hex;
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OpCode opcode;
 | 
			
		||||
    BitField<0, 8, Register> gpr0;
 | 
			
		||||
    BitField<8, 8, Register> gpr8;
 | 
			
		||||
    BitField<16, 4, Pred> pred;
 | 
			
		||||
    BitField<20, 8, Register> gpr20;
 | 
			
		||||
    BitField<20, 7, SubOp> sub_op;
 | 
			
		||||
    BitField<28, 8, Register> gpr28;
 | 
			
		||||
    BitField<36, 13, u64> imm36;
 | 
			
		||||
    BitField<39, 8, Register> gpr39;
 | 
			
		||||
 | 
			
		||||
    union {
 | 
			
		||||
        BitField<45, 1, u64> negate_b;
 | 
			
		||||
        BitField<46, 1, u64> abs_a;
 | 
			
		||||
        BitField<48, 1, u64> negate_a;
 | 
			
		||||
        BitField<49, 1, u64> abs_b;
 | 
			
		||||
        BitField<50, 1, u64> abs_d;
 | 
			
		||||
    } alu;
 | 
			
		||||
 | 
			
		||||
    union {
 | 
			
		||||
        BitField<48, 1, u64> negate_b;
 | 
			
		||||
        BitField<49, 1, u64> negate_c;
 | 
			
		||||
    } ffma;
 | 
			
		||||
 | 
			
		||||
    BitField<60, 1, u64> is_b_gpr;
 | 
			
		||||
    BitField<59, 1, u64> is_c_gpr;
 | 
			
		||||
 | 
			
		||||
    Attribute attribute;
 | 
			
		||||
    Uniform uniform;
 | 
			
		||||
 | 
			
		||||
    u64 hex;
 | 
			
		||||
};
 | 
			
		||||
static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
 | 
			
		||||
static_assert(std::is_standard_layout<Instruction>::value,
 | 
			
		||||
              "Structure does not have standard layout");
 | 
			
		||||
 | 
			
		||||
} // namespace Shader
 | 
			
		||||
} // namespace Tegra
 | 
			
		||||
@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
 | 
			
		||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
 | 
			
		||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
 | 
			
		||||
 | 
			
		||||
enum class UniformBindings : GLuint { Common, VS, FS };
 | 
			
		||||
 | 
			
		||||
static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
 | 
			
		||||
                                         size_t expected_size) {
 | 
			
		||||
    GLuint ub_index = glGetUniformBlockIndex(shader, name);
 | 
			
		||||
    if (ub_index != GL_INVALID_INDEX) {
 | 
			
		||||
        GLint ub_size = 0;
 | 
			
		||||
        glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
 | 
			
		||||
        ASSERT_MSG(ub_size == expected_size,
 | 
			
		||||
                   "Uniform block size did not match! Got %d, expected %zu",
 | 
			
		||||
                   static_cast<int>(ub_size), expected_size);
 | 
			
		||||
        glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void SetShaderUniformBlockBindings(GLuint shader) {
 | 
			
		||||
    SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common,
 | 
			
		||||
                                 sizeof(RasterizerOpenGL::UniformData));
 | 
			
		||||
    SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS,
 | 
			
		||||
                                 sizeof(RasterizerOpenGL::VSUniformData));
 | 
			
		||||
    SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS,
 | 
			
		||||
                                 sizeof(RasterizerOpenGL::FSUniformData));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
RasterizerOpenGL::RasterizerOpenGL() {
 | 
			
		||||
    shader_dirty = true;
 | 
			
		||||
 | 
			
		||||
    has_ARB_buffer_storage = false;
 | 
			
		||||
    has_ARB_direct_state_access = false;
 | 
			
		||||
    has_ARB_separate_shader_objects = false;
 | 
			
		||||
@ -88,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() {
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");
 | 
			
		||||
 | 
			
		||||
    // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
 | 
			
		||||
    state.clip_distance[0] = true;
 | 
			
		||||
 | 
			
		||||
@ -102,36 +78,31 @@ RasterizerOpenGL::RasterizerOpenGL() {
 | 
			
		||||
    state.draw.uniform_buffer = uniform_buffer.handle;
 | 
			
		||||
    state.Apply();
 | 
			
		||||
 | 
			
		||||
    glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW);
 | 
			
		||||
    glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);
 | 
			
		||||
 | 
			
		||||
    uniform_block_data.dirty = true;
 | 
			
		||||
 | 
			
		||||
    // Create render framebuffer
 | 
			
		||||
    framebuffer.Create();
 | 
			
		||||
 | 
			
		||||
    if (has_ARB_separate_shader_objects) {
 | 
			
		||||
        hw_vao.Create();
 | 
			
		||||
        hw_vao_enabled_attributes.fill(false);
 | 
			
		||||
    hw_vao.Create();
 | 
			
		||||
    hw_vao_enabled_attributes.fill(false);
 | 
			
		||||
 | 
			
		||||
        stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
 | 
			
		||||
        stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
 | 
			
		||||
        state.draw.vertex_buffer = stream_buffer->GetHandle();
 | 
			
		||||
    stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
 | 
			
		||||
    stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
 | 
			
		||||
    state.draw.vertex_buffer = stream_buffer->GetHandle();
 | 
			
		||||
 | 
			
		||||
        pipeline.Create();
 | 
			
		||||
        state.draw.program_pipeline = pipeline.handle;
 | 
			
		||||
        state.draw.shader_program = 0;
 | 
			
		||||
        state.draw.vertex_array = hw_vao.handle;
 | 
			
		||||
        state.Apply();
 | 
			
		||||
    shader_program_manager = std::make_unique<GLShader::ProgramManager>();
 | 
			
		||||
 | 
			
		||||
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
 | 
			
		||||
    state.draw.shader_program = 0;
 | 
			
		||||
    state.draw.vertex_array = hw_vao.handle;
 | 
			
		||||
    state.Apply();
 | 
			
		||||
 | 
			
		||||
        vs_uniform_buffer.Create();
 | 
			
		||||
        glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle);
 | 
			
		||||
        glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY);
 | 
			
		||||
        glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle);
 | 
			
		||||
    } else {
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
 | 
			
		||||
 | 
			
		||||
    for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
 | 
			
		||||
        auto& buffer = uniform_buffers[index];
 | 
			
		||||
        buffer.Create();
 | 
			
		||||
        glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
 | 
			
		||||
        glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr,
 | 
			
		||||
                     GL_STREAM_COPY);
 | 
			
		||||
        glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    accelerate_draw = AccelDraw::Disabled;
 | 
			
		||||
@ -200,26 +171,74 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
 | 
			
		||||
    buffer_offset += data_size;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) {
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_VS);
 | 
			
		||||
    LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
 | 
			
		||||
    glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle);
 | 
			
		||||
}
 | 
			
		||||
void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) {
 | 
			
		||||
    // Helper function for uploading uniform data
 | 
			
		||||
    const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
 | 
			
		||||
        if (has_ARB_direct_state_access) {
 | 
			
		||||
            glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
 | 
			
		||||
        } else {
 | 
			
		||||
            glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
 | 
			
		||||
            glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) {
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_FS);
 | 
			
		||||
    UNREACHABLE();
 | 
			
		||||
    auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
 | 
			
		||||
    ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
 | 
			
		||||
 | 
			
		||||
    for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
 | 
			
		||||
        ptr_pos += sizeof(GLShader::MaxwellUniformData);
 | 
			
		||||
 | 
			
		||||
        auto& shader_config = gpu.regs.shader_config[index];
 | 
			
		||||
        const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
 | 
			
		||||
 | 
			
		||||
        // VertexB program is always enabled, despite bit setting
 | 
			
		||||
        const bool is_enabled{shader_config.enable || program == Maxwell::ShaderProgram::VertexB};
 | 
			
		||||
 | 
			
		||||
        // Skip stages that are not enabled
 | 
			
		||||
        if (!is_enabled) {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Upload uniform data as one UBO per stage
 | 
			
		||||
        const auto& stage = index - 1; // Stage indices are 0 - 5
 | 
			
		||||
        const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
 | 
			
		||||
        copy_buffer(uniform_buffers[stage].handle, ubo_offset,
 | 
			
		||||
                    sizeof(GLShader::MaxwellUniformData));
 | 
			
		||||
        GLShader::MaxwellUniformData* ub_ptr =
 | 
			
		||||
            reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]);
 | 
			
		||||
        ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
 | 
			
		||||
 | 
			
		||||
        // Fetch program code from memory
 | 
			
		||||
        GLShader::ProgramCode program_code;
 | 
			
		||||
        const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
 | 
			
		||||
        const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)};
 | 
			
		||||
        Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64));
 | 
			
		||||
        GLShader::ShaderSetup setup{std::move(program_code)};
 | 
			
		||||
 | 
			
		||||
        switch (program) {
 | 
			
		||||
        case Maxwell::ShaderProgram::VertexB: {
 | 
			
		||||
            GLShader::MaxwellVSConfig vs_config{setup};
 | 
			
		||||
            shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
        case Maxwell::ShaderProgram::Fragment: {
 | 
			
		||||
            GLShader::MaxwellFSConfig fs_config{setup};
 | 
			
		||||
            shader_program_manager->UseProgrammableFragmentShader(fs_config, setup);
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
        default:
 | 
			
		||||
            LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index,
 | 
			
		||||
                         shader_config.enable.Value(), shader_config.offset);
 | 
			
		||||
            UNREACHABLE();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    shader_program_manager->UseTrivialGeometryShader();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
 | 
			
		||||
    if (!has_ARB_separate_shader_objects) {
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
 | 
			
		||||
    DrawArrays();
 | 
			
		||||
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -280,18 +299,6 @@ void RasterizerOpenGL::DrawArrays() {
 | 
			
		||||
    // Sync and bind the texture surfaces
 | 
			
		||||
    BindTextures();
 | 
			
		||||
 | 
			
		||||
    // Sync and bind the shader
 | 
			
		||||
    if (shader_dirty) {
 | 
			
		||||
        SetShader();
 | 
			
		||||
        shader_dirty = false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Sync the uniform data
 | 
			
		||||
    if (uniform_block_data.dirty) {
 | 
			
		||||
        glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data);
 | 
			
		||||
        uniform_block_data.dirty = false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
 | 
			
		||||
    // scissor test to prevent drawing outside of the framebuffer region
 | 
			
		||||
    state.scissor.enabled = true;
 | 
			
		||||
@ -311,7 +318,9 @@ void RasterizerOpenGL::DrawArrays() {
 | 
			
		||||
    if (is_indexed) {
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
    }
 | 
			
		||||
    buffer_size += sizeof(VSUniformData);
 | 
			
		||||
 | 
			
		||||
    // Uniform space for the 5 shader stages
 | 
			
		||||
    buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
 | 
			
		||||
 | 
			
		||||
    size_t ptr_pos = 0;
 | 
			
		||||
    u8* buffer_ptr;
 | 
			
		||||
@ -327,25 +336,12 @@ void RasterizerOpenGL::DrawArrays() {
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]),
 | 
			
		||||
                      buffer_offset + static_cast<GLintptr>(ptr_pos));
 | 
			
		||||
    const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
 | 
			
		||||
    ptr_pos += sizeof(VSUniformData);
 | 
			
		||||
    SetupShaders(buffer_ptr, buffer_offset, ptr_pos);
 | 
			
		||||
 | 
			
		||||
    stream_buffer->Unmap();
 | 
			
		||||
 | 
			
		||||
    const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
 | 
			
		||||
        if (has_ARB_direct_state_access) {
 | 
			
		||||
            glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
 | 
			
		||||
        } else {
 | 
			
		||||
            glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
 | 
			
		||||
            glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData));
 | 
			
		||||
 | 
			
		||||
    glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle);
 | 
			
		||||
    shader_program_manager->ApplyTo(state);
 | 
			
		||||
    state.Apply();
 | 
			
		||||
 | 
			
		||||
    if (is_indexed) {
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
@ -531,72 +527,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::SetShader() {
 | 
			
		||||
    // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to
 | 
			
		||||
    // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell
 | 
			
		||||
    // shaders.
 | 
			
		||||
 | 
			
		||||
    static constexpr char vertex_shader[] = R"(
 | 
			
		||||
#version 150 core
 | 
			
		||||
 | 
			
		||||
in vec2 vert_position;
 | 
			
		||||
in vec2 vert_tex_coord;
 | 
			
		||||
out vec2 frag_tex_coord;
 | 
			
		||||
 | 
			
		||||
void main() {
 | 
			
		||||
    // Multiply input position by the rotscale part of the matrix and then manually translate by
 | 
			
		||||
    // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
 | 
			
		||||
    // to `vec3(vert_position.xy, 1.0)`
 | 
			
		||||
    gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0);
 | 
			
		||||
    frag_tex_coord = vert_tex_coord;
 | 
			
		||||
}
 | 
			
		||||
)";
 | 
			
		||||
 | 
			
		||||
    static constexpr char fragment_shader[] = R"(
 | 
			
		||||
#version 150 core
 | 
			
		||||
 | 
			
		||||
in vec2 frag_tex_coord;
 | 
			
		||||
out vec4 color;
 | 
			
		||||
 | 
			
		||||
uniform sampler2D tex[32];
 | 
			
		||||
 | 
			
		||||
void main() {
 | 
			
		||||
    color = texture(tex[0], frag_tex_coord);
 | 
			
		||||
}
 | 
			
		||||
)";
 | 
			
		||||
 | 
			
		||||
    if (current_shader) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader.");
 | 
			
		||||
 | 
			
		||||
    current_shader = &test_shader;
 | 
			
		||||
    if (has_ARB_separate_shader_objects) {
 | 
			
		||||
        test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true);
 | 
			
		||||
        glActiveShaderProgram(pipeline.handle, test_shader.shader.handle);
 | 
			
		||||
    } else {
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    state.draw.shader_program = test_shader.shader.handle;
 | 
			
		||||
    state.Apply();
 | 
			
		||||
 | 
			
		||||
    for (u32 texture = 0; texture < texture_samplers.size(); ++texture) {
 | 
			
		||||
        // Set the texture samplers to correspond to different texture units
 | 
			
		||||
        std::string uniform_name = "tex[" + std::to_string(texture) + "]";
 | 
			
		||||
        GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str());
 | 
			
		||||
        if (uniform_tex != -1) {
 | 
			
		||||
            glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (has_ARB_separate_shader_objects) {
 | 
			
		||||
        state.draw.shader_program = 0;
 | 
			
		||||
        state.Apply();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
 | 
			
		||||
                                               const Surface& depth_surface, bool has_stencil) {
 | 
			
		||||
    state.draw.draw_framebuffer = framebuffer.handle;
 | 
			
		||||
 | 
			
		||||
@ -15,10 +15,12 @@
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "common/hash.h"
 | 
			
		||||
#include "common/vector_math.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/rasterizer_interface.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_state.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
 | 
			
		||||
 | 
			
		||||
@ -45,7 +47,7 @@ public:
 | 
			
		||||
    /// OpenGL shader generated for a given Maxwell register state
 | 
			
		||||
    struct MaxwellShader {
 | 
			
		||||
        /// OpenGL shader resource
 | 
			
		||||
        OGLShader shader;
 | 
			
		||||
        OGLProgram shader;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    struct VertexShader {
 | 
			
		||||
@ -56,34 +58,6 @@ public:
 | 
			
		||||
        OGLShader shader;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
 | 
			
		||||
    // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
 | 
			
		||||
    //       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 | 
			
		||||
    //       Not following that rule will cause problems on some AMD drivers.
 | 
			
		||||
    struct UniformData {};
 | 
			
		||||
 | 
			
		||||
    // static_assert(
 | 
			
		||||
    //    sizeof(UniformData) == 0x460,
 | 
			
		||||
    //    "The size of the UniformData structure has changed, update the structure in the shader");
 | 
			
		||||
    static_assert(sizeof(UniformData) < 16384,
 | 
			
		||||
                  "UniformData structure must be less than 16kb as per the OpenGL spec");
 | 
			
		||||
 | 
			
		||||
    struct VSUniformData {};
 | 
			
		||||
    // static_assert(
 | 
			
		||||
    //    sizeof(VSUniformData) == 1856,
 | 
			
		||||
    //    "The size of the VSUniformData structure has changed, update the structure in the
 | 
			
		||||
    //    shader");
 | 
			
		||||
    static_assert(sizeof(VSUniformData) < 16384,
 | 
			
		||||
                  "VSUniformData structure must be less than 16kb as per the OpenGL spec");
 | 
			
		||||
 | 
			
		||||
    struct FSUniformData {};
 | 
			
		||||
    // static_assert(
 | 
			
		||||
    //    sizeof(FSUniformData) == 1856,
 | 
			
		||||
    //    "The size of the FSUniformData structure has changed, update the structure in the
 | 
			
		||||
    //    shader");
 | 
			
		||||
    static_assert(sizeof(FSUniformData) < 16384,
 | 
			
		||||
                  "FSUniformData structure must be less than 16kb as per the OpenGL spec");
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    class SamplerInfo {
 | 
			
		||||
    public:
 | 
			
		||||
@ -122,9 +96,6 @@ private:
 | 
			
		||||
    /// Syncs the clip coefficients to match the guest state
 | 
			
		||||
    void SyncClipCoef();
 | 
			
		||||
 | 
			
		||||
    /// Sets the OpenGL shader in accordance with the current guest state
 | 
			
		||||
    void SetShader();
 | 
			
		||||
 | 
			
		||||
    /// Syncs the cull mode to match the guest state
 | 
			
		||||
    void SyncCullMode();
 | 
			
		||||
 | 
			
		||||
@ -152,23 +123,12 @@ private:
 | 
			
		||||
 | 
			
		||||
    RasterizerCacheOpenGL res_cache;
 | 
			
		||||
 | 
			
		||||
    /// Shader used for test rendering - to be removed once we have emulated shaders
 | 
			
		||||
    MaxwellShader test_shader{};
 | 
			
		||||
 | 
			
		||||
    const MaxwellShader* current_shader{};
 | 
			
		||||
    bool shader_dirty{};
 | 
			
		||||
 | 
			
		||||
    struct {
 | 
			
		||||
        UniformData data;
 | 
			
		||||
        bool dirty;
 | 
			
		||||
    } uniform_block_data = {};
 | 
			
		||||
 | 
			
		||||
    OGLPipeline pipeline;
 | 
			
		||||
    std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
 | 
			
		||||
    OGLVertexArray sw_vao;
 | 
			
		||||
    OGLVertexArray hw_vao;
 | 
			
		||||
    std::array<bool, 16> hw_vao_enabled_attributes;
 | 
			
		||||
 | 
			
		||||
    std::array<SamplerInfo, 32> texture_samplers;
 | 
			
		||||
    std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
 | 
			
		||||
    static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
 | 
			
		||||
    std::unique_ptr<OGLStreamBuffer> vertex_buffer;
 | 
			
		||||
    OGLBuffer uniform_buffer;
 | 
			
		||||
@ -182,19 +142,9 @@ private:
 | 
			
		||||
    void AnalyzeVertexArray(bool is_indexed);
 | 
			
		||||
    void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
 | 
			
		||||
 | 
			
		||||
    OGLBuffer vs_uniform_buffer;
 | 
			
		||||
    std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map;
 | 
			
		||||
    std::unordered_map<std::string, VertexShader> vs_shader_cache;
 | 
			
		||||
    OGLShader vs_default_shader;
 | 
			
		||||
    std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
 | 
			
		||||
 | 
			
		||||
    void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset);
 | 
			
		||||
 | 
			
		||||
    OGLBuffer fs_uniform_buffer;
 | 
			
		||||
    std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map;
 | 
			
		||||
    std::unordered_map<std::string, FragmentShader> fs_shader_cache;
 | 
			
		||||
    OGLShader fs_default_shader;
 | 
			
		||||
 | 
			
		||||
    void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset);
 | 
			
		||||
    void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos);
 | 
			
		||||
 | 
			
		||||
    enum class AccelDraw { Disabled, Arrays, Indexed };
 | 
			
		||||
    AccelDraw accelerate_draw;
 | 
			
		||||
 | 
			
		||||
@ -818,7 +818,7 @@ void main() {
 | 
			
		||||
    color = texelFetch(tbo, tbo_offset).rabg;
 | 
			
		||||
}
 | 
			
		||||
)";
 | 
			
		||||
    d24s8_abgr_shader.Create(vs_source, nullptr, fs_source);
 | 
			
		||||
    d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source);
 | 
			
		||||
 | 
			
		||||
    OpenGLState state = OpenGLState::GetCurState();
 | 
			
		||||
    GLuint old_program = state.draw.shader_program;
 | 
			
		||||
 | 
			
		||||
@ -334,7 +334,7 @@ private:
 | 
			
		||||
    OGLVertexArray attributeless_vao;
 | 
			
		||||
    OGLBuffer d24s8_abgr_buffer;
 | 
			
		||||
    GLsizeiptr d24s8_abgr_buffer_size;
 | 
			
		||||
    OGLShader d24s8_abgr_shader;
 | 
			
		||||
    OGLProgram d24s8_abgr_shader;
 | 
			
		||||
    GLint d24s8_abgr_tbo_size_u_id;
 | 
			
		||||
    GLint d24s8_abgr_viewport_u_id;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@ -13,14 +13,16 @@
 | 
			
		||||
class OGLTexture : private NonCopyable {
 | 
			
		||||
public:
 | 
			
		||||
    OGLTexture() = default;
 | 
			
		||||
    OGLTexture(OGLTexture&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {}
 | 
			
		||||
 | 
			
		||||
    ~OGLTexture() {
 | 
			
		||||
        Release();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLTexture& operator=(OGLTexture&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
        Release();
 | 
			
		||||
        handle = std::exchange(o.handle, 0);
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -46,14 +48,16 @@ public:
 | 
			
		||||
class OGLSampler : private NonCopyable {
 | 
			
		||||
public:
 | 
			
		||||
    OGLSampler() = default;
 | 
			
		||||
    OGLSampler(OGLSampler&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {}
 | 
			
		||||
 | 
			
		||||
    ~OGLSampler() {
 | 
			
		||||
        Release();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLSampler& operator=(OGLSampler&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
        Release();
 | 
			
		||||
        handle = std::exchange(o.handle, 0);
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -79,25 +83,71 @@ public:
 | 
			
		||||
class OGLShader : private NonCopyable {
 | 
			
		||||
public:
 | 
			
		||||
    OGLShader() = default;
 | 
			
		||||
    OGLShader(OGLShader&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {}
 | 
			
		||||
 | 
			
		||||
    ~OGLShader() {
 | 
			
		||||
        Release();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLShader& operator=(OGLShader&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
        Release();
 | 
			
		||||
        handle = std::exchange(o.handle, 0);
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Creates a new internal OpenGL resource and stores the handle
 | 
			
		||||
    void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader,
 | 
			
		||||
                const std::vector<const char*>& feedback_vars = {},
 | 
			
		||||
                bool separable_program = false) {
 | 
			
		||||
    void Create(const char* source, GLenum type) {
 | 
			
		||||
        if (handle != 0)
 | 
			
		||||
            return;
 | 
			
		||||
        handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars,
 | 
			
		||||
                                       separable_program);
 | 
			
		||||
        if (source == nullptr)
 | 
			
		||||
            return;
 | 
			
		||||
        handle = GLShader::LoadShader(source, type);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void Release() {
 | 
			
		||||
        if (handle == 0)
 | 
			
		||||
            return;
 | 
			
		||||
        glDeleteShader(handle);
 | 
			
		||||
        handle = 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    GLuint handle = 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class OGLProgram : private NonCopyable {
 | 
			
		||||
public:
 | 
			
		||||
    OGLProgram() = default;
 | 
			
		||||
 | 
			
		||||
    OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {}
 | 
			
		||||
 | 
			
		||||
    ~OGLProgram() {
 | 
			
		||||
        Release();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLProgram& operator=(OGLProgram&& o) {
 | 
			
		||||
        Release();
 | 
			
		||||
        handle = std::exchange(o.handle, 0);
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template <typename... T>
 | 
			
		||||
    void Create(bool separable_program, T... shaders) {
 | 
			
		||||
        if (handle != 0)
 | 
			
		||||
            return;
 | 
			
		||||
        handle = GLShader::LoadProgram(separable_program, shaders...);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Creates a new internal OpenGL resource and stores the handle
 | 
			
		||||
    void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
 | 
			
		||||
                          bool separable_program = false) {
 | 
			
		||||
        OGLShader vert, geo, frag;
 | 
			
		||||
        if (vert_shader)
 | 
			
		||||
            vert.Create(vert_shader, GL_VERTEX_SHADER);
 | 
			
		||||
        if (geo_shader)
 | 
			
		||||
            geo.Create(geo_shader, GL_GEOMETRY_SHADER);
 | 
			
		||||
        if (frag_shader)
 | 
			
		||||
            frag.Create(frag_shader, GL_FRAGMENT_SHADER);
 | 
			
		||||
        Create(separable_program, vert.handle, geo.handle, frag.handle);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Deletes the internal OpenGL resource
 | 
			
		||||
@ -148,14 +198,16 @@ public:
 | 
			
		||||
class OGLBuffer : private NonCopyable {
 | 
			
		||||
public:
 | 
			
		||||
    OGLBuffer() = default;
 | 
			
		||||
    OGLBuffer(OGLBuffer&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {}
 | 
			
		||||
 | 
			
		||||
    ~OGLBuffer() {
 | 
			
		||||
        Release();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLBuffer& operator=(OGLBuffer&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
        Release();
 | 
			
		||||
        handle = std::exchange(o.handle, 0);
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -214,14 +266,16 @@ public:
 | 
			
		||||
class OGLVertexArray : private NonCopyable {
 | 
			
		||||
public:
 | 
			
		||||
    OGLVertexArray() = default;
 | 
			
		||||
    OGLVertexArray(OGLVertexArray&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {}
 | 
			
		||||
 | 
			
		||||
    ~OGLVertexArray() {
 | 
			
		||||
        Release();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLVertexArray& operator=(OGLVertexArray&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
        Release();
 | 
			
		||||
        handle = std::exchange(o.handle, 0);
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -247,14 +301,16 @@ public:
 | 
			
		||||
class OGLFramebuffer : private NonCopyable {
 | 
			
		||||
public:
 | 
			
		||||
    OGLFramebuffer() = default;
 | 
			
		||||
    OGLFramebuffer(OGLFramebuffer&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {}
 | 
			
		||||
 | 
			
		||||
    ~OGLFramebuffer() {
 | 
			
		||||
        Release();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    OGLFramebuffer& operator=(OGLFramebuffer&& o) {
 | 
			
		||||
        std::swap(handle, o.handle);
 | 
			
		||||
        Release();
 | 
			
		||||
        handle = std::exchange(o.handle, 0);
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -2,57 +2,499 @@
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <map>
 | 
			
		||||
#include <set>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <queue>
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/engines/shader_bytecode.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 | 
			
		||||
 | 
			
		||||
namespace Maxwell3D {
 | 
			
		||||
namespace Shader {
 | 
			
		||||
namespace GLShader {
 | 
			
		||||
namespace Decompiler {
 | 
			
		||||
 | 
			
		||||
using Tegra::Shader::Attribute;
 | 
			
		||||
using Tegra::Shader::Instruction;
 | 
			
		||||
using Tegra::Shader::OpCode;
 | 
			
		||||
using Tegra::Shader::Register;
 | 
			
		||||
using Tegra::Shader::SubOp;
 | 
			
		||||
using Tegra::Shader::Uniform;
 | 
			
		||||
 | 
			
		||||
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
 | 
			
		||||
 | 
			
		||||
class Impl {
 | 
			
		||||
class DecompileFail : public std::runtime_error {
 | 
			
		||||
public:
 | 
			
		||||
    Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
 | 
			
		||||
         const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset,
 | 
			
		||||
         const std::function<std::string(u32)>& inputreg_getter,
 | 
			
		||||
         const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul,
 | 
			
		||||
         const std::string& emit_cb, const std::string& setemit_cb)
 | 
			
		||||
        : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset),
 | 
			
		||||
          inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter),
 | 
			
		||||
          sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {}
 | 
			
		||||
    using std::runtime_error::runtime_error;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
    std::string Decompile() {
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
        return {};
 | 
			
		||||
/// Describes the behaviour of the code path between a given entry point and a return point.
 | 
			
		||||
enum class ExitMethod {
 | 
			
		||||
    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
 | 
			
		||||
    AlwaysReturn, ///< All code paths reach the return point.
 | 
			
		||||
    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
 | 
			
		||||
    AlwaysEnd,    ///< All code paths reach an END instruction.
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction.
 | 
			
		||||
struct Subroutine {
 | 
			
		||||
    /// Generates a name suitable for GLSL source code.
 | 
			
		||||
    std::string GetName() const {
 | 
			
		||||
        return "sub_" + std::to_string(begin) + "_" + std::to_string(end);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    u32 begin;              ///< Entry point of the subroutine.
 | 
			
		||||
    u32 end;                ///< Return point of the subroutine.
 | 
			
		||||
    ExitMethod exit_method; ///< Exit method of the subroutine.
 | 
			
		||||
    std::set<u32> labels;   ///< Addresses refereced by JMP instructions.
 | 
			
		||||
 | 
			
		||||
    bool operator<(const Subroutine& rhs) const {
 | 
			
		||||
        return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Analyzes shader code and produces a set of subroutines.
 | 
			
		||||
class ControlFlowAnalyzer {
 | 
			
		||||
public:
 | 
			
		||||
    ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset)
 | 
			
		||||
        : program_code(program_code) {
 | 
			
		||||
 | 
			
		||||
        // Recursively finds all subroutines.
 | 
			
		||||
        const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
 | 
			
		||||
        if (program_main.exit_method != ExitMethod::AlwaysEnd)
 | 
			
		||||
            throw DecompileFail("Program does not always end");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::set<Subroutine> GetSubroutines() {
 | 
			
		||||
        return std::move(subroutines);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code;
 | 
			
		||||
    const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data;
 | 
			
		||||
    u32 main_offset;
 | 
			
		||||
    const std::function<std::string(u32)>& inputreg_getter;
 | 
			
		||||
    const std::function<std::string(u32)>& outputreg_getter;
 | 
			
		||||
    bool sanitize_mul;
 | 
			
		||||
    const std::string& emit_cb;
 | 
			
		||||
    const std::string& setemit_cb;
 | 
			
		||||
    const ProgramCode& program_code;
 | 
			
		||||
    std::set<Subroutine> subroutines;
 | 
			
		||||
    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
 | 
			
		||||
 | 
			
		||||
    /// Adds and analyzes a new subroutine if it is not added yet.
 | 
			
		||||
    const Subroutine& AddSubroutine(u32 begin, u32 end) {
 | 
			
		||||
        auto iter = subroutines.find(Subroutine{begin, end});
 | 
			
		||||
        if (iter != subroutines.end())
 | 
			
		||||
            return *iter;
 | 
			
		||||
 | 
			
		||||
        Subroutine subroutine{begin, end};
 | 
			
		||||
        subroutine.exit_method = Scan(begin, end, subroutine.labels);
 | 
			
		||||
        if (subroutine.exit_method == ExitMethod::Undetermined)
 | 
			
		||||
            throw DecompileFail("Recursive function detected");
 | 
			
		||||
        return *subroutines.insert(std::move(subroutine)).first;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Scans a range of code for labels and determines the exit method.
 | 
			
		||||
    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
 | 
			
		||||
        auto [iter, inserted] =
 | 
			
		||||
            exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
 | 
			
		||||
        ExitMethod& exit_method = iter->second;
 | 
			
		||||
        if (!inserted)
 | 
			
		||||
            return exit_method;
 | 
			
		||||
 | 
			
		||||
        for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
 | 
			
		||||
            const Instruction instr = {program_code[offset]};
 | 
			
		||||
            switch (instr.opcode.EffectiveOpCode()) {
 | 
			
		||||
            case OpCode::Id::EXIT: {
 | 
			
		||||
                return exit_method = ExitMethod::AlwaysEnd;
 | 
			
		||||
            }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        return exit_method = ExitMethod::AlwaysReturn;
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
 | 
			
		||||
                             const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data,
 | 
			
		||||
                             u32 main_offset,
 | 
			
		||||
                             const std::function<std::string(u32)>& inputreg_getter,
 | 
			
		||||
                             const std::function<std::string(u32)>& outputreg_getter,
 | 
			
		||||
                             bool sanitize_mul, const std::string& emit_cb,
 | 
			
		||||
                             const std::string& setemit_cb) {
 | 
			
		||||
    Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter,
 | 
			
		||||
              sanitize_mul, emit_cb, setemit_cb);
 | 
			
		||||
    return impl.Decompile();
 | 
			
		||||
class ShaderWriter {
 | 
			
		||||
public:
 | 
			
		||||
    void AddLine(const std::string& text) {
 | 
			
		||||
        DEBUG_ASSERT(scope >= 0);
 | 
			
		||||
        if (!text.empty()) {
 | 
			
		||||
            shader_source += std::string(static_cast<size_t>(scope) * 4, ' ');
 | 
			
		||||
        }
 | 
			
		||||
        shader_source += text + '\n';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string GetResult() {
 | 
			
		||||
        return std::move(shader_source);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    int scope = 0;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    std::string shader_source;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class GLSLGenerator {
 | 
			
		||||
public:
 | 
			
		||||
    GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
 | 
			
		||||
                  u32 main_offset, Maxwell3D::Regs::ShaderStage stage)
 | 
			
		||||
        : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
 | 
			
		||||
          stage(stage) {
 | 
			
		||||
 | 
			
		||||
        Generate();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string GetShaderCode() {
 | 
			
		||||
        return declarations.GetResult() + shader.GetResult();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    /// Gets the Subroutine object corresponding to the specified address.
 | 
			
		||||
    const Subroutine& GetSubroutine(u32 begin, u32 end) const {
 | 
			
		||||
        auto iter = subroutines.find(Subroutine{begin, end});
 | 
			
		||||
        ASSERT(iter != subroutines.end());
 | 
			
		||||
        return *iter;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Generates code representing an input attribute register.
 | 
			
		||||
    std::string GetInputAttribute(Attribute::Index attribute) {
 | 
			
		||||
        declr_input_attribute.insert(attribute);
 | 
			
		||||
 | 
			
		||||
        const u32 index{static_cast<u32>(attribute) -
 | 
			
		||||
                        static_cast<u32>(Attribute::Index::Attribute_0)};
 | 
			
		||||
        if (attribute >= Attribute::Index::Attribute_0) {
 | 
			
		||||
            return "input_attribute_" + std::to_string(index);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index);
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Generates code representing an output attribute register.
 | 
			
		||||
    std::string GetOutputAttribute(Attribute::Index attribute) {
 | 
			
		||||
        switch (attribute) {
 | 
			
		||||
        case Attribute::Index::Position:
 | 
			
		||||
            return "gl_Position";
 | 
			
		||||
        default:
 | 
			
		||||
            const u32 index{static_cast<u32>(attribute) -
 | 
			
		||||
                            static_cast<u32>(Attribute::Index::Attribute_0)};
 | 
			
		||||
            if (attribute >= Attribute::Index::Attribute_0) {
 | 
			
		||||
                declr_output_attribute.insert(attribute);
 | 
			
		||||
                return "output_attribute_" + std::to_string(index);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index);
 | 
			
		||||
            UNREACHABLE();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Generates code representing a temporary (GPR) register.
 | 
			
		||||
    std::string GetRegister(const Register& reg) {
 | 
			
		||||
        return *declr_register.insert("register_" + std::to_string(reg)).first;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Generates code representing a uniform (C buffer) register.
 | 
			
		||||
    std::string GetUniform(const Uniform& reg) const {
 | 
			
		||||
        std::string index = std::to_string(reg.index);
 | 
			
		||||
        return "uniform_" + index + "[" + std::to_string(reg.offset >> 2) + "][" +
 | 
			
		||||
               std::to_string(reg.offset & 3) + "]";
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Adds code that calls a subroutine.
 | 
			
		||||
     * @param subroutine the subroutine to call.
 | 
			
		||||
     */
 | 
			
		||||
    void CallSubroutine(const Subroutine& subroutine) {
 | 
			
		||||
        if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
 | 
			
		||||
            shader.AddLine(subroutine.GetName() + "();");
 | 
			
		||||
            shader.AddLine("return true;");
 | 
			
		||||
        } else if (subroutine.exit_method == ExitMethod::Conditional) {
 | 
			
		||||
            shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }");
 | 
			
		||||
        } else {
 | 
			
		||||
            shader.AddLine(subroutine.GetName() + "();");
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Writes code that does an assignment operation.
 | 
			
		||||
     * @param reg the destination register code.
 | 
			
		||||
     * @param value the code representing the value to assign.
 | 
			
		||||
     */
 | 
			
		||||
    void SetDest(u64 elem, const std::string& reg, const std::string& value,
 | 
			
		||||
                 u64 dest_num_components, u64 value_num_components) {
 | 
			
		||||
        std::string swizzle = ".";
 | 
			
		||||
        swizzle += "xyzw"[elem];
 | 
			
		||||
 | 
			
		||||
        std::string dest = reg + (dest_num_components != 1 ? swizzle : "");
 | 
			
		||||
        std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : "");
 | 
			
		||||
 | 
			
		||||
        shader.AddLine(dest + " = " + src + ";");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Compiles a single instruction from Tegra to GLSL.
 | 
			
		||||
     * @param offset the offset of the Tegra shader instruction.
 | 
			
		||||
     * @return the offset of the next instruction to execute. Usually it is the current offset
 | 
			
		||||
     * + 1. If the current instruction always terminates the program, returns PROGRAM_END.
 | 
			
		||||
     */
 | 
			
		||||
    u32 CompileInstr(u32 offset) {
 | 
			
		||||
        const Instruction instr = {program_code[offset]};
 | 
			
		||||
 | 
			
		||||
        shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name);
 | 
			
		||||
 | 
			
		||||
        switch (OpCode::GetInfo(instr.opcode).type) {
 | 
			
		||||
        case OpCode::Type::Arithmetic: {
 | 
			
		||||
            ASSERT(!instr.alu.abs_d);
 | 
			
		||||
 | 
			
		||||
            std::string dest = GetRegister(instr.gpr0);
 | 
			
		||||
            std::string op_a = instr.alu.negate_a ? "-" : "";
 | 
			
		||||
            op_a += GetRegister(instr.gpr8);
 | 
			
		||||
            if (instr.alu.abs_a) {
 | 
			
		||||
                op_a = "abs(" + op_a + ")";
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            std::string op_b = instr.alu.negate_b ? "-" : "";
 | 
			
		||||
            if (instr.is_b_gpr) {
 | 
			
		||||
                op_b += GetRegister(instr.gpr20);
 | 
			
		||||
            } else {
 | 
			
		||||
                op_b += GetUniform(instr.uniform);
 | 
			
		||||
            }
 | 
			
		||||
            if (instr.alu.abs_b) {
 | 
			
		||||
                op_b = "abs(" + op_b + ")";
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            switch (instr.opcode.EffectiveOpCode()) {
 | 
			
		||||
            case OpCode::Id::FMUL_C:
 | 
			
		||||
            case OpCode::Id::FMUL_R: {
 | 
			
		||||
                SetDest(0, dest, op_a + " * " + op_b, 1, 1);
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            case OpCode::Id::FADD_C:
 | 
			
		||||
            case OpCode::Id::FADD_R: {
 | 
			
		||||
                SetDest(0, dest, op_a + " + " + op_b, 1, 1);
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            default: {
 | 
			
		||||
                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
 | 
			
		||||
                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
 | 
			
		||||
                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
 | 
			
		||||
                throw DecompileFail("Unhandled instruction");
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
        case OpCode::Type::Ffma: {
 | 
			
		||||
            ASSERT_MSG(!instr.ffma.negate_b, "untested");
 | 
			
		||||
            ASSERT_MSG(!instr.ffma.negate_c, "untested");
 | 
			
		||||
 | 
			
		||||
            std::string dest = GetRegister(instr.gpr0);
 | 
			
		||||
            std::string op_a = GetRegister(instr.gpr8);
 | 
			
		||||
 | 
			
		||||
            std::string op_b = instr.ffma.negate_b ? "-" : "";
 | 
			
		||||
            op_b += GetUniform(instr.uniform);
 | 
			
		||||
 | 
			
		||||
            std::string op_c = instr.ffma.negate_c ? "-" : "";
 | 
			
		||||
            op_c += GetRegister(instr.gpr39);
 | 
			
		||||
 | 
			
		||||
            switch (instr.opcode.EffectiveOpCode()) {
 | 
			
		||||
            case OpCode::Id::FFMA_CR: {
 | 
			
		||||
                SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            default: {
 | 
			
		||||
                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x",
 | 
			
		||||
                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
 | 
			
		||||
                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
 | 
			
		||||
                throw DecompileFail("Unhandled instruction");
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
        case OpCode::Type::Memory: {
 | 
			
		||||
            std::string gpr0 = GetRegister(instr.gpr0);
 | 
			
		||||
            const Attribute::Index attribute = instr.attribute.fmt20.index;
 | 
			
		||||
 | 
			
		||||
            switch (instr.opcode.EffectiveOpCode()) {
 | 
			
		||||
            case OpCode::Id::LD_A: {
 | 
			
		||||
                ASSERT(instr.attribute.fmt20.size == 0);
 | 
			
		||||
                SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            case OpCode::Id::ST_A: {
 | 
			
		||||
                ASSERT(instr.attribute.fmt20.size == 0);
 | 
			
		||||
                SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1);
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            default: {
 | 
			
		||||
                LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x",
 | 
			
		||||
                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
 | 
			
		||||
                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
 | 
			
		||||
                throw DecompileFail("Unhandled instruction");
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        default: {
 | 
			
		||||
            switch (instr.opcode.EffectiveOpCode()) {
 | 
			
		||||
            case OpCode::Id::EXIT: {
 | 
			
		||||
                shader.AddLine("return true;");
 | 
			
		||||
                offset = PROGRAM_END - 1;
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            default: {
 | 
			
		||||
                LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
 | 
			
		||||
                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
 | 
			
		||||
                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
 | 
			
		||||
                throw DecompileFail("Unhandled instruction");
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return offset + 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Compiles a range of instructions from Tegra to GLSL.
 | 
			
		||||
     * @param begin the offset of the starting instruction.
 | 
			
		||||
     * @param end the offset where the compilation should stop (exclusive).
 | 
			
		||||
     * @return the offset of the next instruction to compile. PROGRAM_END if the program
 | 
			
		||||
     * terminates.
 | 
			
		||||
     */
 | 
			
		||||
    u32 CompileRange(u32 begin, u32 end) {
 | 
			
		||||
        u32 program_counter;
 | 
			
		||||
        for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) {
 | 
			
		||||
            program_counter = CompileInstr(program_counter);
 | 
			
		||||
        }
 | 
			
		||||
        return program_counter;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void Generate() {
 | 
			
		||||
        // Add declarations for all subroutines
 | 
			
		||||
        for (const auto& subroutine : subroutines) {
 | 
			
		||||
            shader.AddLine("bool " + subroutine.GetName() + "();");
 | 
			
		||||
        }
 | 
			
		||||
        shader.AddLine("");
 | 
			
		||||
 | 
			
		||||
        // Add the main entry point
 | 
			
		||||
        shader.AddLine("bool exec_shader() {");
 | 
			
		||||
        ++shader.scope;
 | 
			
		||||
        CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
 | 
			
		||||
        --shader.scope;
 | 
			
		||||
        shader.AddLine("}\n");
 | 
			
		||||
 | 
			
		||||
        // Add definitions for all subroutines
 | 
			
		||||
        for (const auto& subroutine : subroutines) {
 | 
			
		||||
            std::set<u32> labels = subroutine.labels;
 | 
			
		||||
 | 
			
		||||
            shader.AddLine("bool " + subroutine.GetName() + "() {");
 | 
			
		||||
            ++shader.scope;
 | 
			
		||||
 | 
			
		||||
            if (labels.empty()) {
 | 
			
		||||
                if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) {
 | 
			
		||||
                    shader.AddLine("return false;");
 | 
			
		||||
                }
 | 
			
		||||
            } else {
 | 
			
		||||
                labels.insert(subroutine.begin);
 | 
			
		||||
                shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
 | 
			
		||||
                shader.AddLine("while (true) {");
 | 
			
		||||
                ++shader.scope;
 | 
			
		||||
 | 
			
		||||
                shader.AddLine("switch (jmp_to) {");
 | 
			
		||||
 | 
			
		||||
                for (auto label : labels) {
 | 
			
		||||
                    shader.AddLine("case " + std::to_string(label) + "u: {");
 | 
			
		||||
                    ++shader.scope;
 | 
			
		||||
 | 
			
		||||
                    auto next_it = labels.lower_bound(label + 1);
 | 
			
		||||
                    u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
 | 
			
		||||
 | 
			
		||||
                    u32 compile_end = CompileRange(label, next_label);
 | 
			
		||||
                    if (compile_end > next_label && compile_end != PROGRAM_END) {
 | 
			
		||||
                        // This happens only when there is a label inside a IF/LOOP block
 | 
			
		||||
                        shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }");
 | 
			
		||||
                        labels.emplace(compile_end);
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    --shader.scope;
 | 
			
		||||
                    shader.AddLine("}");
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                shader.AddLine("default: return false;");
 | 
			
		||||
                shader.AddLine("}");
 | 
			
		||||
 | 
			
		||||
                --shader.scope;
 | 
			
		||||
                shader.AddLine("}");
 | 
			
		||||
 | 
			
		||||
                shader.AddLine("return false;");
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            --shader.scope;
 | 
			
		||||
            shader.AddLine("}\n");
 | 
			
		||||
 | 
			
		||||
            DEBUG_ASSERT(shader.scope == 0);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        GenerateDeclarations();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Add declarations for registers
 | 
			
		||||
    void GenerateDeclarations() {
 | 
			
		||||
        for (const auto& reg : declr_register) {
 | 
			
		||||
            declarations.AddLine("float " + reg + " = 0.0;");
 | 
			
		||||
        }
 | 
			
		||||
        declarations.AddLine("");
 | 
			
		||||
 | 
			
		||||
        for (const auto& index : declr_input_attribute) {
 | 
			
		||||
            // TODO(bunnei): Use proper number of elements for these
 | 
			
		||||
            declarations.AddLine("layout(location = " +
 | 
			
		||||
                                 std::to_string(static_cast<u32>(index) -
 | 
			
		||||
                                                static_cast<u32>(Attribute::Index::Attribute_0)) +
 | 
			
		||||
                                 ") in vec4 " + GetInputAttribute(index) + ";");
 | 
			
		||||
        }
 | 
			
		||||
        declarations.AddLine("");
 | 
			
		||||
 | 
			
		||||
        for (const auto& index : declr_output_attribute) {
 | 
			
		||||
            // TODO(bunnei): Use proper number of elements for these
 | 
			
		||||
            declarations.AddLine("layout(location = " +
 | 
			
		||||
                                 std::to_string(static_cast<u32>(index) -
 | 
			
		||||
                                                static_cast<u32>(Attribute::Index::Attribute_0)) +
 | 
			
		||||
                                 ") out vec4 " + GetOutputAttribute(index) + ";");
 | 
			
		||||
        }
 | 
			
		||||
        declarations.AddLine("");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    const std::set<Subroutine>& subroutines;
 | 
			
		||||
    const ProgramCode& program_code;
 | 
			
		||||
    const u32 main_offset;
 | 
			
		||||
    Maxwell3D::Regs::ShaderStage stage;
 | 
			
		||||
 | 
			
		||||
    ShaderWriter shader;
 | 
			
		||||
    ShaderWriter declarations;
 | 
			
		||||
 | 
			
		||||
    // Declarations
 | 
			
		||||
    std::set<std::string> declr_register;
 | 
			
		||||
    std::set<Attribute::Index> declr_input_attribute;
 | 
			
		||||
    std::set<Attribute::Index> declr_output_attribute;
 | 
			
		||||
}; // class GLSLGenerator
 | 
			
		||||
 | 
			
		||||
/// Returns the GLSL prototype of the entry point that the decompiled program defines.
std::string GetCommonDeclarations() {
    std::string declarations{"bool exec_shader();"};
    return declarations;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Decompiles a shader program into GLSL.
 * @param program_code the raw program to decompile.
 * @param main_offset the offset of the entry point.
 * @param stage the shader stage the program belongs to.
 * @return the generated GLSL source, or an empty optional if decompilation failed. The
 * failure is logged.
 */
boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
                                              Maxwell3D::Regs::ShaderStage stage) {
    boost::optional<std::string> glsl;

    try {
        const auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines();
        // The generator does all of its work while it is constructed
        GLSLGenerator generator(subroutines, program_code, main_offset, stage);
        glsl = generator.GetShaderCode();
    } catch (const DecompileFail& exception) {
        LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what());
    }

    return glsl;
}
 | 
			
		||||
 | 
			
		||||
} // namespace Decompiler
 | 
			
		||||
} // namespace Shader
 | 
			
		||||
} // namespace Maxwell3D
 | 
			
		||||
} // namespace GLShader
 | 
			
		||||
 | 
			
		||||
@ -5,23 +5,20 @@
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <functional>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <boost/optional.hpp>
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
 | 
			
		||||
 | 
			
		||||
namespace Maxwell3D {
 | 
			
		||||
namespace Shader {
 | 
			
		||||
namespace GLShader {
 | 
			
		||||
namespace Decompiler {
 | 
			
		||||
 | 
			
		||||
constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000};
 | 
			
		||||
constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000};
 | 
			
		||||
using Tegra::Engines::Maxwell3D;
 | 
			
		||||
 | 
			
		||||
std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code,
 | 
			
		||||
                             const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data,
 | 
			
		||||
                             u32 main_offset,
 | 
			
		||||
                             const std::function<std::string(u32)>& inputreg_getter,
 | 
			
		||||
                             const std::function<std::string(u32)>& outputreg_getter,
 | 
			
		||||
                             bool sanitize_mul, const std::string& emit_cb = "",
 | 
			
		||||
                             const std::string& setemit_cb = "");
 | 
			
		||||
std::string GetCommonDeclarations();
 | 
			
		||||
 | 
			
		||||
boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
 | 
			
		||||
                                              Maxwell3D::Regs::ShaderStage stage);
 | 
			
		||||
 | 
			
		||||
} // namespace Decompiler
 | 
			
		||||
} // namespace Shader
 | 
			
		||||
} // namespace Maxwell3D
 | 
			
		||||
} // namespace GLShader
 | 
			
		||||
 | 
			
		||||
@ -7,12 +7,12 @@
 | 
			
		||||
 | 
			
		||||
namespace GLShader {
 | 
			
		||||
 | 
			
		||||
std::string GenerateVertexShader(const MaxwellVSConfig& config) {
 | 
			
		||||
/// Placeholder for vertex shader generation: it flags any call through UNREACHABLE() and
/// hands back an empty string.
std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) {
    UNREACHABLE();
    return std::string{};
}
 | 
			
		||||
 | 
			
		||||
std::string GenerateFragmentShader(const MaxwellFSConfig& config) {
 | 
			
		||||
/// Placeholder for fragment shader generation: it flags any call through UNREACHABLE() and
/// hands back an empty string.
std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) {
    UNREACHABLE();
    return std::string{};
}
 | 
			
		||||
 | 
			
		||||
@ -4,46 +4,67 @@
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "common/hash.h"
 | 
			
		||||
 | 
			
		||||
namespace GLShader {
 | 
			
		||||
 | 
			
		||||
enum Attributes {
 | 
			
		||||
    ATTRIBUTE_POSITION,
 | 
			
		||||
    ATTRIBUTE_COLOR,
 | 
			
		||||
    ATTRIBUTE_TEXCOORD0,
 | 
			
		||||
    ATTRIBUTE_TEXCOORD1,
 | 
			
		||||
    ATTRIBUTE_TEXCOORD2,
 | 
			
		||||
    ATTRIBUTE_TEXCOORD0_W,
 | 
			
		||||
    ATTRIBUTE_NORMQUAT,
 | 
			
		||||
    ATTRIBUTE_VIEW,
 | 
			
		||||
constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
 | 
			
		||||
 | 
			
		||||
using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>;
 | 
			
		||||
 | 
			
		||||
struct ShaderSetup {
 | 
			
		||||
    ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {}
 | 
			
		||||
 | 
			
		||||
    ProgramCode program_code;
 | 
			
		||||
    bool program_code_hash_dirty = true;
 | 
			
		||||
 | 
			
		||||
    u64 GetProgramCodeHash() {
 | 
			
		||||
        if (program_code_hash_dirty) {
 | 
			
		||||
            program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
 | 
			
		||||
            program_code_hash_dirty = false;
 | 
			
		||||
        }
 | 
			
		||||
        return program_code_hash;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    u64 program_code_hash{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct MaxwellShaderConfigCommon {
 | 
			
		||||
    explicit MaxwellShaderConfigCommon(){};
 | 
			
		||||
    void Init(ShaderSetup& setup) {
 | 
			
		||||
        program_hash = setup.GetProgramCodeHash();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    u64 program_hash;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct MaxwellVSConfig : MaxwellShaderConfigCommon {
 | 
			
		||||
    explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {}
 | 
			
		||||
 | 
			
		||||
    bool operator==(const MaxwellVSConfig& o) const {
 | 
			
		||||
        return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0;
 | 
			
		||||
    };
 | 
			
		||||
struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> {
 | 
			
		||||
    explicit MaxwellVSConfig(ShaderSetup& setup) {
 | 
			
		||||
        state.Init(setup);
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct MaxwellFSConfig : MaxwellShaderConfigCommon {
 | 
			
		||||
    explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {}
 | 
			
		||||
 | 
			
		||||
    bool operator==(const MaxwellFSConfig& o) const {
 | 
			
		||||
        return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0;
 | 
			
		||||
    };
 | 
			
		||||
struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> {
 | 
			
		||||
    explicit MaxwellFSConfig(ShaderSetup& setup) {
 | 
			
		||||
        state.Init(setup);
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
std::string GenerateVertexShader(const MaxwellVSConfig& config);
 | 
			
		||||
std::string GenerateFragmentShader(const MaxwellFSConfig& config);
 | 
			
		||||
/**
 | 
			
		||||
 * Generates the GLSL vertex shader program source code for the given VS program
 | 
			
		||||
 * @returns String of the shader source code
 | 
			
		||||
 */
 | 
			
		||||
std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Generates the GLSL fragment shader program source code for the given FS program
 | 
			
		||||
 * @returns String of the shader source code
 | 
			
		||||
 */
 | 
			
		||||
std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config);
 | 
			
		||||
 | 
			
		||||
} // namespace GLShader
 | 
			
		||||
 | 
			
		||||
@ -52,14 +73,14 @@ namespace std {
 | 
			
		||||
template <>
 | 
			
		||||
struct hash<GLShader::MaxwellVSConfig> {
 | 
			
		||||
    size_t operator()(const GLShader::MaxwellVSConfig& k) const {
 | 
			
		||||
        return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig));
 | 
			
		||||
        return k.Hash();
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <>
 | 
			
		||||
struct hash<GLShader::MaxwellFSConfig> {
 | 
			
		||||
    size_t operator()(const GLShader::MaxwellFSConfig& k) const {
 | 
			
		||||
        return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig));
 | 
			
		||||
        return k.Hash();
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										65
									
								
								src/video_core/renderer_opengl/gl_shader_manager.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								src/video_core/renderer_opengl/gl_shader_manager.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,65 @@
 | 
			
		||||
// Copyright 2018 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/hle/kernel/process.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
 | 
			
		||||
 | 
			
		||||
namespace GLShader {
 | 
			
		||||
 | 
			
		||||
namespace Impl {
 | 
			
		||||
/**
 * Assigns a binding point to a named uniform block of a program. Programs that do not contain
 * the block are left untouched.
 * @param shader the program object to update.
 * @param name the name of the uniform block.
 * @param binding the shader stage whose numeric value is used as the binding point.
 * @param expected_size the data size the block is asserted to have.
 */
void SetShaderUniformBlockBinding(GLuint shader, const char* name,
                                  Maxwell3D::Regs::ShaderStage binding, size_t expected_size) {
    const GLuint block_index = glGetUniformBlockIndex(shader, name);
    if (block_index == GL_INVALID_INDEX) {
        return;
    }

    // Verify that the layout the driver reports matches the structure uploaded from the CPU
    GLint ub_size = 0;
    glGetActiveUniformBlockiv(shader, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
    ASSERT_MSG(ub_size == expected_size,
               "Uniform block size did not match! Got %d, expected %zu",
               static_cast<int>(ub_size), expected_size);

    glUniformBlockBinding(shader, block_index, static_cast<GLuint>(binding));
}
 | 
			
		||||
 | 
			
		||||
/// Binds the vs_config, gs_config and fs_config uniform blocks of a program to the binding
/// point of their shader stage.
void SetShaderUniformBlockBindings(GLuint shader) {
    struct StageBlock {
        const char* name;
        Maxwell3D::Regs::ShaderStage stage;
    };

    const StageBlock stage_blocks[] = {
        {"vs_config", Maxwell3D::Regs::ShaderStage::Vertex},
        {"gs_config", Maxwell3D::Regs::ShaderStage::Geometry},
        {"fs_config", Maxwell3D::Regs::ShaderStage::Fragment},
    };

    // Every stage uses the same uniform data layout
    for (const auto& block : stage_blocks) {
        SetShaderUniformBlockBinding(shader, block.name, block.stage,
                                     sizeof(MaxwellUniformData));
    }
}
 | 
			
		||||
 | 
			
		||||
/// Points every `tex[i]` sampler uniform of a program at the texture unit reserved for
/// Maxwell texture i. The program is swapped into the tracked GL state for the duration of
/// the update and the previously tracked program is put back afterwards.
void SetShaderSamplerBindings(GLuint shader) {
    OpenGLState cur_state = OpenGLState::GetCurState();
    const GLuint old_program = cur_state.draw.shader_program;
    cur_state.draw.shader_program = shader;
    cur_state.Apply();

    // Set the texture samplers to correspond to different texture units
    for (u32 texture = 0; texture < NumTextureSamplers; ++texture) {
        const std::string uniform_name = "tex[" + std::to_string(texture) + "]";
        const GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str());
        if (uniform_tex == -1) {
            // The program has no sampler with this name
            continue;
        }
        glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
    }

    cur_state.draw.shader_program = old_program;
    cur_state.Apply();
}
 | 
			
		||||
 | 
			
		||||
} // namespace Impl
 | 
			
		||||
 | 
			
		||||
/**
 * Fills the uniform data with the contents of the constant buffers of a shader stage.
 * @param shader_stage the stage whose constant buffers are read. Each buffer address is
 * translated with the GPU memory manager and sizeof(ConstBuffer) bytes are copied from it.
 */
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
    // The accessor is reached through the class; the former `Core::System().GetInstance()`
    // constructed a temporary System object just to call it.
    const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;

    // NOTE(review): assumes shader_stage.const_buffers has no more entries than
    // const_buffers - confirm against Maxwell3D::Regs::MaxConstBuffers.
    for (unsigned index = 0; index < shader_stage.const_buffers.size(); ++index) {
        const auto& const_buffer = shader_stage.const_buffers[index];
        const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address);
        Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer));
    }
}
 | 
			
		||||
 | 
			
		||||
} // namespace GLShader
 | 
			
		||||
							
								
								
									
										151
									
								
								src/video_core/renderer_opengl/gl_shader_manager.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										151
									
								
								src/video_core/renderer_opengl/gl_shader_manager.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,151 @@
 | 
			
		||||
// Copyright 2018 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <tuple>
 | 
			
		||||
#include <unordered_map>
 | 
			
		||||
#include <boost/functional/hash.hpp>
 | 
			
		||||
#include <glad/glad.h>
 | 
			
		||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
 | 
			
		||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
 | 
			
		||||
 | 
			
		||||
namespace GLShader {
 | 
			
		||||
 | 
			
		||||
/// Number of OpenGL texture samplers that can be used in the fragment shader
 | 
			
		||||
static constexpr size_t NumTextureSamplers = 32;
 | 
			
		||||
 | 
			
		||||
using Tegra::Engines::Maxwell3D;
 | 
			
		||||
 | 
			
		||||
namespace Impl {
 | 
			
		||||
void SetShaderUniformBlockBindings(GLuint shader);
 | 
			
		||||
void SetShaderSamplerBindings(GLuint shader);
 | 
			
		||||
} // namespace Impl
 | 
			
		||||
 | 
			
		||||
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
 | 
			
		||||
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
 | 
			
		||||
//       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 | 
			
		||||
//       Not following that rule will cause problems on some AMD drivers.
 | 
			
		||||
struct MaxwellUniformData {
 | 
			
		||||
    void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
 | 
			
		||||
 | 
			
		||||
    using ConstBuffer = std::array<GLvec4, 4>;
 | 
			
		||||
    alignas(16) std::array<ConstBuffer, Maxwell3D::Regs::MaxConstBuffers> const_buffers;
 | 
			
		||||
};
 | 
			
		||||
static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect");
 | 
			
		||||
static_assert(sizeof(MaxwellUniformData) < 16384,
 | 
			
		||||
              "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
 | 
			
		||||
 | 
			
		||||
class OGLShaderStage {
 | 
			
		||||
public:
 | 
			
		||||
    OGLShaderStage() = default;
 | 
			
		||||
 | 
			
		||||
    void Create(const char* source, GLenum type) {
 | 
			
		||||
        OGLShader shader;
 | 
			
		||||
        shader.Create(source, type);
 | 
			
		||||
        program.Create(true, shader.handle);
 | 
			
		||||
        Impl::SetShaderUniformBlockBindings(program.handle);
 | 
			
		||||
        Impl::SetShaderSamplerBindings(program.handle);
 | 
			
		||||
    }
 | 
			
		||||
    GLuint GetHandle() const {
 | 
			
		||||
        return program.handle;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    OGLProgram program;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// TODO(wwylele): beautify this doc
 | 
			
		||||
// This is a shader cache designed for translating PICA shader to GLSL shader.
 | 
			
		||||
// The double cache is needed because different KeyConfigType, which includes a hash of the code
 | 
			
		||||
// region (including its leftover unused code) can generate the same GLSL code.
 | 
			
		||||
template <typename KeyConfigType,
 | 
			
		||||
          std::string (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), GLenum ShaderType>
 | 
			
		||||
class ShaderCache {
 | 
			
		||||
public:
 | 
			
		||||
    ShaderCache() = default;
 | 
			
		||||
 | 
			
		||||
    GLuint Get(const KeyConfigType& key, const ShaderSetup& setup) {
 | 
			
		||||
        auto map_it = shader_map.find(key);
 | 
			
		||||
        if (map_it == shader_map.end()) {
 | 
			
		||||
            std::string program = CodeGenerator(setup, key);
 | 
			
		||||
 | 
			
		||||
            auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{});
 | 
			
		||||
            OGLShaderStage& cached_shader = iter->second;
 | 
			
		||||
            if (new_shader) {
 | 
			
		||||
                cached_shader.Create(program.c_str(), ShaderType);
 | 
			
		||||
            }
 | 
			
		||||
            shader_map[key] = &cached_shader;
 | 
			
		||||
            return cached_shader.GetHandle();
 | 
			
		||||
        } else {
 | 
			
		||||
            return map_it->second->GetHandle();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map;
 | 
			
		||||
    std::unordered_map<std::string, OGLShaderStage> shader_cache;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
 | 
			
		||||
 | 
			
		||||
using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
 | 
			
		||||
 | 
			
		||||
class ProgramManager {
 | 
			
		||||
public:
 | 
			
		||||
    ProgramManager() {
 | 
			
		||||
        pipeline.Create();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void UseProgrammableVertexShader(const MaxwellVSConfig& config, const ShaderSetup setup) {
 | 
			
		||||
        current.vs = vertex_shaders.Get(config, setup);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) {
 | 
			
		||||
        current.fs = fragment_shaders.Get(config, setup);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void UseTrivialGeometryShader() {
 | 
			
		||||
        current.gs = 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void ApplyTo(OpenGLState& state) {
 | 
			
		||||
        // Workaround for AMD bug
 | 
			
		||||
        glUseProgramStages(pipeline.handle,
 | 
			
		||||
                           GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
 | 
			
		||||
                           0);
 | 
			
		||||
 | 
			
		||||
        glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs);
 | 
			
		||||
        glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs);
 | 
			
		||||
        glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs);
 | 
			
		||||
        state.draw.shader_program = 0;
 | 
			
		||||
        state.draw.program_pipeline = pipeline.handle;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    struct ShaderTuple {
 | 
			
		||||
        GLuint vs = 0, gs = 0, fs = 0;
 | 
			
		||||
        bool operator==(const ShaderTuple& rhs) const {
 | 
			
		||||
            return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
 | 
			
		||||
        }
 | 
			
		||||
        struct Hash {
 | 
			
		||||
            std::size_t operator()(const ShaderTuple& tuple) const {
 | 
			
		||||
                std::size_t hash = 0;
 | 
			
		||||
                boost::hash_combine(hash, tuple.vs);
 | 
			
		||||
                boost::hash_combine(hash, tuple.gs);
 | 
			
		||||
                boost::hash_combine(hash, tuple.fs);
 | 
			
		||||
                return hash;
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
    };
 | 
			
		||||
    ShaderTuple current;
 | 
			
		||||
    VertexShaders vertex_shaders;
 | 
			
		||||
    FragmentShaders fragment_shaders;
 | 
			
		||||
 | 
			
		||||
    std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache;
 | 
			
		||||
    OGLPipeline pipeline;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace GLShader
 | 
			
		||||
@ -10,156 +10,41 @@
 | 
			
		||||
 | 
			
		||||
namespace GLShader {
 | 
			
		||||
 | 
			
		||||
GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
 | 
			
		||||
                   const char* fragment_shader, const std::vector<const char*>& feedback_vars,
 | 
			
		||||
                   bool separable_program) {
 | 
			
		||||
    // Create the shaders
 | 
			
		||||
    GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0;
 | 
			
		||||
    GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0;
 | 
			
		||||
    GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0;
 | 
			
		||||
GLuint LoadShader(const char* source, GLenum type) {
 | 
			
		||||
    const char* debug_type;
 | 
			
		||||
    switch (type) {
 | 
			
		||||
    case GL_VERTEX_SHADER:
 | 
			
		||||
        debug_type = "vertex";
 | 
			
		||||
        break;
 | 
			
		||||
    case GL_GEOMETRY_SHADER:
 | 
			
		||||
        debug_type = "geometry";
 | 
			
		||||
        break;
 | 
			
		||||
    case GL_FRAGMENT_SHADER:
 | 
			
		||||
        debug_type = "fragment";
 | 
			
		||||
        break;
 | 
			
		||||
    default:
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
    }
 | 
			
		||||
    GLuint shader_id = glCreateShader(type);
 | 
			
		||||
    glShaderSource(shader_id, 1, &source, nullptr);
 | 
			
		||||
    NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
 | 
			
		||||
    glCompileShader(shader_id);
 | 
			
		||||
 | 
			
		||||
    GLint result = GL_FALSE;
 | 
			
		||||
    int info_log_length;
 | 
			
		||||
 | 
			
		||||
    if (vertex_shader) {
 | 
			
		||||
        // Compile Vertex Shader
 | 
			
		||||
        LOG_DEBUG(Render_OpenGL, "Compiling vertex shader...");
 | 
			
		||||
 | 
			
		||||
        glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr);
 | 
			
		||||
        glCompileShader(vertex_shader_id);
 | 
			
		||||
 | 
			
		||||
        // Check Vertex Shader
 | 
			
		||||
        glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result);
 | 
			
		||||
        glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
 | 
			
		||||
 | 
			
		||||
        if (info_log_length > 1) {
 | 
			
		||||
            std::vector<char> vertex_shader_error(info_log_length);
 | 
			
		||||
            glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]);
 | 
			
		||||
            if (result == GL_TRUE) {
 | 
			
		||||
                LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]);
 | 
			
		||||
            } else {
 | 
			
		||||
                LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s",
 | 
			
		||||
                             &vertex_shader_error[0]);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (geometry_shader) {
 | 
			
		||||
        // Compile Geometry Shader
 | 
			
		||||
        LOG_DEBUG(Render_OpenGL, "Compiling geometry shader...");
 | 
			
		||||
 | 
			
		||||
        glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr);
 | 
			
		||||
        glCompileShader(geometry_shader_id);
 | 
			
		||||
 | 
			
		||||
        // Check Geometry Shader
 | 
			
		||||
        glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result);
 | 
			
		||||
        glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
 | 
			
		||||
 | 
			
		||||
        if (info_log_length > 1) {
 | 
			
		||||
            std::vector<char> geometry_shader_error(info_log_length);
 | 
			
		||||
            glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr,
 | 
			
		||||
                               &geometry_shader_error[0]);
 | 
			
		||||
            if (result == GL_TRUE) {
 | 
			
		||||
                LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]);
 | 
			
		||||
            } else {
 | 
			
		||||
                LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s",
 | 
			
		||||
                             &geometry_shader_error[0]);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (fragment_shader) {
 | 
			
		||||
        // Compile Fragment Shader
 | 
			
		||||
        LOG_DEBUG(Render_OpenGL, "Compiling fragment shader...");
 | 
			
		||||
 | 
			
		||||
        glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr);
 | 
			
		||||
        glCompileShader(fragment_shader_id);
 | 
			
		||||
 | 
			
		||||
        // Check Fragment Shader
 | 
			
		||||
        glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result);
 | 
			
		||||
        glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
 | 
			
		||||
 | 
			
		||||
        if (info_log_length > 1) {
 | 
			
		||||
            std::vector<char> fragment_shader_error(info_log_length);
 | 
			
		||||
            glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr,
 | 
			
		||||
                               &fragment_shader_error[0]);
 | 
			
		||||
            if (result == GL_TRUE) {
 | 
			
		||||
                LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
 | 
			
		||||
            } else {
 | 
			
		||||
                LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s",
 | 
			
		||||
                             &fragment_shader_error[0]);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Link the program
 | 
			
		||||
    LOG_DEBUG(Render_OpenGL, "Linking program...");
 | 
			
		||||
 | 
			
		||||
    GLuint program_id = glCreateProgram();
 | 
			
		||||
    if (vertex_shader) {
 | 
			
		||||
        glAttachShader(program_id, vertex_shader_id);
 | 
			
		||||
    }
 | 
			
		||||
    if (geometry_shader) {
 | 
			
		||||
        glAttachShader(program_id, geometry_shader_id);
 | 
			
		||||
    }
 | 
			
		||||
    if (fragment_shader) {
 | 
			
		||||
        glAttachShader(program_id, fragment_shader_id);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!feedback_vars.empty()) {
 | 
			
		||||
        auto varyings = feedback_vars;
 | 
			
		||||
        glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()),
 | 
			
		||||
                                    &varyings[0], GL_INTERLEAVED_ATTRIBS);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (separable_program) {
 | 
			
		||||
        glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    glLinkProgram(program_id);
 | 
			
		||||
 | 
			
		||||
    // Check the program
 | 
			
		||||
    glGetProgramiv(program_id, GL_LINK_STATUS, &result);
 | 
			
		||||
    glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
 | 
			
		||||
    GLint info_log_length;
 | 
			
		||||
    glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
 | 
			
		||||
    glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
 | 
			
		||||
 | 
			
		||||
    if (info_log_length > 1) {
 | 
			
		||||
        std::vector<char> program_error(info_log_length);
 | 
			
		||||
        glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
 | 
			
		||||
        std::string shader_error(info_log_length, ' ');
 | 
			
		||||
        glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
 | 
			
		||||
        if (result == GL_TRUE) {
 | 
			
		||||
            LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]);
 | 
			
		||||
            NGLOG_DEBUG(Render_OpenGL, "{}", shader_error);
 | 
			
		||||
        } else {
 | 
			
		||||
            LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]);
 | 
			
		||||
            NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // If the program linking failed at least one of the shaders was probably bad
 | 
			
		||||
    if (result == GL_FALSE) {
 | 
			
		||||
        if (vertex_shader) {
 | 
			
		||||
            LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
 | 
			
		||||
        }
 | 
			
		||||
        if (geometry_shader) {
 | 
			
		||||
            LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader);
 | 
			
		||||
        }
 | 
			
		||||
        if (fragment_shader) {
 | 
			
		||||
            LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    ASSERT_MSG(result == GL_TRUE, "Shader not linked");
 | 
			
		||||
 | 
			
		||||
    if (vertex_shader) {
 | 
			
		||||
        glDetachShader(program_id, vertex_shader_id);
 | 
			
		||||
        glDeleteShader(vertex_shader_id);
 | 
			
		||||
    }
 | 
			
		||||
    if (geometry_shader) {
 | 
			
		||||
        glDetachShader(program_id, geometry_shader_id);
 | 
			
		||||
        glDeleteShader(geometry_shader_id);
 | 
			
		||||
    }
 | 
			
		||||
    if (fragment_shader) {
 | 
			
		||||
        glDetachShader(program_id, fragment_shader_id);
 | 
			
		||||
        glDeleteShader(fragment_shader_id);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return program_id;
 | 
			
		||||
    return shader_id;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace GLShader
 | 
			
		||||
 | 
			
		||||
@ -6,18 +6,60 @@
 | 
			
		||||
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <glad/glad.h>
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
 | 
			
		||||
namespace GLShader {
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
 | 
			
		||||
 * @param vertex_shader String of the GLSL vertex shader program
 | 
			
		||||
 * @param geometry_shader String of the GLSL geometry shader program
 | 
			
		||||
 * @param fragment_shader String of the GLSL fragment shader program
 | 
			
		||||
 * @returns Handle of the newly created OpenGL shader object
 | 
			
		||||
 * Utility function to create and compile an OpenGL GLSL shader
 | 
			
		||||
 * @param source String of the GLSL shader program
 | 
			
		||||
 * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
 | 
			
		||||
 */
 | 
			
		||||
GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader,
 | 
			
		||||
                   const char* fragment_shader, const std::vector<const char*>& feedback_vars = {},
 | 
			
		||||
                   bool separable_program = false);
 | 
			
		||||
GLuint LoadShader(const char* source, GLenum type);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
 | 
			
		||||
 * @param separable_program whether to create a separable program
 | 
			
		||||
 * @param shaders ID of shaders to attach to the program
 | 
			
		||||
 * @returns Handle of the newly created OpenGL program object
 | 
			
		||||
 */
 | 
			
		||||
template <typename... T>
 | 
			
		||||
GLuint LoadProgram(bool separable_program, T... shaders) {
 | 
			
		||||
    // Link the program
 | 
			
		||||
    NGLOG_DEBUG(Render_OpenGL, "Linking program...");
 | 
			
		||||
 | 
			
		||||
    GLuint program_id = glCreateProgram();
 | 
			
		||||
 | 
			
		||||
    ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
 | 
			
		||||
 | 
			
		||||
    if (separable_program) {
 | 
			
		||||
        glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    glLinkProgram(program_id);
 | 
			
		||||
 | 
			
		||||
    // Check the program
 | 
			
		||||
    GLint result = GL_FALSE;
 | 
			
		||||
    GLint info_log_length;
 | 
			
		||||
    glGetProgramiv(program_id, GL_LINK_STATUS, &result);
 | 
			
		||||
    glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
 | 
			
		||||
 | 
			
		||||
    if (info_log_length > 1) {
 | 
			
		||||
        std::string program_error(info_log_length, ' ');
 | 
			
		||||
        glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
 | 
			
		||||
        if (result == GL_TRUE) {
 | 
			
		||||
            NGLOG_DEBUG(Render_OpenGL, "{}", program_error);
 | 
			
		||||
        } else {
 | 
			
		||||
            NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ASSERT_MSG(result == GL_TRUE, "Shader not linked");
 | 
			
		||||
 | 
			
		||||
    ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
 | 
			
		||||
 | 
			
		||||
    return program_id;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace GLShader
 | 
			
		||||
 | 
			
		||||
@ -10,6 +10,14 @@
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
 | 
			
		||||
using GLvec2 = std::array<GLfloat, 2>;
 | 
			
		||||
using GLvec3 = std::array<GLfloat, 3>;
 | 
			
		||||
using GLvec4 = std::array<GLfloat, 4>;
 | 
			
		||||
 | 
			
		||||
using GLuvec2 = std::array<GLuint, 2>;
 | 
			
		||||
using GLuvec3 = std::array<GLuint, 3>;
 | 
			
		||||
using GLuvec4 = std::array<GLuint, 4>;
 | 
			
		||||
 | 
			
		||||
namespace MaxwellToGL {
 | 
			
		||||
 | 
			
		||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 | 
			
		||||
@ -39,6 +47,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
 | 
			
		||||
 | 
			
		||||
inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
 | 
			
		||||
    switch (topology) {
 | 
			
		||||
    case Maxwell::PrimitiveTopology::Triangles:
 | 
			
		||||
        return GL_TRIANGLES;
 | 
			
		||||
    case Maxwell::PrimitiveTopology::TriangleStrip:
 | 
			
		||||
        return GL_TRIANGLE_STRIP;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -57,7 +57,7 @@ uniform sampler2D color_texture;
 | 
			
		||||
void main() {
 | 
			
		||||
    // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
 | 
			
		||||
    // support more framebuffer pixel formats.
 | 
			
		||||
    color = texture(color_texture, frag_tex_coord).abgr;
 | 
			
		||||
    color = texture(color_texture, frag_tex_coord);
 | 
			
		||||
}
 | 
			
		||||
)";
 | 
			
		||||
 | 
			
		||||
@ -210,7 +210,7 @@ void RendererOpenGL::InitOpenGLObjects() {
 | 
			
		||||
                 0.0f);
 | 
			
		||||
 | 
			
		||||
    // Link shaders and get variable locations
 | 
			
		||||
    shader.Create(vertex_shader, nullptr, fragment_shader);
 | 
			
		||||
    shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
 | 
			
		||||
    state.draw.shader_program = shader.handle;
 | 
			
		||||
    state.Apply();
 | 
			
		||||
    uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
 | 
			
		||||
@ -311,10 +311,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    std::array<ScreenRectVertex, 4> vertices = {{
 | 
			
		||||
        ScreenRectVertex(x, y, texcoords.top, right),
 | 
			
		||||
        ScreenRectVertex(x + w, y, texcoords.bottom, right),
 | 
			
		||||
        ScreenRectVertex(x, y + h, texcoords.top, left),
 | 
			
		||||
        ScreenRectVertex(x + w, y + h, texcoords.bottom, left),
 | 
			
		||||
        ScreenRectVertex(x, y, texcoords.top, left),
 | 
			
		||||
        ScreenRectVertex(x + w, y, texcoords.bottom, left),
 | 
			
		||||
        ScreenRectVertex(x, y + h, texcoords.top, right),
 | 
			
		||||
        ScreenRectVertex(x + w, y + h, texcoords.bottom, right),
 | 
			
		||||
    }};
 | 
			
		||||
 | 
			
		||||
    state.texture_units[0].texture_2d = screen_info.display_texture;
 | 
			
		||||
 | 
			
		||||
@ -72,7 +72,7 @@ private:
 | 
			
		||||
    // OpenGL object IDs
 | 
			
		||||
    OGLVertexArray vertex_array;
 | 
			
		||||
    OGLBuffer vertex_buffer;
 | 
			
		||||
    OGLShader shader;
 | 
			
		||||
    OGLProgram shader;
 | 
			
		||||
 | 
			
		||||
    /// Display information for Switch screen
 | 
			
		||||
    ScreenInfo screen_info;
 | 
			
		||||
 | 
			
		||||
@ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe
 | 
			
		||||
            const u32 coarse_y = y & ~127;
 | 
			
		||||
            u32 morton_offset =
 | 
			
		||||
                GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
 | 
			
		||||
            u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
 | 
			
		||||
            u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;
 | 
			
		||||
 | 
			
		||||
            data_ptrs[morton_to_gl] = morton_data + morton_offset;
 | 
			
		||||
            data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user