gl_rasterizer_cache: Reintroduce code for handling swizzle and flush to guest RAM.
This commit is contained in:
		
							parent
							
								
									78f2a6a9e1
								
							
						
					
					
						commit
						9b929e934b
					
				@ -336,20 +336,22 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d
 | 
			
		||||
    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
 | 
			
		||||
    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
 | 
			
		||||
 | 
			
		||||
    // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
 | 
			
		||||
    // pixel values.
 | 
			
		||||
    const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
 | 
			
		||||
 | 
			
		||||
    if (morton_to_gl) {
 | 
			
		||||
        // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
 | 
			
		||||
        // pixel values.
 | 
			
		||||
        const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
 | 
			
		||||
        const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
 | 
			
		||||
            addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);
 | 
			
		||||
        const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
 | 
			
		||||
        memcpy(gl_buffer, data.data(), size_to_copy);
 | 
			
		||||
    } else {
 | 
			
		||||
        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
 | 
			
		||||
        // check the configuration for this and perform more generic un/swizzle
 | 
			
		||||
        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
 | 
			
		||||
        VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
 | 
			
		||||
                                       Memory::GetPointer(addr), gl_buffer, morton_to_gl);
 | 
			
		||||
        std::vector<u8> data(height * stride * bytes_per_pixel);
 | 
			
		||||
        Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth,
 | 
			
		||||
                                         bytes_per_pixel, bytes_per_pixel, data.data(), gl_buffer,
 | 
			
		||||
                                         false, block_height, block_depth);
 | 
			
		||||
        const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
 | 
			
		||||
        memcpy(Memory::GetPointer(addr), data.data(), size_to_copy);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -430,17 +432,16 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
 | 
			
		||||
        MortonCopy<false, PixelFormat::RGBA16UI>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::R11FG11FB10F>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::RGBA32UI>,
 | 
			
		||||
        // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/BC6H_UF16/BC6H_SF16/ASTC_2D_4X4
 | 
			
		||||
        // formats are not supported
 | 
			
		||||
        nullptr,
 | 
			
		||||
        nullptr,
 | 
			
		||||
        nullptr,
 | 
			
		||||
        nullptr,
 | 
			
		||||
        nullptr,
 | 
			
		||||
        nullptr,
 | 
			
		||||
        nullptr,
 | 
			
		||||
        nullptr,
 | 
			
		||||
        nullptr,
 | 
			
		||||
        MortonCopy<false, PixelFormat::DXT1>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::DXT23>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::DXT45>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::DXN1>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::DXN2UNORM>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::DXN2SNORM>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::BC7U>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::BC6H_UF16>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::BC6H_SF16>,
 | 
			
		||||
        // TODO(Subv): Swizzling ASTC formats are not supported
 | 
			
		||||
        nullptr,
 | 
			
		||||
        MortonCopy<false, PixelFormat::G8R8U>,
 | 
			
		||||
        MortonCopy<false, PixelFormat::G8R8S>,
 | 
			
		||||
@ -754,7 +755,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
 | 
			
		||||
                             SurfaceParams::SurfaceTargetName(params.target));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 | 
			
		||||
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
 | 
			
		||||
    union S8Z24 {
 | 
			
		||||
        BitField<0, 24, u32> z24;
 | 
			
		||||
        BitField<24, 8, u32> s8;
 | 
			
		||||
@ -767,16 +768,23 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 | 
			
		||||
    };
 | 
			
		||||
    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
 | 
			
		||||
 | 
			
		||||
    S8Z24 input_pixel{};
 | 
			
		||||
    Z24S8 output_pixel{};
 | 
			
		||||
    S8Z24 s8z24_pixel{};
 | 
			
		||||
    Z24S8 z24s8_pixel{};
 | 
			
		||||
    constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
 | 
			
		||||
    for (std::size_t y = 0; y < height; ++y) {
 | 
			
		||||
        for (std::size_t x = 0; x < width; ++x) {
 | 
			
		||||
            const std::size_t offset{bpp * (y * width + x)};
 | 
			
		||||
            std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
 | 
			
		||||
            output_pixel.s8.Assign(input_pixel.s8);
 | 
			
		||||
            output_pixel.z24.Assign(input_pixel.z24);
 | 
			
		||||
            std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8));
 | 
			
		||||
            if (reverse) {
 | 
			
		||||
                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
 | 
			
		||||
                s8z24_pixel.s8.Assign(z24s8_pixel.s8);
 | 
			
		||||
                s8z24_pixel.z24.Assign(z24s8_pixel.z24);
 | 
			
		||||
                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
 | 
			
		||||
            } else {
 | 
			
		||||
                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
 | 
			
		||||
                z24s8_pixel.s8.Assign(s8z24_pixel.s8);
 | 
			
		||||
                z24s8_pixel.z24.Assign(s8z24_pixel.z24);
 | 
			
		||||
                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@ -814,7 +822,7 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
 | 
			
		||||
    }
 | 
			
		||||
    case PixelFormat::S8Z24:
 | 
			
		||||
        // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
 | 
			
		||||
        ConvertS8Z24ToZ24S8(data, width, height);
 | 
			
		||||
        ConvertS8Z24ToZ24S8(data, width, height, false);
 | 
			
		||||
        break;
 | 
			
		||||
 | 
			
		||||
    case PixelFormat::G8R8U:
 | 
			
		||||
@ -825,6 +833,30 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
 | 
			
		||||
 * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
 | 
			
		||||
 * with typical desktop GPUs.
 | 
			
		||||
 */
 | 
			
		||||
static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
 | 
			
		||||
                                                u32 width, u32 height) {
 | 
			
		||||
    switch (pixel_format) {
 | 
			
		||||
    case PixelFormat::G8R8U:
 | 
			
		||||
    case PixelFormat::G8R8S:
 | 
			
		||||
    case PixelFormat::ASTC_2D_4X4:
 | 
			
		||||
    case PixelFormat::ASTC_2D_8X8: {
 | 
			
		||||
        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
 | 
			
		||||
                     static_cast<u32>(pixel_format));
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case PixelFormat::S8Z24:
 | 
			
		||||
        // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
 | 
			
		||||
        ConvertS8Z24ToZ24S8(data, width, height, true);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
 | 
			
		||||
void CachedSurface::LoadGLBuffer() {
 | 
			
		||||
    ASSERT(params.type != SurfaceType::Fill);
 | 
			
		||||
@ -864,11 +896,57 @@ void CachedSurface::LoadGLBuffer() {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
 | 
			
		||||
 | 
			
		||||
    dirty = false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
 | 
			
		||||
void CachedSurface::FlushGLBuffer() {
 | 
			
		||||
    ASSERT_MSG(false, "Unimplemented");
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
 | 
			
		||||
    const auto& rect{params.GetRect()};
 | 
			
		||||
    // Load data from memory to the surface
 | 
			
		||||
    const GLint x0 = static_cast<GLint>(rect.left);
 | 
			
		||||
    const GLint y0 = static_cast<GLint>(rect.bottom);
 | 
			
		||||
    const size_t buffer_offset =
 | 
			
		||||
        static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) *
 | 
			
		||||
        GetGLBytesPerPixel(params.pixel_format);
 | 
			
		||||
    const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
 | 
			
		||||
    const u32 copy_size = params.width * params.height * bytes_per_pixel;
 | 
			
		||||
    gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size);
 | 
			
		||||
    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
 | 
			
		||||
    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
 | 
			
		||||
    ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
 | 
			
		||||
    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
 | 
			
		||||
    ASSERT(!tuple.compressed);
 | 
			
		||||
    ASSERT(x0 == 0 && y0 == 0);
 | 
			
		||||
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
 | 
			
		||||
    glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(),
 | 
			
		||||
                      gl_buffer.data());
 | 
			
		||||
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
 | 
			
		||||
    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
 | 
			
		||||
                                        params.height);
 | 
			
		||||
    ASSERT(params.type != SurfaceType::Fill);
 | 
			
		||||
    const u8* const texture_src_data = Memory::GetPointer(params.addr);
 | 
			
		||||
    ASSERT(texture_src_data);
 | 
			
		||||
    if (params.is_tiled) {
 | 
			
		||||
        u32 depth = params.depth;
 | 
			
		||||
        u32 block_depth = params.block_depth;
 | 
			
		||||
 | 
			
		||||
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
 | 
			
		||||
                   params.block_width, static_cast<u32>(params.target));
 | 
			
		||||
 | 
			
		||||
        if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
 | 
			
		||||
            // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
 | 
			
		||||
            depth = 1U;
 | 
			
		||||
            block_depth = 1U;
 | 
			
		||||
        }
 | 
			
		||||
        gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
 | 
			
		||||
            params.width, params.block_height, params.height, block_depth, depth,
 | 
			
		||||
            &gl_buffer[buffer_offset], copy_size, params.addr + buffer_offset);
 | 
			
		||||
    } else {
 | 
			
		||||
        Memory::WriteBlock(params.addr + buffer_offset, &gl_buffer[buffer_offset],
 | 
			
		||||
                           gl_buffer.size() - buffer_offset);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
 | 
			
		||||
 | 
			
		||||
@ -802,6 +802,18 @@ public:
 | 
			
		||||
        return params.size_in_bytes_total;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Flushes the surface through FlushGLBuffer() if it is marked dirty, then clears the dirty
    /// flag. Does nothing when the surface is not dirty.
    void Flush() {
        // Only a surface that has been modified by us needs to be flushed.
        if (dirty) {
            FlushGLBuffer();
            dirty = false;
        }
    }
 | 
			
		||||
 | 
			
		||||
    void MarkAsDirty() {
 | 
			
		||||
        dirty = true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns a read-only reference to this surface's texture member.
    const OGLTexture& Texture() const {
        return this->texture;
    }
 | 
			
		||||
@ -833,6 +845,7 @@ private:
 | 
			
		||||
    std::vector<u8> gl_buffer;
 | 
			
		||||
    SurfaceParams params;
 | 
			
		||||
    GLenum gl_target;
 | 
			
		||||
    bool dirty = false;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user