diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6a5d5764b3..1e12f4ac2b 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -32,6 +32,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_decompiler.h renderer_opengl/gl_shader_gen.cpp renderer_opengl/gl_shader_gen.h + renderer_opengl/gl_shader_manager.cpp + renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state.cpp diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e4c620d96a..d18b846825 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -178,6 +178,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); + shader_program_manager = + std::make_unique(GLAD_GL_ARB_separate_shader_objects); + glEnable(GL_BLEND); SyncEntireState(); @@ -486,6 +489,11 @@ void RasterizerOpenGL::DrawTriangles() { state.scissor.height = draw_rect.GetHeight(); state.Apply(); + shader_program_manager->UseTrivialVertexShader(); + shader_program_manager->UseTrivialGeometryShader(); + shader_program_manager->ApplyTo(state); + state.Apply(); + // Draw the vertex batch size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); for (size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) { @@ -1253,95 +1261,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig( void RasterizerOpenGL::SetShader() { auto config = GLShader::PicaShaderConfig::BuildFromRegs(Pica::g_state.regs); - std::unique_ptr shader = std::make_unique(); - - // Find (or generate) the GLSL shader for the current TEV state - auto cached_shader = shader_cache.find(config); - if (cached_shader != shader_cache.end()) { - 
current_shader = cached_shader->second.get(); - - state.draw.shader_program = current_shader->shader.handle; - state.Apply(); - } else { - LOG_DEBUG(Render_OpenGL, "Creating new shader"); - - shader->shader.Create(GLShader::GenerateVertexShader().c_str(), - GLShader::GenerateFragmentShader(config).c_str()); - - state.draw.shader_program = shader->shader.handle; - state.Apply(); - - // Set the texture samplers to correspond to different texture units - GLint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::PicaTexture(0).id); - } - uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]"); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::PicaTexture(1).id); - } - uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::PicaTexture(2).id); - } - uniform_tex = glGetUniformLocation(shader->shader.handle, "tex_cube"); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::TextureCube.id); - } - - // Set the texture samplers to correspond to different lookup table texture units - GLint uniform_lut = glGetUniformLocation(shader->shader.handle, "lighting_lut"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, TextureUnits::LightingLUT.id); - } - - GLint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); - if (uniform_fog_lut != -1) { - glUniform1i(uniform_fog_lut, TextureUnits::FogLUT.id); - } - - GLint uniform_proctex_noise_lut = - glGetUniformLocation(shader->shader.handle, "proctex_noise_lut"); - if (uniform_proctex_noise_lut != -1) { - glUniform1i(uniform_proctex_noise_lut, TextureUnits::ProcTexNoiseLUT.id); - } - - GLint uniform_proctex_color_map = - glGetUniformLocation(shader->shader.handle, "proctex_color_map"); - if (uniform_proctex_color_map != -1) { - glUniform1i(uniform_proctex_color_map, TextureUnits::ProcTexColorMap.id); - } 
- - GLint uniform_proctex_alpha_map = - glGetUniformLocation(shader->shader.handle, "proctex_alpha_map"); - if (uniform_proctex_alpha_map != -1) { - glUniform1i(uniform_proctex_alpha_map, TextureUnits::ProcTexAlphaMap.id); - } - - GLint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut"); - if (uniform_proctex_lut != -1) { - glUniform1i(uniform_proctex_lut, TextureUnits::ProcTexLUT.id); - } - - GLint uniform_proctex_diff_lut = - glGetUniformLocation(shader->shader.handle, "proctex_diff_lut"); - if (uniform_proctex_diff_lut != -1) { - glUniform1i(uniform_proctex_diff_lut, TextureUnits::ProcTexDiffLUT.id); - } - - current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); - - GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); - if (block_index != GL_INVALID_INDEX) { - GLint block_size; - glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, - GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); - ASSERT_MSG(block_size == sizeof(UniformData), - "Uniform block size did not match! 
Got {}, expected {}", - static_cast(block_size), sizeof(UniformData)); - glUniformBlockBinding(current_shader->shader.handle, block_index, 0); - } - } + shader_program_manager->UseFragmentShader(config); } void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 999550a233..fd72e9f1ed 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -8,12 +8,10 @@ #include #include #include -#include #include #include #include "common/bit_field.h" #include "common/common_types.h" -#include "common/hash.h" #include "common/vector_math.h" #include "core/hw/gpu.h" #include "video_core/pica_state.h" @@ -25,13 +23,14 @@ #include "video_core/regs_texturing.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/shader/shader.h" struct ScreenInfo; +class ShaderProgramManager; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: @@ -52,12 +51,6 @@ public: bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override; - /// OpenGL shader generated for a given Pica register state - struct PicaShader { - /// OpenGL shader resource - OGLShader shader; - }; - private: struct SamplerInfo { using TextureConfig = Pica::TexturingRegs::TextureConfig; @@ -121,47 +114,6 @@ private: GLfloat view[3]; }; - struct LightSrc { - alignas(16) GLvec3 specular_0; - alignas(16) GLvec3 specular_1; - alignas(16) GLvec3 diffuse; - alignas(16) GLvec3 ambient; - alignas(16) GLvec3 position; - 
alignas(16) GLvec3 spot_direction; // negated - GLfloat dist_atten_bias; - GLfloat dist_atten_scale; - }; - - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned - // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at - // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. - // Not following that rule will cause problems on some AMD drivers. - struct UniformData { - GLint framebuffer_scale; - GLint alphatest_ref; - GLfloat depth_scale; - GLfloat depth_offset; - GLint scissor_x1; - GLint scissor_y1; - GLint scissor_x2; - GLint scissor_y2; - alignas(16) GLvec3 fog_color; - alignas(8) GLvec2 proctex_noise_f; - alignas(8) GLvec2 proctex_noise_a; - alignas(8) GLvec2 proctex_noise_p; - alignas(16) GLvec3 lighting_global_ambient; - LightSrc light_src[8]; - alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages - alignas(16) GLvec4 tev_combiner_buffer_color; - alignas(16) GLvec4 clip_coef; - }; - - static_assert( - sizeof(UniformData) == 0x460, - "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16384, - "UniformData structure must be less than 16kb as per the OpenGL spec"); - /// Syncs entire status to match PICA registers void SyncEntireState(); @@ -269,8 +221,6 @@ private: std::vector vertex_batch; - std::unordered_map> shader_cache; - const PicaShader* current_shader = nullptr; bool shader_dirty; struct { @@ -285,6 +235,8 @@ private: bool dirty; } uniform_block_data = {}; + std::unique_ptr shader_program_manager; + std::array texture_samplers; OGLVertexArray vertex_array; static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp new file mode 100644 index 0000000000..30bc2d49d8 --- /dev/null +++ 
b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -0,0 +1,216 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include "video_core/renderer_opengl/gl_shader_manager.h" + +static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, + size_t expected_size) { + GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index == GL_INVALID_INDEX) { + return; + } + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, "Uniform block size did not match! Got {}, expected {}", + static_cast(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast(binding)); +} + +static void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, + sizeof(UniformData)); +} + +static void SetShaderSamplerBinding(GLuint shader, const char* name, + TextureUnits::TextureUnit binding) { + GLint uniform_tex = glGetUniformLocation(shader, name); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, binding.id); + } +} + +static void SetShaderSamplerBindings(GLuint shader) { + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); + cur_state.Apply(); + + // Set the texture samplers to correspond to different texture units + SetShaderSamplerBinding(shader, "tex0", TextureUnits::PicaTexture(0)); + SetShaderSamplerBinding(shader, "tex1", TextureUnits::PicaTexture(1)); + SetShaderSamplerBinding(shader, "tex2", TextureUnits::PicaTexture(2)); + SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube); + + // Set the texture samplers to correspond to different lookup table texture units + SetShaderSamplerBinding(shader, "lighting_lut", TextureUnits::LightingLUT); + 
SetShaderSamplerBinding(shader, "fog_lut", TextureUnits::FogLUT); + SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT); + SetShaderSamplerBinding(shader, "proctex_color_map", TextureUnits::ProcTexColorMap); + SetShaderSamplerBinding(shader, "proctex_alpha_map", TextureUnits::ProcTexAlphaMap); + SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT); + SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT); + + cur_state.draw.shader_program = old_program; + cur_state.Apply(); +} + +/** + * An object representing a shader program staging. It can be either a shader object or a program + * object, depending on whether separable program is used. + */ +class OGLShaderStage { +public: + explicit OGLShaderStage(bool separable) { + if (separable) { + shader_or_program = OGLProgram(); + } else { + shader_or_program = OGLShader(); + } + } + + void Create(const char* source, GLenum type) { + if (shader_or_program.which() == 0) { + boost::get(shader_or_program).Create(source, type); + } else { + OGLShader shader; + shader.Create(source, type); + OGLProgram& program = boost::get(shader_or_program); + program.Create(true, {shader.handle}); + SetShaderUniformBlockBindings(program.handle); + SetShaderSamplerBindings(program.handle); + } + } + + GLuint GetHandle() const { + if (shader_or_program.which() == 0) { + return boost::get(shader_or_program).handle; + } else { + return boost::get(shader_or_program).handle; + } + } + +private: + boost::variant shader_or_program; +}; + +class TrivialVertexShader { +public: + explicit TrivialVertexShader(bool separable) : program(separable) { + program.Create(GLShader::GenerateTrivialVertexShader(separable).c_str(), GL_VERTEX_SHADER); + } + GLuint Get() const { + return program.GetHandle(); + } + +private: + OGLShaderStage program; +}; + +template +class ShaderCache { +public: + explicit ShaderCache(bool separable) : separable(separable) {} + GLuint Get(const 
KeyConfigType& config) { + auto [iter, new_shader] = shaders.try_emplace(config, separable); + OGLShaderStage& cached_shader = iter->second; + if (new_shader) { + cached_shader.Create(CodeGenerator(config, separable).c_str(), ShaderType); + } + return cached_shader.GetHandle(); + } + +private: + bool separable; + std::unordered_map shaders; +}; + +using FragmentShaders = + ShaderCache; + +class ShaderProgramManager::Impl { +public: + explicit Impl(bool separable) + : trivial_vertex_shader(separable), fragment_shaders(separable), separable(separable) { + if (separable) + pipeline.Create(); + } + + struct ShaderTuple { + GLuint vs = 0; + GLuint gs = 0; + GLuint fs = 0; + + bool operator==(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); + } + + bool operator!=(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs); + } + + struct Hash { + std::size_t operator()(const ShaderTuple& tuple) const { + std::size_t hash = 0; + boost::hash_combine(hash, tuple.vs); + boost::hash_combine(hash, tuple.gs); + boost::hash_combine(hash, tuple.fs); + return hash; + } + }; + }; + + ShaderTuple current; + + TrivialVertexShader trivial_vertex_shader; + + FragmentShaders fragment_shaders; + + bool separable; + std::unordered_map program_cache; + OGLPipeline pipeline; +}; + +ShaderProgramManager::ShaderProgramManager(bool separable) + : impl(std::make_unique(separable)) {} + +ShaderProgramManager::~ShaderProgramManager() = default; + +void ShaderProgramManager::UseTrivialVertexShader() { + impl->current.vs = impl->trivial_vertex_shader.Get(); +} + +void ShaderProgramManager::UseTrivialGeometryShader() { + impl->current.gs = 0; +} + +void ShaderProgramManager::UseFragmentShader(const GLShader::PicaShaderConfig& config) { + impl->current.fs = impl->fragment_shaders.Get(config); +} + +void ShaderProgramManager::ApplyTo(OpenGLState& state) { + if (impl->separable) { + // Without this 
resetting, AMD sometimes freezes when one stage is changed but not for the + // others + glUseProgramStages(impl->pipeline.handle, + GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, + 0); + + glUseProgramStages(impl->pipeline.handle, GL_VERTEX_SHADER_BIT, impl->current.vs); + glUseProgramStages(impl->pipeline.handle, GL_GEOMETRY_SHADER_BIT, impl->current.gs); + glUseProgramStages(impl->pipeline.handle, GL_FRAGMENT_SHADER_BIT, impl->current.fs); + state.draw.shader_program = 0; + state.draw.program_pipeline = impl->pipeline.handle; + } else { + OGLProgram& cached_program = impl->program_cache[impl->current]; + if (cached_program.handle == 0) { + cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs}); + SetShaderUniformBlockBindings(cached_program.handle); + SetShaderSamplerBindings(cached_program.handle); + } + state.draw.shader_program = cached_program.handle; + } +} diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h new file mode 100644 index 0000000000..1e59b74aa5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -0,0 +1,73 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/pica_to_gl.h" + +enum class UniformBindings : GLuint { Common }; + +struct LightSrc { + alignas(16) GLvec3 specular_0; + alignas(16) GLvec3 specular_1; + alignas(16) GLvec3 diffuse; + alignas(16) GLvec3 ambient; + alignas(16) GLvec3 position; + alignas(16) GLvec3 spot_direction; // negated + GLfloat dist_atten_bias; + GLfloat dist_atten_scale; +}; + +/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned +// NOTE: Always keep a vec4 at the end. 
The GL spec is not clear whether the alignment at +// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +// Not following that rule will cause problems on some AMD drivers. +struct UniformData { + GLint framebuffer_scale; + GLint alphatest_ref; + GLfloat depth_scale; + GLfloat depth_offset; + GLint scissor_x1; + GLint scissor_y1; + GLint scissor_x2; + GLint scissor_y2; + alignas(16) GLvec3 fog_color; + alignas(8) GLvec2 proctex_noise_f; + alignas(8) GLvec2 proctex_noise_a; + alignas(8) GLvec2 proctex_noise_p; + alignas(16) GLvec3 lighting_global_ambient; + LightSrc light_src[8]; + alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages + alignas(16) GLvec4 tev_combiner_buffer_color; + alignas(16) GLvec4 clip_coef; +}; + +static_assert( + sizeof(UniformData) == 0x460, + "The size of the UniformData structure has changed, update the structure in the shader"); +static_assert(sizeof(UniformData) < 16384, + "UniformData structure must be less than 16kb as per the OpenGL spec"); + +/// A class that manages different shader stages and configures them with given config data. +class ShaderProgramManager { +public: + explicit ShaderProgramManager(bool separable); + ~ShaderProgramManager(); + + void UseTrivialVertexShader(); + + void UseTrivialGeometryShader(); + + void UseFragmentShader(const GLShader::PicaShaderConfig& config); + + void ApplyTo(OpenGLState& state); + +private: + class Impl; + std::unique_ptr impl; +};