From a5aa5e2b2d75ffdee2d3bee24baab2b4befbe4de Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 9 Dec 2017 20:16:07 -0800 Subject: [PATCH 1/3] Common: Simplify and optimize BitSet iterator Instead of doing complex machinations to keep track of the current bit index, just unset the lowest bit on each iteration, greatly simplifying the code. --- src/common/bit_set.h | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/src/common/bit_set.h b/src/common/bit_set.h index 9c2e6b28c0..749de4df01 100644 --- a/src/common/bit_set.h +++ b/src/common/bit_set.h @@ -120,20 +120,15 @@ public: // A STL-like iterator is required to be able to use range-based for loops. class Iterator { public: - Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {} - Iterator(IntTy val) : m_val(val), m_bit(0) {} - Iterator& operator=(Iterator other) { - new (this) Iterator(other); - return *this; - } + Iterator(const Iterator& other) : m_val(other.m_val) {} + Iterator(IntTy val) : m_val(val) {} int operator*() { - return m_bit + ComputeLsb(); + // This will never be called when m_val == 0, because that would be the end() iterator + return LeastSignificantSetBit(m_val); } Iterator& operator++() { - int lsb = ComputeLsb(); - m_val >>= lsb + 1; - m_bit += lsb + 1; - m_has_lsb = false; + // Unset least significant set bit + m_val &= m_val - IntTy(1); return *this; } Iterator operator++(int _) { @@ -149,17 +144,7 @@ public: } private: - int ComputeLsb() { - if (!m_has_lsb) { - m_lsb = LeastSignificantSetBit(m_val); - m_has_lsb = true; - } - return m_lsb; - } IntTy m_val; - int m_bit; - int m_lsb = -1; - bool m_has_lsb = false; }; BitSet() : m_val(0) {} From 0184419814981fed918037c136b473d006c84d6e Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 9 Dec 2017 20:23:27 -0800 Subject: [PATCH 2/3] Shader: Refactor output_mask copy loop to function --- src/video_core/shader/shader.cpp | 18 ++++++++++-------- 1 file 
changed, 10 insertions(+), 8 deletions(-) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 2857d28297..f8b7460e04 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -77,13 +77,18 @@ void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input } } -void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { - unsigned int output_i = 0; - for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) { - output.attr[output_i++] = registers.output[reg]; +static void CopyRegistersToOutput(const Math::Vec4<float24>* regs, u32 mask, + AttributeBuffer& buffer) { + int output_i = 0; + for (int reg : Common::BitSet<u32>(mask)) { + buffer.attr[output_i++] = regs[reg]; } } +void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { + CopyRegistersToOutput(registers.output, config.output_mask, output); +} + UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {} GSEmitter::GSEmitter() { @@ -102,10 +107,7 @@ void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) { handlers->winding_setter(); for (size_t i = 0; i < buffer.size(); ++i) { AttributeBuffer output; - unsigned int output_i = 0; - for (unsigned int reg : Common::BitSet<u32>(output_mask)) { - output.attr[output_i++] = buffer[i][reg]; - } + CopyRegistersToOutput(buffer[i].data(), output_mask, output); handlers->vertex_handler(output); } } From 230a7557f11360e2ac61851b9a7c39422dd2e2fb Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 9 Dec 2017 20:30:14 -0800 Subject: [PATCH 3/3] Shader: Store AttributeBuffers in GS output buffer This also does the output masking early at EMIT time, instead of when a triangle is sent to the vertex handler. 
--- src/video_core/shader/shader.cpp | 10 +++++----- src/video_core/shader/shader.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index f8b7460e04..2d0ffe8216 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -99,16 +99,16 @@ GSEmitter::~GSEmitter() { delete handlers; } -void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) { +void GSEmitter::Emit(Math::Vec4<float24> (&output_regs)[16]) { ASSERT(vertex_id < 3); - std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin()); + // TODO: This should be merged with UnitState::WriteOutput somehow + CopyRegistersToOutput(output_regs, output_mask, buffer[vertex_id]); + if (prim_emit) { if (winding) handlers->winding_setter(); for (size_t i = 0; i < buffer.size(); ++i) { - AttributeBuffer output; - CopyRegistersToOutput(buffer[i].data(), output_mask, output); - handlers->vertex_handler(output); + handlers->vertex_handler(buffer[i]); } } } diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index a3789da012..8740a16189 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -72,7 +72,7 @@ static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has inva * This structure contains state information for primitive emitting in geometry shader. */ struct GSEmitter { - std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer; + std::array<AttributeBuffer, 3> buffer; u8 vertex_id; bool prim_emit; bool winding; @@ -87,7 +87,7 @@ struct GSEmitter { GSEmitter(); ~GSEmitter(); - void Emit(Math::Vec4<float24> (&vertex)[16]); + void Emit(Math::Vec4<float24> (&output_regs)[16]); }; static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type");