citra/src/video_core/gpu.cpp
James Rowe 282adfc70b Frontend/GPU: Refactor context management
Changes the GraphicsContext to be managed by the GPU core. This
eliminates the need for the frontends to fool around with tricky
MakeCurrent/DoneCurrent calls that are dependent on the settings (such
as async gpu option).

This also refactors out the need to use QWidget::fromWindowContainer as
that caused issues with focus and input handling. Now we use a regular
QWidget and just access the native windowHandle() directly.

Another change is removing the debug tool setting in FrameMailbox.
Instead of trying to block the frontend until a new frame is ready, the
core will now take over presentation and draw directly to the window if
the renderer detects that its hooked by NSight or RenderDoc

Lastly, since it was in the way, I removed ScopeAcquireWindowContext and
replaced it with a simple subclass in GraphicsContext that achieves the
same result
2020-03-24 21:03:42 -06:00

341 lines
11 KiB
C++

// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
: system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
auto& rasterizer{renderer->Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
}
GPU::~GPU() = default;
Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
const Engines::Maxwell3D& GPU::Maxwell3D() const {
return *maxwell_3d;
}
Engines::KeplerCompute& GPU::KeplerCompute() {
return *kepler_compute;
}
const Engines::KeplerCompute& GPU::KeplerCompute() const {
return *kepler_compute;
}
MemoryManager& GPU::MemoryManager() {
return *memory_manager;
}
const MemoryManager& GPU::MemoryManager() const {
return *memory_manager;
}
DmaPusher& GPU::DmaPusher() {
return *dma_pusher;
}
const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
return;
}
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
if (!syncpt_interrupts[syncpoint_id].empty()) {
u32 value = syncpoints[syncpoint_id].load();
auto it = syncpt_interrupts[syncpoint_id].begin();
while (it != syncpt_interrupts[syncpoint_id].end()) {
if (value >= *it) {
TriggerCpuInterrupt(syncpoint_id, *it);
it = syncpt_interrupts[syncpoint_id].erase(it);
continue;
}
it++;
}
}
}
u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
return syncpoints[syncpoint_id].load();
}
void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
auto& interrupt = syncpt_interrupts[syncpoint_id];
bool contains = std::any_of(interrupt.begin(), interrupt.end(),
[value](u32 in_value) { return in_value == value; });
if (contains) {
return;
}
syncpt_interrupts[syncpoint_id].emplace_back(value);
}
bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
std::lock_guard lock{sync_mutex};
auto& interrupt = syncpt_interrupts[syncpoint_id];
const auto iter =
std::find_if(interrupt.begin(), interrupt.end(),
[value](u32 interrupt_value) { return value == interrupt_value; });
if (iter == interrupt.end()) {
return false;
}
interrupt.erase(iter);
return true;
}
u64 GPU::GetTicks() const {
// This values were reversed engineered by fincs from NVN
// The gpu clock is reported in units of 385/625 nanoseconds
constexpr u64 gpu_ticks_num = 384;
constexpr u64 gpu_ticks_den = 625;
const u64 cpu_ticks = system.CoreTiming().GetTicks();
const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
}
void GPU::FlushCommands() {
renderer->Rasterizer().FlushCommands();
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
enum class BufferMethods {
BindObject = 0x0,
Nop = 0x2,
SemaphoreAddressHigh = 0x4,
SemaphoreAddressLow = 0x5,
SemaphoreSequence = 0x6,
SemaphoreTrigger = 0x7,
NotifyIntr = 0x8,
WrcacheFlush = 0x9,
Unk28 = 0xA,
UnkCacheFlush = 0xB,
RefCnt = 0x14,
SemaphoreAcquire = 0x1A,
SemaphoreRelease = 0x1B,
FenceValue = 0x1C,
FenceAction = 0x1D,
Unk78 = 0x1E,
Unk7c = 0x1F,
Yield = 0x20,
NonPullerMethods = 0x40,
};
enum class GpuSemaphoreOperation {
AcquireEqual = 0x1,
WriteLong = 0x2,
AcquireGequal = 0x4,
AcquireMask = 0x8,
};
void GPU::CallMethod(const MethodCall& method_call) {
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
method_call.subchannel);
ASSERT(method_call.subchannel < bound_engines.size());
if (ExecuteMethodOnEngine(method_call)) {
CallEngineMethod(method_call);
} else {
CallPullerMethod(method_call);
}
}
bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
const auto method = static_cast<BufferMethods>(method_call.method);
return method >= BufferMethods::NonPullerMethods;
}
void GPU::CallPullerMethod(const MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
const auto method = static_cast<BufferMethods>(method_call.method);
switch (method) {
case BufferMethods::BindObject: {
ProcessBindMethod(method_call);
break;
}
case BufferMethods::Nop:
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequence:
case BufferMethods::RefCnt:
case BufferMethods::UnkCacheFlush:
case BufferMethods::WrcacheFlush:
case BufferMethods::FenceValue:
case BufferMethods::FenceAction:
break;
case BufferMethods::SemaphoreTrigger: {
ProcessSemaphoreTriggerMethod();
break;
}
case BufferMethods::NotifyIntr: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
break;
}
case BufferMethods::Unk28: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
break;
}
case BufferMethods::SemaphoreAcquire: {
ProcessSemaphoreAcquire();
break;
}
case BufferMethods::SemaphoreRelease: {
ProcessSemaphoreRelease();
break;
}
case BufferMethods::Yield: {
// TODO(Kmather73): Research and implement this method.
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
break;
}
default:
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
static_cast<u32>(method));
break;
}
}
void GPU::CallEngineMethod(const MethodCall& method_call) {
const EngineID engine = bound_engines[method_call.subchannel];
switch (engine) {
case EngineID::FERMI_TWOD_A:
fermi_2d->CallMethod(method_call);
break;
case EngineID::MAXWELL_B:
maxwell_3d->CallMethod(method_call);
break;
case EngineID::KEPLER_COMPUTE_B:
kepler_compute->CallMethod(method_call);
break;
case EngineID::MAXWELL_DMA_COPY_A:
maxwell_dma->CallMethod(method_call);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
kepler_memory->CallMethod(method_call);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine");
}
}
void GPU::ProcessBindMethod(const MethodCall& method_call) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
method_call.argument);
bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
}
void GPU::ProcessSemaphoreTriggerMethod() {
const auto semaphoreOperationMask = 0xF;
const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) {
struct Block {
u32 sequence;
u32 zeros = 0;
u64 timestamp;
};
Block block{};
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = GetTicks();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {
const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
// Nothing to do in this case
} else {
regs.acquire_source = true;
regs.acquire_value = regs.semaphore_sequence;
if (op == GpuSemaphoreOperation::AcquireEqual) {
regs.acquire_active = true;
regs.acquire_mode = false;
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
regs.acquire_active = true;
regs.acquire_mode = true;
} else if (op == GpuSemaphoreOperation::AcquireMask) {
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
// semaphore_sequence, gives a non-0 result
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
} else {
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
}
}
}
}
void GPU::ProcessSemaphoreRelease() {
memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
}
void GPU::ProcessSemaphoreAcquire() {
const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;
regs.acquire_value = value;
// TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false;
regs.acquire_source = false;
}
}
} // namespace Tegra