dma_pushbuffer: Optimize to avoid loop and copy on Push.
This commit is contained in:
		
							parent
							
								
									c568f5cea7
								
							
						
					
					
						commit
						ac74b71d75
					
				@ -128,11 +128,9 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
 | 
				
			|||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void PushGPUEntries(const std::vector<Tegra::CommandListHeader>& entries) {
 | 
					static void PushGPUEntries(Tegra::CommandList&& entries) {
 | 
				
			||||||
    auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
 | 
					    auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
 | 
				
			||||||
    for (const auto& entry : entries) {
 | 
					    dma_pusher.Push(std::move(entries));
 | 
				
			||||||
        dma_pusher.Push(entry);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    dma_pusher.DispatchCalls();
 | 
					    dma_pusher.DispatchCalls();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -149,11 +147,11 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
 | 
				
			|||||||
                                   params.num_entries * sizeof(Tegra::CommandListHeader),
 | 
					                                   params.num_entries * sizeof(Tegra::CommandListHeader),
 | 
				
			||||||
               "Incorrect input size");
 | 
					               "Incorrect input size");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::vector<Tegra::CommandListHeader> entries(params.num_entries);
 | 
					    Tegra::CommandList entries(params.num_entries);
 | 
				
			||||||
    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
 | 
					    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
 | 
				
			||||||
                params.num_entries * sizeof(Tegra::CommandListHeader));
 | 
					                params.num_entries * sizeof(Tegra::CommandListHeader));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PushGPUEntries(entries);
 | 
					    PushGPUEntries(std::move(entries));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    params.fence_out.id = 0;
 | 
					    params.fence_out.id = 0;
 | 
				
			||||||
    params.fence_out.value = 0;
 | 
					    params.fence_out.value = 0;
 | 
				
			||||||
@ -170,11 +168,11 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
 | 
				
			|||||||
    LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
 | 
					    LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
 | 
				
			||||||
                params.address, params.num_entries, params.flags);
 | 
					                params.address, params.num_entries, params.flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::vector<Tegra::CommandListHeader> entries(params.num_entries);
 | 
					    Tegra::CommandList entries(params.num_entries);
 | 
				
			||||||
    Memory::ReadBlock(params.address, entries.data(),
 | 
					    Memory::ReadBlock(params.address, entries.data(),
 | 
				
			||||||
                      params.num_entries * sizeof(Tegra::CommandListHeader));
 | 
					                      params.num_entries * sizeof(Tegra::CommandListHeader));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PushGPUEntries(entries);
 | 
					    PushGPUEntries(std::move(entries));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    params.fence_out.id = 0;
 | 
					    params.fence_out.id = 0;
 | 
				
			||||||
    params.fence_out.value = 0;
 | 
					    params.fence_out.value = 0;
 | 
				
			||||||
 | 
				
			|||||||
@ -23,6 +23,8 @@ void DmaPusher::DispatchCalls() {
 | 
				
			|||||||
    // On entering GPU code, assume all memory may be touched by the ARM core.
 | 
					    // On entering GPU code, assume all memory may be touched by the ARM core.
 | 
				
			||||||
    gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
 | 
					    gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    dma_pushbuffer_subindex = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while (Core::System::GetInstance().IsPoweredOn()) {
 | 
					    while (Core::System::GetInstance().IsPoweredOn()) {
 | 
				
			||||||
        if (!Step()) {
 | 
					        if (!Step()) {
 | 
				
			||||||
            break;
 | 
					            break;
 | 
				
			||||||
@ -89,11 +91,17 @@ bool DmaPusher::Step() {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    } else if (ib_enable && !dma_pushbuffer.empty()) {
 | 
					    } else if (ib_enable && !dma_pushbuffer.empty()) {
 | 
				
			||||||
        // Current pushbuffer empty, but we have more IB entries to read
 | 
					        // Current pushbuffer empty, but we have more IB entries to read
 | 
				
			||||||
        const CommandListHeader& command_list_header{dma_pushbuffer.front()};
 | 
					        const CommandList& command_list{dma_pushbuffer.front()};
 | 
				
			||||||
 | 
					        const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
 | 
				
			||||||
        dma_get = command_list_header.addr;
 | 
					        dma_get = command_list_header.addr;
 | 
				
			||||||
        dma_put = dma_get + command_list_header.size * sizeof(u32);
 | 
					        dma_put = dma_get + command_list_header.size * sizeof(u32);
 | 
				
			||||||
        non_main = command_list_header.is_non_main;
 | 
					        non_main = command_list_header.is_non_main;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (dma_pushbuffer_subindex >= command_list.size()) {
 | 
				
			||||||
 | 
					            // We've gone through the current list, remove it from the queue
 | 
				
			||||||
            dma_pushbuffer.pop();
 | 
					            dma_pushbuffer.pop();
 | 
				
			||||||
 | 
					            dma_pushbuffer_subindex = 0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
 | 
					        // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
 | 
				
			||||||
        return {};
 | 
					        return {};
 | 
				
			||||||
 | 
				
			|||||||
@ -4,6 +4,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#pragma once
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <vector>
 | 
				
			||||||
#include <queue>
 | 
					#include <queue>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "common/bit_field.h"
 | 
					#include "common/bit_field.h"
 | 
				
			||||||
@ -45,6 +46,8 @@ static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class GPU;
 | 
					class GPU;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using CommandList = std::vector<Tegra::CommandListHeader>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
 | 
					 * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
 | 
				
			||||||
 * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled
 | 
					 * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled
 | 
				
			||||||
@ -57,8 +60,8 @@ public:
 | 
				
			|||||||
    explicit DmaPusher(GPU& gpu);
 | 
					    explicit DmaPusher(GPU& gpu);
 | 
				
			||||||
    ~DmaPusher();
 | 
					    ~DmaPusher();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void Push(const CommandListHeader& command_list_header) {
 | 
					    void Push(CommandList&& entries) {
 | 
				
			||||||
        dma_pushbuffer.push(command_list_header);
 | 
					        dma_pushbuffer.push(std::move(entries));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void DispatchCalls();
 | 
					    void DispatchCalls();
 | 
				
			||||||
@ -72,7 +75,8 @@ private:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    GPU& gpu;
 | 
					    GPU& gpu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::queue<CommandListHeader> dma_pushbuffer;
 | 
					    std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
 | 
				
			||||||
 | 
					    std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    struct DmaState {
 | 
					    struct DmaState {
 | 
				
			||||||
        u32 method;            ///< Current method
 | 
					        u32 method;            ///< Current method
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user