gl_rasterizer: Implement a more accurate fermi 2D copy.
- This is a blit, use the blit registers.
This commit is contained in:
		
							parent
							
								
									ca482997fe
								
							
						
					
					
						commit
						dd1aab5446
					
				| @ -21,7 +21,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | ||||
|     regs.reg_array[method_call.method] = method_call.argument; | ||||
| 
 | ||||
|     switch (method_call.method) { | ||||
|     case FERMI2D_REG_INDEX(trigger): { | ||||
|     // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
 | ||||
|     // so trigger on the second 32-bit write.
 | ||||
|     case FERMI2D_REG_INDEX(blit_src_y) + 1: { | ||||
|         HandleSurfaceCopy(); | ||||
|         break; | ||||
|     } | ||||
| @ -32,57 +34,23 @@ void Fermi2D::HandleSurfaceCopy() { | ||||
|     LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}", | ||||
|                 static_cast<u32>(regs.operation)); | ||||
| 
 | ||||
|     const GPUVAddr source = regs.src.Address(); | ||||
|     const GPUVAddr dest = regs.dst.Address(); | ||||
| 
 | ||||
|     // TODO(Subv): Only same-format and same-size copies are allowed for now.
 | ||||
|     ASSERT(regs.src.format == regs.dst.format); | ||||
|     ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height); | ||||
| 
 | ||||
|     // TODO(Subv): Only raw copies are implemented.
 | ||||
|     ASSERT(regs.operation == Regs::Operation::SrcCopy); | ||||
| 
 | ||||
|     const auto source_cpu = memory_manager.GpuToCpuAddress(source); | ||||
|     const auto dest_cpu = memory_manager.GpuToCpuAddress(dest); | ||||
|     ASSERT_MSG(source_cpu, "Invalid source GPU address"); | ||||
|     ASSERT_MSG(dest_cpu, "Invalid destination GPU address"); | ||||
|     const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; | ||||
|     const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; | ||||
|     const u32 src_blit_x2{ | ||||
|         static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)}; | ||||
|     const u32 src_blit_y2{ | ||||
|         static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; | ||||
| 
 | ||||
|     u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); | ||||
|     u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); | ||||
|     const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; | ||||
|     const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, | ||||
|                                             regs.blit_dst_x + regs.blit_dst_width, | ||||
|                                             regs.blit_dst_y + regs.blit_dst_height}; | ||||
| 
 | ||||
|     if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { | ||||
|         // All copies here update the main memory, so mark all rasterizer states as invalid.
 | ||||
|         Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||
| 
 | ||||
|         rasterizer.FlushRegion(*source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); | ||||
|         // We have to invalidate the destination region to evict any outdated surfaces from the
 | ||||
|         // cache. We do this before actually writing the new data because the destination address
 | ||||
|         // might contain a dirty surface that will have to be written back to memory.
 | ||||
|         rasterizer.InvalidateRegion(*dest_cpu, | ||||
|                                     dst_bytes_per_pixel * regs.dst.width * regs.dst.height); | ||||
| 
 | ||||
|         if (regs.src.linear == regs.dst.linear) { | ||||
|             // If the input layout and the output layout are the same, just perform a raw copy.
 | ||||
|             ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight()); | ||||
|             Memory::CopyBlock(*dest_cpu, *source_cpu, | ||||
|                               src_bytes_per_pixel * regs.dst.width * regs.dst.height); | ||||
|             return; | ||||
|         } | ||||
|         u8* src_buffer = Memory::GetPointer(*source_cpu); | ||||
|         u8* dst_buffer = Memory::GetPointer(*dest_cpu); | ||||
|         if (!regs.src.linear && regs.dst.linear) { | ||||
|             // If the input is tiled and the output is linear, deswizzle the input and copy it over.
 | ||||
|             Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, | ||||
|                                       src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer, | ||||
|                                       dst_buffer, true, regs.src.BlockHeight(), | ||||
|                                       regs.src.BlockDepth(), 0); | ||||
|         } else { | ||||
|             // If the input is linear and the output is tiled, swizzle the input and copy it over.
 | ||||
|             Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, | ||||
|                                       src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer, | ||||
|                                       src_buffer, false, regs.dst.BlockHeight(), | ||||
|                                       regs.dst.BlockDepth(), 0); | ||||
|         } | ||||
|     if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { | ||||
|         UNIMPLEMENTED(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -94,12 +94,22 @@ public: | ||||
| 
 | ||||
|                 Operation operation; | ||||
| 
 | ||||
|                 INSERT_PADDING_WORDS(0x9); | ||||
|                 INSERT_PADDING_WORDS(0x177); | ||||
| 
 | ||||
|                 // TODO(Subv): This is only a guess.
 | ||||
|                 u32 trigger; | ||||
|                 u32 blit_control; | ||||
| 
 | ||||
|                 INSERT_PADDING_WORDS(0x1A3); | ||||
|                 INSERT_PADDING_WORDS(0x8); | ||||
| 
 | ||||
|                 u32 blit_dst_x; | ||||
|                 u32 blit_dst_y; | ||||
|                 u32 blit_dst_width; | ||||
|                 u32 blit_dst_height; | ||||
|                 u64 blit_du_dx; | ||||
|                 u64 blit_dv_dy; | ||||
|                 u64 blit_src_x; | ||||
|                 u64 blit_src_y; | ||||
| 
 | ||||
|                 INSERT_PADDING_WORDS(0x21); | ||||
|             }; | ||||
|             std::array<u32, NUM_REGS> reg_array; | ||||
|         }; | ||||
| @ -122,7 +132,16 @@ private: | ||||
| ASSERT_REG_POSITION(dst, 0x80); | ||||
| ASSERT_REG_POSITION(src, 0x8C); | ||||
| ASSERT_REG_POSITION(operation, 0xAB); | ||||
| ASSERT_REG_POSITION(trigger, 0xB5); | ||||
| ASSERT_REG_POSITION(blit_control, 0x223); | ||||
| ASSERT_REG_POSITION(blit_dst_x, 0x22c); | ||||
| ASSERT_REG_POSITION(blit_dst_y, 0x22d); | ||||
| ASSERT_REG_POSITION(blit_dst_width, 0x22e); | ||||
| ASSERT_REG_POSITION(blit_dst_height, 0x22f); | ||||
| ASSERT_REG_POSITION(blit_du_dx, 0x230); | ||||
| ASSERT_REG_POSITION(blit_dv_dy, 0x232); | ||||
| ASSERT_REG_POSITION(blit_src_x, 0x234); | ||||
| ASSERT_REG_POSITION(blit_src_y, 0x236); | ||||
| 
 | ||||
| #undef ASSERT_REG_POSITION | ||||
| 
 | ||||
| } // namespace Tegra::Engines
 | ||||
|  | ||||
| @ -46,7 +46,9 @@ public: | ||||
| 
 | ||||
|     /// Attempt to use a faster method to perform a surface copy
 | ||||
|     virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||||
|                                        const Tegra::Engines::Fermi2D::Regs::Surface& dst) { | ||||
|                                        const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||||
|                                        const MathUtil::Rectangle<u32>& src_rect, | ||||
|                                        const MathUtil::Rectangle<u32>& dst_rect) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|  | ||||
| @ -778,15 +778,11 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||||
|                                              const Tegra::Engines::Fermi2D::Regs::Surface& dst) { | ||||
|                                              const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||||
|                                              const MathUtil::Rectangle<u32>& src_rect, | ||||
|                                              const MathUtil::Rectangle<u32>& dst_rect) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Blits); | ||||
| 
 | ||||
|     if (Settings::values.use_accurate_gpu_emulation) { | ||||
|         // Skip the accelerated copy and perform a slow but more accurate copy
 | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     res_cache.FermiCopySurface(src, dst); | ||||
|     res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -61,7 +61,9 @@ public: | ||||
|     void InvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||||
|     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||||
|                                const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; | ||||
|                                const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||||
|                                const MathUtil::Rectangle<u32>& src_rect, | ||||
|                                const MathUtil::Rectangle<u32>& dst_rect) override; | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||
|                            u32 pixel_stride) override; | ||||
|     bool AccelerateDrawBatch(bool is_indexed) override; | ||||
|  | ||||
| @ -1021,24 +1021,155 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, | ||||
|                         const MathUtil::Rectangle<u32>& src_rect, | ||||
|                         const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle, | ||||
|                         GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, | ||||
|                         std::size_t cubemap_face = 0) { | ||||
| 
 | ||||
|     const auto& src_params{src_surface->GetSurfaceParams()}; | ||||
|     const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||||
| 
 | ||||
|     OpenGLState prev_state{OpenGLState::GetCurState()}; | ||||
|     SCOPE_EXIT({ prev_state.Apply(); }); | ||||
| 
 | ||||
|     OpenGLState state; | ||||
|     state.draw.read_framebuffer = read_fb_handle; | ||||
|     state.draw.draw_framebuffer = draw_fb_handle; | ||||
|     state.Apply(); | ||||
| 
 | ||||
|     u32 buffers{}; | ||||
| 
 | ||||
|     if (src_params.type == SurfaceType::ColorTexture) { | ||||
|         switch (src_params.target) { | ||||
|         case SurfaceTarget::Texture2D: | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||||
|                                    GL_TEXTURE_2D, src_surface->Texture().handle, 0); | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    0, 0); | ||||
|             break; | ||||
|         case SurfaceTarget::TextureCubemap: | ||||
|             glFramebufferTexture2D( | ||||
|                 GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||||
|                 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), | ||||
|                 src_surface->Texture().handle, 0); | ||||
|             glFramebufferTexture2D( | ||||
|                 GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||||
|                 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); | ||||
|             break; | ||||
|         case SurfaceTarget::Texture2DArray: | ||||
|             glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||||
|                                       src_surface->Texture().handle, 0, 0); | ||||
|             glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); | ||||
|             break; | ||||
|         case SurfaceTarget::Texture3D: | ||||
|             glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||||
|                                    SurfaceTargetToGL(src_params.target), | ||||
|                                    src_surface->Texture().handle, 0, 0); | ||||
|             glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||||
|                                    SurfaceTargetToGL(src_params.target), 0, 0, 0); | ||||
|             break; | ||||
|         default: | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||||
|                                    GL_TEXTURE_2D, src_surface->Texture().handle, 0); | ||||
|             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    0, 0); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         switch (dst_params.target) { | ||||
|         case SurfaceTarget::Texture2D: | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||||
|                                    GL_TEXTURE_2D, dst_surface->Texture().handle, 0); | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    0, 0); | ||||
|             break; | ||||
|         case SurfaceTarget::TextureCubemap: | ||||
|             glFramebufferTexture2D( | ||||
|                 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||||
|                 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), | ||||
|                 dst_surface->Texture().handle, 0); | ||||
|             glFramebufferTexture2D( | ||||
|                 GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||||
|                 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); | ||||
|             break; | ||||
|         case SurfaceTarget::Texture2DArray: | ||||
|             glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||||
|                                       dst_surface->Texture().handle, 0, 0); | ||||
|             glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); | ||||
|             break; | ||||
| 
 | ||||
|         case SurfaceTarget::Texture3D: | ||||
|             glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||||
|                                    SurfaceTargetToGL(dst_params.target), | ||||
|                                    dst_surface->Texture().handle, 0, 0); | ||||
|             glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||||
|                                    SurfaceTargetToGL(dst_params.target), 0, 0, 0); | ||||
|             break; | ||||
|         default: | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||||
|                                    GL_TEXTURE_2D, dst_surface->Texture().handle, 0); | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                    0, 0); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         buffers = GL_COLOR_BUFFER_BIT; | ||||
|     } else if (src_params.type == SurfaceType::Depth) { | ||||
|         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||||
|                                GL_TEXTURE_2D, 0, 0); | ||||
|         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                src_surface->Texture().handle, 0); | ||||
|         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||||
| 
 | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||||
|                                GL_TEXTURE_2D, 0, 0); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                dst_surface->Texture().handle, 0); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||||
| 
 | ||||
|         buffers = GL_DEPTH_BUFFER_BIT; | ||||
|     } else if (src_params.type == SurfaceType::DepthStencil) { | ||||
|         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||||
|                                GL_TEXTURE_2D, 0, 0); | ||||
|         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                src_surface->Texture().handle, 0); | ||||
| 
 | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||||
|                                GL_TEXTURE_2D, 0, 0); | ||||
|         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||||
|                                dst_surface->Texture().handle, 0); | ||||
| 
 | ||||
|         buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | ||||
|     } | ||||
| 
 | ||||
|     glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, | ||||
|                       dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, | ||||
|                       buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); | ||||
| 
 | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| void RasterizerCacheOpenGL::FermiCopySurface( | ||||
|     const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||||
|     const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) { | ||||
|     const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||||
|     const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) { | ||||
| 
 | ||||
|     const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); | ||||
|     const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); | ||||
| 
 | ||||
|     ASSERT(src_params.width == dst_params.width); | ||||
|     ASSERT(src_params.height == dst_params.height); | ||||
|     ASSERT(src_params.pixel_format == dst_params.pixel_format); | ||||
|     ASSERT(src_params.block_height == dst_params.block_height); | ||||
|     ASSERT(src_params.is_tiled == dst_params.is_tiled); | ||||
|     ASSERT(src_params.depth == dst_params.depth); | ||||
|     ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
 | ||||
|     ASSERT(src_params.target == dst_params.target); | ||||
|     ASSERT(src_params.rt.index == dst_params.rt.index); | ||||
| 
 | ||||
|     FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false)); | ||||
|     auto src_surface = GetSurface(src_params, true); | ||||
|     auto dst_surface = GetSurface(dst_params, true); | ||||
| 
 | ||||
|     BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle, | ||||
|                 draw_framebuffer.handle); | ||||
| } | ||||
| 
 | ||||
| void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, | ||||
|  | ||||
| @ -421,7 +421,9 @@ public: | ||||
| 
 | ||||
|     /// Copies the contents of one surface to another
 | ||||
|     void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||||
|                           const Tegra::Engines::Fermi2D::Regs::Surface& dst_config); | ||||
|                           const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||||
|                           const MathUtil::Rectangle<u32>& src_rect, | ||||
|                           const MathUtil::Rectangle<u32>& dst_rect); | ||||
| 
 | ||||
| private: | ||||
|     void LoadSurface(const Surface& surface); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 bunnei
						bunnei