From 4add1f67b1f327bbb075e08b6e02a23afbcf2b2a Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 23 Oct 2022 18:09:47 +0300 Subject: [PATCH 1/4] [D3D12] Replace unused shared memory view with a null view Fixes the PIX validation warning about missing resource states on every guest draw. Also potentially prevents drivers from making assumptions about the shared memory buffer based on the bindings, though no such cases are currently known. --- .../gpu/d3d12/d3d12_command_processor.cc | 155 ++++++++++++------ src/xenia/gpu/d3d12/d3d12_command_processor.h | 42 +++-- 2 files changed, 141 insertions(+), 56 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index e39418cc8e..b086f325bf 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1000,6 +1000,36 @@ bool D3D12CommandProcessor::SetupContext() { parameter.Descriptor.RegisterSpace = 0; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } + // Shared memory SRV and UAV. + D3D12_DESCRIPTOR_RANGE root_shared_memory_view_ranges[2]; + { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_SharedMemory]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = + uint32_t(xe::countof(root_shared_memory_view_ranges)); + parameter.DescriptorTable.pDescriptorRanges = + root_shared_memory_view_ranges; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + { + auto& range = root_shared_memory_view_ranges[0]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); + range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain); + range.OffsetInDescriptorsFromTableStart = 0; + } + { + auto& range = root_shared_memory_view_ranges[1]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 1; + } + } // Sampler heap. D3D12_DESCRIPTOR_RANGE root_bindless_sampler_range; { @@ -1019,7 +1049,7 @@ bool D3D12CommandProcessor::SetupContext() { root_bindless_sampler_range.OffsetInDescriptorsFromTableStart = 0; } // View heap. - D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[6]; + D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[4]; { auto& parameter = root_parameters_bindless[kRootParameter_Bindless_ViewHeap]; @@ -1028,34 +1058,6 @@ bool D3D12CommandProcessor::SetupContext() { parameter.DescriptorTable.NumDescriptorRanges = 0; parameter.DescriptorTable.pDescriptorRanges = root_bindless_view_ranges; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - // Shared memory SRV. - { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = 1; - range.BaseShaderRegister = - UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); - range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain); - range.OffsetInDescriptorsFromTableStart = - UINT(SystemBindlessView::kSharedMemoryRawSRV); - } - // Shared memory UAV. - { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - range.NumDescriptors = 1; - range.BaseShaderRegister = - UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = - UINT(SystemBindlessView::kSharedMemoryRawUAV); - } // EDRAM. if (render_target_cache_->GetPath() == RenderTargetCache::Path::kPixelShaderInterlock) { @@ -1418,6 +1420,20 @@ bool D3D12CommandProcessor::SetupContext() { if (bindless_resources_used_) { // Create the system bindless descriptors once all resources are // initialized. + // kNullRawSRV. + ui::d3d12::util::CreateBufferRawSRV( + device, + provider.OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullRawSRV)), + nullptr, 0); + // kNullRawUAV. + ui::d3d12::util::CreateBufferRawUAV( + device, + provider.OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullRawUAV)), + nullptr, 0); // kNullTexture2DArray. D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc; null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; @@ -2272,7 +2288,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, used_texture_mask, normalized_depth_control, normalized_color_mask); // Update constant buffers, descriptors and root parameters. - if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) { + if (!UpdateBindings(vertex_shader, pixel_shader, root_signature, + memexport_used)) { return false; } // Must not call anything that can change the descriptor heap from now on! @@ -2890,6 +2907,7 @@ bool D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { cbuffer_binding_float_pixel_.up_to_date = false; cbuffer_binding_bool_loop_.up_to_date = false; cbuffer_binding_fetch_.up_to_date = false; + current_shared_memory_binding_is_uav_.reset(); if (bindless_resources_used_) { cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; @@ -3649,9 +3667,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( cbuffer_binding_system_.up_to_date &= !dirty; } -bool D3D12CommandProcessor::UpdateBindings( - const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - ID3D12RootSignature* root_signature) { +bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, + const D3D12Shader* pixel_shader, + ID3D12RootSignature* root_signature, + bool shared_memory_is_uav) { const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); const RegisterFile& regs = *register_file_; @@ -3688,6 +3707,9 @@ bool D3D12CommandProcessor::UpdateBindings( uint32_t root_parameter_bool_loop_constants = bindless_resources_used_ ? kRootParameter_Bindless_BoolLoopConstants : kRootParameter_Bindful_BoolLoopConstants; + uint32_t root_parameter_shared_memory_and_bindful_edram = + bindless_resources_used_ ? kRootParameter_Bindless_SharedMemory + : kRootParameter_Bindful_SharedMemoryAndEdram; // // Update root constant buffers that are common for bindful and bindless. @@ -3852,6 +3874,13 @@ bool D3D12CommandProcessor::UpdateBindings( // Update descriptors. // + if (!current_shared_memory_binding_is_uav_.has_value() || + current_shared_memory_binding_is_uav_.value() != shared_memory_is_uav) { + current_shared_memory_binding_is_uav_ = shared_memory_is_uav; + current_graphics_root_up_to_date_ &= + ~(1u << root_parameter_shared_memory_and_bindful_edram); + } + // Get textures and samplers used by the vertex shader, check if the last used // samplers are compatible and update them. size_t texture_layout_uid_vertex = @@ -4180,12 +4209,14 @@ bool D3D12CommandProcessor::UpdateBindings( if (write_textures_pixel) { view_count_partial_update += texture_count_pixel; } - // All the constants + shared memory SRV and UAV + textures. + // Shared memory SRV and null UAV + null SRV and shared memory UAV + + // textures. size_t view_count_full_update = - 2 + texture_count_vertex + texture_count_pixel; + 4 + texture_count_vertex + texture_count_pixel; if (edram_rov_used) { - // + EDRAM UAV. - ++view_count_full_update; + // + EDRAM UAV in two tables (with the shared memory SRV and with the + // shared memory UAV). + view_count_full_update += 2; } D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; @@ -4230,10 +4261,25 @@ bool D3D12CommandProcessor::UpdateBindings( bindful_textures_written_pixel_ = false; // If updating fully, write the shared memory SRV and UAV descriptors and, // if needed, the EDRAM descriptor. - gpu_handle_shared_memory_and_edram_ = view_gpu_handle; + // SRV + null UAV + EDRAM. + gpu_handle_shared_memory_srv_and_edram_ = view_gpu_handle; shared_memory_->WriteRawSRVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; + ui::d3d12::util::CreateBufferRawUAV(device, view_cpu_handle, nullptr, 0); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + if (edram_rov_used) { + render_target_cache_->WriteEdramUintPow2UAVDescriptor(view_cpu_handle, + 2); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + } + // Null SRV + UAV + EDRAM. + gpu_handle_shared_memory_uav_and_edram_ = view_gpu_handle; + ui::d3d12::util::CreateBufferRawSRV(device, view_cpu_handle, nullptr, 0); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; shared_memory_->WriteRawUAVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; @@ -4372,6 +4418,31 @@ bool D3D12CommandProcessor::UpdateBindings( current_graphics_root_up_to_date_ |= 1u << root_parameter_bool_loop_constants; } + if (!(current_graphics_root_up_to_date_ & + (1u << root_parameter_shared_memory_and_bindful_edram))) { + assert_true(current_shared_memory_binding_is_uav_.has_value()); + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_bindful_edram; + if (bindless_resources_used_) { + gpu_handle_shared_memory_and_bindful_edram = + provider.OffsetViewDescriptor( + view_bindless_heap_gpu_start_, + uint32_t(current_shared_memory_binding_is_uav_.value() + ? SystemBindlessView :: + kNullRawSRVAndSharedMemoryRawUAVStart + : SystemBindlessView :: + kSharedMemoryRawSRVAndNullRawUAVStart)); + } else { + gpu_handle_shared_memory_and_bindful_edram = + current_shared_memory_binding_is_uav_.value() + ? gpu_handle_shared_memory_uav_and_edram_ + : gpu_handle_shared_memory_srv_and_edram_; + } + deferred_command_list_.D3DSetGraphicsRootDescriptorTable( + root_parameter_shared_memory_and_bindful_edram, + gpu_handle_shared_memory_and_bindful_edram); + current_graphics_root_up_to_date_ |= + 1u << root_parameter_shared_memory_and_bindful_edram; + } if (bindless_resources_used_) { if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_Bindless_DescriptorIndicesPixel))) { @@ -4405,14 +4476,6 @@ bool D3D12CommandProcessor::UpdateBindings( << kRootParameter_Bindless_ViewHeap; } } else { - if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_Bindful_SharedMemoryAndEdram))) { - deferred_command_list_.D3DSetGraphicsRootDescriptorTable( - kRootParameter_Bindful_SharedMemoryAndEdram, - gpu_handle_shared_memory_and_edram_); - current_graphics_root_up_to_date_ |= - 1u << kRootParameter_Bindful_SharedMemoryAndEdram; - } uint32_t extra_index; extra_index = current_graphics_root_bindful_extras_.textures_pixel; if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 6162b46830..353e6bd0b5 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -128,11 +129,19 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t count, ui::d3d12::util::DescriptorCpuGpuHandlePair* handles_out); // These are needed often, so they are always allocated. enum class SystemBindlessView : uint32_t { - kSharedMemoryRawSRV, + // Both may be bound as one root parameter. + kSharedMemoryRawSRVAndNullRawUAVStart, + kSharedMemoryRawSRV = kSharedMemoryRawSRVAndNullRawUAVStart, + kNullRawUAV, + + // Both may be bound as one root parameter. + kNullRawSRVAndSharedMemoryRawUAVStart, + kNullRawSRV = kNullRawSRVAndSharedMemoryRawUAVStart, + kSharedMemoryRawUAV, + kSharedMemoryR32UintSRV, kSharedMemoryR32G32UintSRV, kSharedMemoryR32G32B32A32UintSRV, - kSharedMemoryRawUAV, kSharedMemoryR32UintUAV, kSharedMemoryR32G32UintUAV, kSharedMemoryR32G32B32A32UintUAV, @@ -253,10 +262,10 @@ class D3D12CommandProcessor : public CommandProcessor { kRootParameter_Bindful_SystemConstants, // +2 = 6 in all. // Pretty rarely used and rarely changed - flow control constants. kRootParameter_Bindful_BoolLoopConstants, // +2 = 8 in all. - // Never changed except for when starting a new descriptor heap - shared - // memory byte address buffer, and, if ROV is used for EDRAM, EDRAM R32_UINT - // UAV. - // SRV/UAV descriptor table. + // Changed only when starting a new descriptor heap or when switching + // between shared memory as SRV and UAV - shared memory byte address buffer + // (as SRV and as UAV, either may be null if not used), and, if ROV is used + // for EDRAM, EDRAM R32_UINT UAV. kRootParameter_Bindful_SharedMemoryAndEdram, // +1 = 9 in all. kRootParameter_Bindful_Count_Base, @@ -280,10 +289,14 @@ class D3D12CommandProcessor : public CommandProcessor { kRootParameter_Bindless_DescriptorIndicesVertex, // +2 = 6 in VS. kRootParameter_Bindless_SystemConstants, // +2 = 8 in all. kRootParameter_Bindless_BoolLoopConstants, // +2 = 10 in all. + // Changed only when switching between shared memory as SRV and UAV - shared + // memory byte address buffer (as SRV and as UAV, either may be null if not + // used). + kRootParameter_Bindless_SharedMemory, // +1 = 11 in all. // Unbounded sampler descriptor table - changed in case of overflow. - kRootParameter_Bindless_SamplerHeap, // +1 = 11 in all. + kRootParameter_Bindless_SamplerHeap, // +1 = 12 in all. // Unbounded SRV/UAV descriptor table - never changed. - kRootParameter_Bindless_ViewHeap, // +1 = 12 in all. + kRootParameter_Bindless_ViewHeap, // +1 = 13 in all. kRootParameter_Bindless_Count, }; @@ -362,7 +375,8 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t normalized_color_mask); bool UpdateBindings(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - ID3D12RootSignature* root_signature); + ID3D12RootSignature* root_signature, + bool shared_memory_is_uav); // Returns dword count for one element for a memexport format, or 0 if it's // not supported by the D3D12 command processor (if it's smaller that 1 dword, @@ -622,6 +636,13 @@ class D3D12CommandProcessor : public CommandProcessor { ConstantBufferBinding cbuffer_binding_descriptor_indices_vertex_; ConstantBufferBinding cbuffer_binding_descriptor_indices_pixel_; + // Whether the latest shared memory and EDRAM buffer binding contains the + // shared memory UAV rather than the SRV. + // Separate descriptor tables for the SRV and the UAV, even though only one is + // accessed dynamically in the shaders, are used to prevent a validation + // message about missing resource states in PIX. + std::optional current_shared_memory_binding_is_uav_; + // Pages with the descriptors currently used for handling Xenos draw calls. uint64_t draw_view_bindful_heap_index_; uint64_t draw_sampler_bindful_heap_index_; @@ -654,7 +675,8 @@ class D3D12CommandProcessor : public CommandProcessor { std::vector current_sampler_bindless_indices_pixel_; // Latest bindful descriptor handles used for handling Xenos draw calls. - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_edram_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_srv_and_edram_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_uav_and_edram_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_vertex_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_pixel_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_samplers_vertex_; From 74f1f6bb6d1facfcb0e584ad4c168c2f8bdf5534 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 23 Oct 2022 19:01:17 +0300 Subject: [PATCH 2/4] [Vulkan] Check depthClamp feature --- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index f1af57a23f..2969699590 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -581,6 +581,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( primitive_processing_result.host_primitive_reset_enabled; description_out.depth_clamp_enable = + device_features.depthClamp && regs.Get().clip_disable; // TODO(Triang3l): Tessellation. @@ -825,6 +826,10 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( return false; } + if (!device_features.depthClamp && description.depth_clamp_enable) { + return false; + } + if (!device_features.fillModeNonSolid && description.polygon_mode != PipelinePolygonMode::kFill) { return false; From a37b57ca8dd0b86b0476156d30da18d001aea90f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 23 Oct 2022 21:19:41 +0300 Subject: [PATCH 3/4] [GPU] Fix tiled mip tail extent calculation Previously, for mips, the dimensions of the texture weren't rounded to powers of two before calculating the mip tail extent, resulting in the mip tail for a 260 blocks tall texture, that contains mips ending at Y of up to 36, having the Y extent calculated as 32. With rounding to powers of two, it would have been 64. However, with the GetTiledAddressUpperBound functions, none of this is necessary at all (and neither is rounding the extents in TextureGuestLayout::Level to 32x32x4 blocks) - using the same code for calculating the XYZ extents of tiled textures as for linear textures now, which, for the mip tail, calculates the actual maximum coordinates of the mips stored in it - and rounding to tiles is done internally by GetTiledAddressUpperBound. --- src/xenia/gpu/texture_util.cc | 122 +++++++++++++++------------------- src/xenia/gpu/texture_util.h | 33 ++++----- 2 files changed, 72 insertions(+), 83 deletions(-) diff --git a/src/xenia/gpu/texture_util.cc b/src/xenia/gpu/texture_util.cc index ada6cf1404..5b2f7f2357 100644 --- a/src/xenia/gpu/texture_util.cc +++ b/src/xenia/gpu/texture_util.cc @@ -355,90 +355,76 @@ TextureGuestLayout GetGuestTextureLayout( // be smaller (especially in the 1280x720 linear k_8_8_8_8 case in 4E4D083E, // for which memory exactly for 1280x720 is allocated, and aligning the // height to 32 would cause access of an unallocated page) or bigger than - // the stride. For tiled textures, this is the dimensions aligned to 32x32x4 - // blocks (or x1 for the missing dimensions). - uint32_t level_width_blocks = - xe::align(std::max(width_texels >> level, uint32_t(1)), - format_info->block_width) / - format_info->block_width; - uint32_t level_height_blocks = - xe::align(std::max(height_texels >> level, uint32_t(1)), - format_info->block_height) / - format_info->block_height; - uint32_t level_depth = std::max(depth >> level, uint32_t(1)); - if (is_tiled) { + // the stride. + if (level == layout.packed_level) { + // Calculate the portion of the mip tail actually used by the needed mips. + // The actually used region may be significantly smaller than the full + // 32x32-texel-aligned (and, for mips, calculated from the base dimensions + // rounded to powers of two - 58410A7A has an 80x260 tiled texture with + // packed mips at level 3 containing a mip ending at Y = 36, while + // 260 >> 3 == 32, but 512 >> 3 == 64) tail. A 2x2 texture (for example, + // in 494707D4, there's a 2x2 k_8_8_8_8 linear texture with packed mips), + // for instance, would have its 2x2 base at (16, 0) and its 1x1 mip at + // (8, 0) - and we need 2 or 1 rows in these cases, not 32 - the 32 rows + // in a linear texture (with 256-byte pitch alignment) would span two 4 KB + // pages rather than one. + level_layout.x_extent_blocks = 0; + level_layout.y_extent_blocks = 0; + level_layout.z_extent = 0; + uint32_t packed_sublevel_last = is_base ? 0 : max_level; + for (uint32_t packed_sublevel = layout.packed_level; + packed_sublevel <= packed_sublevel_last; ++packed_sublevel) { + uint32_t packed_sublevel_x_blocks; + uint32_t packed_sublevel_y_blocks; + uint32_t packed_sublevel_z; + GetPackedMipOffset(width_texels, height_texels, depth, format, + packed_sublevel, packed_sublevel_x_blocks, + packed_sublevel_y_blocks, packed_sublevel_z); + level_layout.x_extent_blocks = std::max( + level_layout.x_extent_blocks, + packed_sublevel_x_blocks + + xe::align( + std::max(width_texels >> packed_sublevel, uint32_t(1)), + format_info->block_width) / + format_info->block_width); + level_layout.y_extent_blocks = std::max( + level_layout.y_extent_blocks, + packed_sublevel_y_blocks + + xe::align( + std::max(height_texels >> packed_sublevel, uint32_t(1)), + format_info->block_height) / + format_info->block_height); + level_layout.z_extent = + std::max(level_layout.z_extent, + packed_sublevel_z + + std::max(depth >> packed_sublevel, uint32_t(1))); + } + } else { level_layout.x_extent_blocks = - xe::align(level_width_blocks, xenos::kTextureTileWidthHeight); + xe::align(std::max(width_texels >> level, uint32_t(1)), + format_info->block_width) / + format_info->block_width; level_layout.y_extent_blocks = - xe::align(level_height_blocks, xenos::kTextureTileWidthHeight); + xe::align(std::max(height_texels >> level, uint32_t(1)), + format_info->block_height) / + format_info->block_height; + level_layout.z_extent = std::max(depth >> level, uint32_t(1)); + } + if (is_tiled) { uint32_t bytes_per_block_log2 = xe::log2_floor(bytes_per_block); if (dimension == xenos::DataDimension::k3D) { - level_layout.z_extent = - xe::align(level_depth, xenos::kTextureTileDepth); - // 32-block-row x 4 slice portions laid out sequentially (4-slice-major, - // 32-block-row-minor), address extent within a 32x32x4 tile depends on - // the pitch. Origins of 32x32x4 tiles grow monotonically, first along - // Z, then along Y, then along X. level_layout.array_slice_data_extent_bytes = GetTiledAddressUpperBound3D( level_layout.x_extent_blocks, level_layout.y_extent_blocks, level_layout.z_extent, row_pitch_blocks_tile_aligned, level_layout.y_extent_blocks, bytes_per_block_log2); } else { - level_layout.z_extent = 1; - // Origins of 32x32 tiles grow monotonically, first along Y, then along - // X. level_layout.array_slice_data_extent_bytes = GetTiledAddressUpperBound2D( level_layout.x_extent_blocks, level_layout.y_extent_blocks, row_pitch_blocks_tile_aligned, bytes_per_block_log2); } } else { - if (level == layout.packed_level) { - // Calculate the portion of the mip tail actually used by the needed - // mips. The actually used region may be significantly smaller than the - // full 32x32-texel-aligned tail. A 2x2 texture (for example, in - // 494707D4, there's a 2x2 k_8_8_8_8 linear texture with packed mips), - // for instance, would have its 2x2 base at (16, 0) and its 1x1 mip at - // (8, 0) - and we need 2 or 1 rows in these cases, not 32 - the 32 rows - // would span two 4 KB pages rather than one, taking the 256-byte pitch - // alignment in linear textures into account. - level_layout.x_extent_blocks = 0; - level_layout.y_extent_blocks = 0; - level_layout.z_extent = 0; - uint32_t packed_sublevel_last = is_base ? 0 : max_level; - for (uint32_t packed_sublevel = layout.packed_level; - packed_sublevel <= packed_sublevel_last; ++packed_sublevel) { - uint32_t packed_sublevel_x_blocks; - uint32_t packed_sublevel_y_blocks; - uint32_t packed_sublevel_z; - GetPackedMipOffset(width_texels, height_texels, depth, format, - packed_sublevel, packed_sublevel_x_blocks, - packed_sublevel_y_blocks, packed_sublevel_z); - level_layout.x_extent_blocks = std::max( - level_layout.x_extent_blocks, - packed_sublevel_x_blocks + - xe::align( - std::max(width_texels >> packed_sublevel, uint32_t(1)), - format_info->block_width) / - format_info->block_width); - level_layout.y_extent_blocks = std::max( - level_layout.y_extent_blocks, - packed_sublevel_y_blocks + - xe::align( - std::max(height_texels >> packed_sublevel, uint32_t(1)), - format_info->block_height) / - format_info->block_height); - level_layout.z_extent = - std::max(level_layout.z_extent, - packed_sublevel_z + - std::max(depth >> packed_sublevel, uint32_t(1))); - } - } else { - level_layout.x_extent_blocks = level_width_blocks; - level_layout.y_extent_blocks = level_height_blocks; - level_layout.z_extent = level_depth; - } level_layout.array_slice_data_extent_bytes = z_stride_bytes * (level_layout.z_extent - 1) + level_layout.row_pitch_bytes * (level_layout.y_extent_blocks - 1) + diff --git a/src/xenia/gpu/texture_util.h b/src/xenia/gpu/texture_util.h index bcc080de37..b52b4fa38b 100644 --- a/src/xenia/gpu/texture_util.h +++ b/src/xenia/gpu/texture_util.h @@ -144,24 +144,27 @@ struct TextureGuestLayout { // multiplied by the array slice count. uint32_t array_slice_stride_bytes; - // Estimated amount of memory this level occupies, and variables involved in - // its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For - // tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1 - // depending on the dimension), but for the linear subresources, this may be - // significantly (including less 4 KB pages) smaller than the aligned size - // (like for 4E4D083E where aligning the height of a 1280x720 linear texture - // results in access violations). For the linear mip tail, this includes all - // the mip levels stored in it. If the width is bigger than the pitch, this - // will also be taken into account for the last row so all memory actually - // used by the texture will be loaded, and may be bigger than the distance - // between array slices or levels. The purpose of this parameter is to make - // the memory amount that needs to be resident as close to the real amount - // as possible, to make sure all the needed data will be read, but also, if - // possible, unneeded memory pages won't be accessed (since that may trigger - // an access violation on the CPU). + // The exclusive upper bound of blocks needed at this level (this level for + // non-packed levels, or all the packed levels for the packed mip tail). uint32_t x_extent_blocks; uint32_t y_extent_blocks; uint32_t z_extent; + // Estimated amount of memory this level occupies. Not aligned to + // kTextureSubresourceAlignmentBytes. For tiled textures, this will be + // calculated for the extent rounded to 32x32x4 blocks (or 32x32x1 depending + // on the dimensionality), but for linear textures, as well as for mips of + // non-power-of-two tiled textures, this may be significantly (including + // less 4 KB pages) smaller than the aligned size (like for 4E4D083E where + // aligning the height of a 1280x720 linear texture results in access + // violations). For the linear mip tail, this includes all the mip levels + // stored in it. If the width is bigger than the pitch, this will also be + // taken into account for the last row so all memory actually used by the + // texture will be loaded, and may be bigger than the distance between array + // slices or levels. The purpose of this parameter is to make the memory + // amount that needs to be resident as close to the real amount as possible, + // to make sure all the needed data will be read, but also, if possible, + // unneeded memory pages won't be accessed (since that may trigger an access + // violation on the CPU). uint32_t array_slice_data_extent_bytes; // Including all array slices. uint32_t level_data_extent_bytes; From 778333b1b50f9c50b5aaa0010346aa0855840292 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 31 Oct 2022 18:57:54 +0300 Subject: [PATCH 4/4] [UI] Fix ClearInput not called in ImGuiDrawer after deferred dialog removal Also cleanup the code involved in dialog registration, and update the explanation of why dialog removal is delayed until the end of drawing (the original was written back when window listener and UI drawer callback registration during the execution of the callbacks was deferred, but that was wrong as that might result in execution of callbacks belonging to now-deleted objects). --- src/xenia/ui/imgui_drawer.cc | 55 +++++++++++++++++++++--------------- src/xenia/ui/imgui_drawer.h | 3 ++ 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/src/xenia/ui/imgui_drawer.cc b/src/xenia/ui/imgui_drawer.cc index 2454947fd2..5f3075ca56 100644 --- a/src/xenia/ui/imgui_drawer.cc +++ b/src/xenia/ui/imgui_drawer.cc @@ -62,11 +62,11 @@ void ImGuiDrawer::AddDialog(ImGuiDialog* dialog) { dialogs_.cend()) { return; } - if (dialog_loop_next_index_ == SIZE_MAX && dialogs_.empty()) { - // First dialog added. dialog_loop_next_index_ == SIZE_MAX is also checked - // because in a situation of removing the only dialog, then adding a dialog, - // from within a dialog's Draw function, the removal would not cause the - // listener and the drawer to be removed (it's deferred in this case). + if (dialogs_.empty() && !IsDrawingDialogs()) { + // First dialog added. !IsDrawingDialogs() is also checked because in a + // situation of removing the only dialog, then adding a dialog, from within + // a dialog's Draw function, re-registering the ImGuiDrawer may result in + // ImGui being drawn multiple times in the current frame. window_->AddInputListener(this, z_order_); if (presenter_) { presenter_->AddUIDrawerFromUIThread(this, z_order_); @@ -81,7 +81,7 @@ void ImGuiDrawer::RemoveDialog(ImGuiDialog* dialog) { if (it == dialogs_.cend()) { return; } - if (dialog_loop_next_index_ != SIZE_MAX) { + if (IsDrawingDialogs()) { // Actualize the next dialog index after the erasure from the vector. size_t existing_index = size_t(std::distance(dialogs_.cbegin(), it)); if (dialog_loop_next_index_ > existing_index) { @@ -89,17 +89,7 @@ void ImGuiDrawer::RemoveDialog(ImGuiDialog* dialog) { } } dialogs_.erase(it); - if (dialog_loop_next_index_ == SIZE_MAX && dialogs_.empty()) { - if (presenter_) { - presenter_->RemoveUIDrawerFromUIThread(this); - } - window_->RemoveInputListener(this); - // Clear all input since no input will be received anymore, and when the - // drawer becomes active again, it'd have an outdated input state otherwise - // which will be persistent until new events actualize individual input - // properties. - ClearInput(); - } + DetachIfLastDialogRemoved(); } void ImGuiDrawer::Initialize() { @@ -301,7 +291,7 @@ void ImGuiDrawer::Draw(UIDrawContext& ui_draw_context) { ImGui::NewFrame(); - assert_true(dialog_loop_next_index_ == SIZE_MAX); + assert_true(!IsDrawingDialogs()); dialog_loop_next_index_ = 0; while (dialog_loop_next_index_ < dialogs_.size()) { dialogs_[dialog_loop_next_index_++]->Draw(); @@ -319,11 +309,11 @@ void ImGuiDrawer::Draw(UIDrawContext& ui_draw_context) { io.MousePos = ImVec2(-FLT_MAX, -FLT_MAX); } - if (dialogs_.empty()) { - // All dialogs have removed themselves during the draw, detach. - presenter_->RemoveUIDrawerFromUIThread(this); - window_->RemoveInputListener(this); - } else { + // Detaching is deferred if the last dialog is removed during drawing, perform + // it now if needed. + DetachIfLastDialogRemoved(); + + if (!dialogs_.empty()) { // Repaint (and handle input) continuously if still active. presenter_->RequestUIPaintFromUIThread(); } @@ -557,5 +547,24 @@ void ImGuiDrawer::SwitchToPhysicalMouseAndUpdateMousePosition( UpdateMousePosition(float(e.x()), float(e.y())); } +void ImGuiDrawer::DetachIfLastDialogRemoved() { + // IsDrawingDialogs() is also checked because in a situation of removing the + // only dialog, then adding a dialog, from within a dialog's Draw function, + // re-registering the ImGuiDrawer may result in ImGui being drawn multiple + // times in the current frame. + if (!dialogs_.empty() || IsDrawingDialogs()) { + return; + } + if (presenter_) { + presenter_->RemoveUIDrawerFromUIThread(this); + } + window_->RemoveInputListener(this); + // Clear all input since no input will be received anymore, and when the + // drawer becomes active again, it'd have an outdated input state otherwise + // which will be persistent until new events actualize individual input + // properties. + ClearInput(); +} + } // namespace ui } // namespace xe diff --git a/src/xenia/ui/imgui_drawer.h b/src/xenia/ui/imgui_drawer.h index 5563a11519..2a114fce48 100644 --- a/src/xenia/ui/imgui_drawer.h +++ b/src/xenia/ui/imgui_drawer.h @@ -74,6 +74,9 @@ class ImGuiDrawer : public WindowInputListener, public UIDrawer { void UpdateMousePosition(float x, float y); void SwitchToPhysicalMouseAndUpdateMousePosition(const MouseEvent& e); + bool IsDrawingDialogs() const { return dialog_loop_next_index_ != SIZE_MAX; } + void DetachIfLastDialogRemoved(); + Window* window_; size_t z_order_;