diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index e39418cc8e..b086f325bf 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1000,6 +1000,36 @@ bool D3D12CommandProcessor::SetupContext() { parameter.Descriptor.RegisterSpace = 0; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } + // Shared memory SRV and UAV. + D3D12_DESCRIPTOR_RANGE root_shared_memory_view_ranges[2]; + { + auto& parameter = + root_parameters_bindless[kRootParameter_Bindless_SharedMemory]; + parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + parameter.DescriptorTable.NumDescriptorRanges = + uint32_t(xe::countof(root_shared_memory_view_ranges)); + parameter.DescriptorTable.pDescriptorRanges = + root_shared_memory_view_ranges; + parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + { + auto& range = root_shared_memory_view_ranges[0]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); + range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain); + range.OffsetInDescriptorsFromTableStart = 0; + } + { + auto& range = root_shared_memory_view_ranges[1]; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + range.NumDescriptors = 1; + range.BaseShaderRegister = + UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); + range.RegisterSpace = 0; + range.OffsetInDescriptorsFromTableStart = 1; + } + } // Sampler heap. D3D12_DESCRIPTOR_RANGE root_bindless_sampler_range; { @@ -1019,7 +1049,7 @@ bool D3D12CommandProcessor::SetupContext() { root_bindless_sampler_range.OffsetInDescriptorsFromTableStart = 0; } // View heap. - D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[6]; + D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[4]; { auto& parameter = root_parameters_bindless[kRootParameter_Bindless_ViewHeap]; @@ -1028,34 +1058,6 @@ bool D3D12CommandProcessor::SetupContext() { parameter.DescriptorTable.NumDescriptorRanges = 0; parameter.DescriptorTable.pDescriptorRanges = root_bindless_view_ranges; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - // Shared memory SRV. - { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - range.NumDescriptors = 1; - range.BaseShaderRegister = - UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); - range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain); - range.OffsetInDescriptorsFromTableStart = - UINT(SystemBindlessView::kSharedMemoryRawSRV); - } - // Shared memory UAV. - { - assert_true(parameter.DescriptorTable.NumDescriptorRanges < - xe::countof(root_bindless_view_ranges)); - auto& range = root_bindless_view_ranges[parameter.DescriptorTable - .NumDescriptorRanges++]; - range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - range.NumDescriptors = 1; - range.BaseShaderRegister = - UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); - range.RegisterSpace = 0; - range.OffsetInDescriptorsFromTableStart = - UINT(SystemBindlessView::kSharedMemoryRawUAV); - } // EDRAM. if (render_target_cache_->GetPath() == RenderTargetCache::Path::kPixelShaderInterlock) { @@ -1418,6 +1420,20 @@ bool D3D12CommandProcessor::SetupContext() { if (bindless_resources_used_) { // Create the system bindless descriptors once all resources are // initialized. + // kNullRawSRV. + ui::d3d12::util::CreateBufferRawSRV( + device, + provider.OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullRawSRV)), + nullptr, 0); + // kNullRawUAV. + ui::d3d12::util::CreateBufferRawUAV( + device, + provider.OffsetViewDescriptor( + view_bindless_heap_cpu_start_, + uint32_t(SystemBindlessView::kNullRawUAV)), + nullptr, 0); // kNullTexture2DArray. D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc; null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; @@ -2272,7 +2288,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, used_texture_mask, normalized_depth_control, normalized_color_mask); // Update constant buffers, descriptors and root parameters. - if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) { + if (!UpdateBindings(vertex_shader, pixel_shader, root_signature, + memexport_used)) { return false; } // Must not call anything that can change the descriptor heap from now on! @@ -2890,6 +2907,7 @@ bool D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { cbuffer_binding_float_pixel_.up_to_date = false; cbuffer_binding_bool_loop_.up_to_date = false; cbuffer_binding_fetch_.up_to_date = false; + current_shared_memory_binding_is_uav_.reset(); if (bindless_resources_used_) { cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; @@ -3649,9 +3667,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( cbuffer_binding_system_.up_to_date &= !dirty; } -bool D3D12CommandProcessor::UpdateBindings( - const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - ID3D12RootSignature* root_signature) { +bool D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader, + const D3D12Shader* pixel_shader, + ID3D12RootSignature* root_signature, + bool shared_memory_is_uav) { const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); const RegisterFile& regs = *register_file_; @@ -3688,6 +3707,9 @@ bool D3D12CommandProcessor::UpdateBindings( uint32_t root_parameter_bool_loop_constants = bindless_resources_used_ ? kRootParameter_Bindless_BoolLoopConstants : kRootParameter_Bindful_BoolLoopConstants; + uint32_t root_parameter_shared_memory_and_bindful_edram = + bindless_resources_used_ ? kRootParameter_Bindless_SharedMemory + : kRootParameter_Bindful_SharedMemoryAndEdram; // // Update root constant buffers that are common for bindful and bindless. @@ -3852,6 +3874,13 @@ bool D3D12CommandProcessor::UpdateBindings( // Update descriptors. // + if (!current_shared_memory_binding_is_uav_.has_value() || + current_shared_memory_binding_is_uav_.value() != shared_memory_is_uav) { + current_shared_memory_binding_is_uav_ = shared_memory_is_uav; + current_graphics_root_up_to_date_ &= + ~(1u << root_parameter_shared_memory_and_bindful_edram); + } + // Get textures and samplers used by the vertex shader, check if the last used // samplers are compatible and update them. size_t texture_layout_uid_vertex = @@ -4180,12 +4209,14 @@ bool D3D12CommandProcessor::UpdateBindings( if (write_textures_pixel) { view_count_partial_update += texture_count_pixel; } - // All the constants + shared memory SRV and UAV + textures. + // Shared memory SRV and null UAV + null SRV and shared memory UAV + + // textures. size_t view_count_full_update = - 2 + texture_count_vertex + texture_count_pixel; + 4 + texture_count_vertex + texture_count_pixel; if (edram_rov_used) { - // + EDRAM UAV. - ++view_count_full_update; + // + EDRAM UAV in two tables (with the shared memory SRV and with the + // shared memory UAV). + view_count_full_update += 2; } D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; @@ -4230,10 +4261,25 @@ bool D3D12CommandProcessor::UpdateBindings( bindful_textures_written_pixel_ = false; // If updating fully, write the shared memory SRV and UAV descriptors and, // if needed, the EDRAM descriptor. - gpu_handle_shared_memory_and_edram_ = view_gpu_handle; + // SRV + null UAV + EDRAM. + gpu_handle_shared_memory_srv_and_edram_ = view_gpu_handle; shared_memory_->WriteRawSRVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; + ui::d3d12::util::CreateBufferRawUAV(device, view_cpu_handle, nullptr, 0); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + if (edram_rov_used) { + render_target_cache_->WriteEdramUintPow2UAVDescriptor(view_cpu_handle, + 2); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + } + // Null SRV + UAV + EDRAM. + gpu_handle_shared_memory_uav_and_edram_ = view_gpu_handle; + ui::d3d12::util::CreateBufferRawSRV(device, view_cpu_handle, nullptr, 0); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; shared_memory_->WriteRawUAVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; @@ -4372,6 +4418,31 @@ bool D3D12CommandProcessor::UpdateBindings( current_graphics_root_up_to_date_ |= 1u << root_parameter_bool_loop_constants; } + if (!(current_graphics_root_up_to_date_ & + (1u << root_parameter_shared_memory_and_bindful_edram))) { + assert_true(current_shared_memory_binding_is_uav_.has_value()); + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_bindful_edram; + if (bindless_resources_used_) { + gpu_handle_shared_memory_and_bindful_edram = + provider.OffsetViewDescriptor( + view_bindless_heap_gpu_start_, + uint32_t(current_shared_memory_binding_is_uav_.value() + ? SystemBindlessView :: + kNullRawSRVAndSharedMemoryRawUAVStart + : SystemBindlessView :: + kSharedMemoryRawSRVAndNullRawUAVStart)); + } else { + gpu_handle_shared_memory_and_bindful_edram = + current_shared_memory_binding_is_uav_.value() + ? gpu_handle_shared_memory_uav_and_edram_ + : gpu_handle_shared_memory_srv_and_edram_; + } + deferred_command_list_.D3DSetGraphicsRootDescriptorTable( + root_parameter_shared_memory_and_bindful_edram, + gpu_handle_shared_memory_and_bindful_edram); + current_graphics_root_up_to_date_ |= + 1u << root_parameter_shared_memory_and_bindful_edram; + } if (bindless_resources_used_) { if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_Bindless_DescriptorIndicesPixel))) { @@ -4405,14 +4476,6 @@ bool D3D12CommandProcessor::UpdateBindings( << kRootParameter_Bindless_ViewHeap; } } else { - if (!(current_graphics_root_up_to_date_ & - (1u << kRootParameter_Bindful_SharedMemoryAndEdram))) { - deferred_command_list_.D3DSetGraphicsRootDescriptorTable( - kRootParameter_Bindful_SharedMemoryAndEdram, - gpu_handle_shared_memory_and_edram_); - current_graphics_root_up_to_date_ |= - 1u << kRootParameter_Bindful_SharedMemoryAndEdram; - } uint32_t extra_index; extra_index = current_graphics_root_bindful_extras_.textures_pixel; if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 6162b46830..353e6bd0b5 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -128,11 +129,19 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t count, ui::d3d12::util::DescriptorCpuGpuHandlePair* handles_out); // These are needed often, so they are always allocated. enum class SystemBindlessView : uint32_t { - kSharedMemoryRawSRV, + // Both may be bound as one root parameter. + kSharedMemoryRawSRVAndNullRawUAVStart, + kSharedMemoryRawSRV = kSharedMemoryRawSRVAndNullRawUAVStart, + kNullRawUAV, + + // Both may be bound as one root parameter. + kNullRawSRVAndSharedMemoryRawUAVStart, + kNullRawSRV = kNullRawSRVAndSharedMemoryRawUAVStart, + kSharedMemoryRawUAV, + kSharedMemoryR32UintSRV, kSharedMemoryR32G32UintSRV, kSharedMemoryR32G32B32A32UintSRV, - kSharedMemoryRawUAV, kSharedMemoryR32UintUAV, kSharedMemoryR32G32UintUAV, kSharedMemoryR32G32B32A32UintUAV, @@ -253,10 +262,10 @@ class D3D12CommandProcessor : public CommandProcessor { kRootParameter_Bindful_SystemConstants, // +2 = 6 in all. // Pretty rarely used and rarely changed - flow control constants. kRootParameter_Bindful_BoolLoopConstants, // +2 = 8 in all. - // Never changed except for when starting a new descriptor heap - shared - // memory byte address buffer, and, if ROV is used for EDRAM, EDRAM R32_UINT - // UAV. - // SRV/UAV descriptor table. + // Changed only when starting a new descriptor heap or when switching + // between shared memory as SRV and UAV - shared memory byte address buffer + // (as SRV and as UAV, either may be null if not used), and, if ROV is used + // for EDRAM, EDRAM R32_UINT UAV. kRootParameter_Bindful_SharedMemoryAndEdram, // +1 = 9 in all. kRootParameter_Bindful_Count_Base, @@ -280,10 +289,14 @@ class D3D12CommandProcessor : public CommandProcessor { kRootParameter_Bindless_DescriptorIndicesVertex, // +2 = 6 in VS. kRootParameter_Bindless_SystemConstants, // +2 = 8 in all. kRootParameter_Bindless_BoolLoopConstants, // +2 = 10 in all. + // Changed only when switching between shared memory as SRV and UAV - shared + // memory byte address buffer (as SRV and as UAV, either may be null if not + // used). + kRootParameter_Bindless_SharedMemory, // +1 = 11 in all. // Unbounded sampler descriptor table - changed in case of overflow. - kRootParameter_Bindless_SamplerHeap, // +1 = 11 in all. + kRootParameter_Bindless_SamplerHeap, // +1 = 12 in all. // Unbounded SRV/UAV descriptor table - never changed. - kRootParameter_Bindless_ViewHeap, // +1 = 12 in all. + kRootParameter_Bindless_ViewHeap, // +1 = 13 in all. kRootParameter_Bindless_Count, }; @@ -362,7 +375,8 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t normalized_color_mask); bool UpdateBindings(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - ID3D12RootSignature* root_signature); + ID3D12RootSignature* root_signature, + bool shared_memory_is_uav); // Returns dword count for one element for a memexport format, or 0 if it's // not supported by the D3D12 command processor (if it's smaller that 1 dword, @@ -622,6 +636,13 @@ class D3D12CommandProcessor : public CommandProcessor { ConstantBufferBinding cbuffer_binding_descriptor_indices_vertex_; ConstantBufferBinding cbuffer_binding_descriptor_indices_pixel_; + // Whether the latest shared memory and EDRAM buffer binding contains the + // shared memory UAV rather than the SRV. + // Separate descriptor tables for the SRV and the UAV, even though only one is + // accessed dynamically in the shaders, are used to prevent a validation + // message about missing resource states in PIX. + std::optional current_shared_memory_binding_is_uav_; + // Pages with the descriptors currently used for handling Xenos draw calls. uint64_t draw_view_bindful_heap_index_; uint64_t draw_sampler_bindful_heap_index_; @@ -654,7 +675,8 @@ class D3D12CommandProcessor : public CommandProcessor { std::vector current_sampler_bindless_indices_pixel_; // Latest bindful descriptor handles used for handling Xenos draw calls. - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_edram_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_srv_and_edram_; + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_uav_and_edram_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_vertex_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_pixel_; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_samplers_vertex_; diff --git a/src/xenia/gpu/texture_util.cc b/src/xenia/gpu/texture_util.cc index ada6cf1404..5b2f7f2357 100644 --- a/src/xenia/gpu/texture_util.cc +++ b/src/xenia/gpu/texture_util.cc @@ -355,90 +355,76 @@ TextureGuestLayout GetGuestTextureLayout( // be smaller (especially in the 1280x720 linear k_8_8_8_8 case in 4E4D083E, // for which memory exactly for 1280x720 is allocated, and aligning the // height to 32 would cause access of an unallocated page) or bigger than - // the stride. For tiled textures, this is the dimensions aligned to 32x32x4 - // blocks (or x1 for the missing dimensions). - uint32_t level_width_blocks = - xe::align(std::max(width_texels >> level, uint32_t(1)), - format_info->block_width) / - format_info->block_width; - uint32_t level_height_blocks = - xe::align(std::max(height_texels >> level, uint32_t(1)), - format_info->block_height) / - format_info->block_height; - uint32_t level_depth = std::max(depth >> level, uint32_t(1)); - if (is_tiled) { + // the stride. + if (level == layout.packed_level) { + // Calculate the portion of the mip tail actually used by the needed mips. + // The actually used region may be significantly smaller than the full + // 32x32-texel-aligned (and, for mips, calculated from the base dimensions + // rounded to powers of two - 58410A7A has an 80x260 tiled texture with + // packed mips at level 3 containing a mip ending at Y = 36, while + // 260 >> 3 == 32, but 512 >> 3 == 64) tail. A 2x2 texture (for example, + // in 494707D4, there's a 2x2 k_8_8_8_8 linear texture with packed mips), + // for instance, would have its 2x2 base at (16, 0) and its 1x1 mip at + // (8, 0) - and we need 2 or 1 rows in these cases, not 32 - the 32 rows + // in a linear texture (with 256-byte pitch alignment) would span two 4 KB + // pages rather than one. + level_layout.x_extent_blocks = 0; + level_layout.y_extent_blocks = 0; + level_layout.z_extent = 0; + uint32_t packed_sublevel_last = is_base ? 0 : max_level; + for (uint32_t packed_sublevel = layout.packed_level; + packed_sublevel <= packed_sublevel_last; ++packed_sublevel) { + uint32_t packed_sublevel_x_blocks; + uint32_t packed_sublevel_y_blocks; + uint32_t packed_sublevel_z; + GetPackedMipOffset(width_texels, height_texels, depth, format, + packed_sublevel, packed_sublevel_x_blocks, + packed_sublevel_y_blocks, packed_sublevel_z); + level_layout.x_extent_blocks = std::max( + level_layout.x_extent_blocks, + packed_sublevel_x_blocks + + xe::align( + std::max(width_texels >> packed_sublevel, uint32_t(1)), + format_info->block_width) / + format_info->block_width); + level_layout.y_extent_blocks = std::max( + level_layout.y_extent_blocks, + packed_sublevel_y_blocks + + xe::align( + std::max(height_texels >> packed_sublevel, uint32_t(1)), + format_info->block_height) / + format_info->block_height); + level_layout.z_extent = + std::max(level_layout.z_extent, + packed_sublevel_z + + std::max(depth >> packed_sublevel, uint32_t(1))); + } + } else { level_layout.x_extent_blocks = - xe::align(level_width_blocks, xenos::kTextureTileWidthHeight); + xe::align(std::max(width_texels >> level, uint32_t(1)), + format_info->block_width) / + format_info->block_width; level_layout.y_extent_blocks = - xe::align(level_height_blocks, xenos::kTextureTileWidthHeight); + xe::align(std::max(height_texels >> level, uint32_t(1)), + format_info->block_height) / + format_info->block_height; + level_layout.z_extent = std::max(depth >> level, uint32_t(1)); + } + if (is_tiled) { uint32_t bytes_per_block_log2 = xe::log2_floor(bytes_per_block); if (dimension == xenos::DataDimension::k3D) { - level_layout.z_extent = - xe::align(level_depth, xenos::kTextureTileDepth); - // 32-block-row x 4 slice portions laid out sequentially (4-slice-major, - // 32-block-row-minor), address extent within a 32x32x4 tile depends on - // the pitch. Origins of 32x32x4 tiles grow monotonically, first along - // Z, then along Y, then along X. level_layout.array_slice_data_extent_bytes = GetTiledAddressUpperBound3D( level_layout.x_extent_blocks, level_layout.y_extent_blocks, level_layout.z_extent, row_pitch_blocks_tile_aligned, level_layout.y_extent_blocks, bytes_per_block_log2); } else { - level_layout.z_extent = 1; - // Origins of 32x32 tiles grow monotonically, first along Y, then along - // X. level_layout.array_slice_data_extent_bytes = GetTiledAddressUpperBound2D( level_layout.x_extent_blocks, level_layout.y_extent_blocks, row_pitch_blocks_tile_aligned, bytes_per_block_log2); } } else { - if (level == layout.packed_level) { - // Calculate the portion of the mip tail actually used by the needed - // mips. The actually used region may be significantly smaller than the - // full 32x32-texel-aligned tail. A 2x2 texture (for example, in - // 494707D4, there's a 2x2 k_8_8_8_8 linear texture with packed mips), - // for instance, would have its 2x2 base at (16, 0) and its 1x1 mip at - // (8, 0) - and we need 2 or 1 rows in these cases, not 32 - the 32 rows - // would span two 4 KB pages rather than one, taking the 256-byte pitch - // alignment in linear textures into account. - level_layout.x_extent_blocks = 0; - level_layout.y_extent_blocks = 0; - level_layout.z_extent = 0; - uint32_t packed_sublevel_last = is_base ? 0 : max_level; - for (uint32_t packed_sublevel = layout.packed_level; - packed_sublevel <= packed_sublevel_last; ++packed_sublevel) { - uint32_t packed_sublevel_x_blocks; - uint32_t packed_sublevel_y_blocks; - uint32_t packed_sublevel_z; - GetPackedMipOffset(width_texels, height_texels, depth, format, - packed_sublevel, packed_sublevel_x_blocks, - packed_sublevel_y_blocks, packed_sublevel_z); - level_layout.x_extent_blocks = std::max( - level_layout.x_extent_blocks, - packed_sublevel_x_blocks + - xe::align( - std::max(width_texels >> packed_sublevel, uint32_t(1)), - format_info->block_width) / - format_info->block_width); - level_layout.y_extent_blocks = std::max( - level_layout.y_extent_blocks, - packed_sublevel_y_blocks + - xe::align( - std::max(height_texels >> packed_sublevel, uint32_t(1)), - format_info->block_height) / - format_info->block_height); - level_layout.z_extent = - std::max(level_layout.z_extent, - packed_sublevel_z + - std::max(depth >> packed_sublevel, uint32_t(1))); - } - } else { - level_layout.x_extent_blocks = level_width_blocks; - level_layout.y_extent_blocks = level_height_blocks; - level_layout.z_extent = level_depth; - } level_layout.array_slice_data_extent_bytes = z_stride_bytes * (level_layout.z_extent - 1) + level_layout.row_pitch_bytes * (level_layout.y_extent_blocks - 1) + diff --git a/src/xenia/gpu/texture_util.h b/src/xenia/gpu/texture_util.h index bcc080de37..b52b4fa38b 100644 --- a/src/xenia/gpu/texture_util.h +++ b/src/xenia/gpu/texture_util.h @@ -144,24 +144,27 @@ struct TextureGuestLayout { // multiplied by the array slice count. uint32_t array_slice_stride_bytes; - // Estimated amount of memory this level occupies, and variables involved in - // its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For - // tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1 - // depending on the dimension), but for the linear subresources, this may be - // significantly (including less 4 KB pages) smaller than the aligned size - // (like for 4E4D083E where aligning the height of a 1280x720 linear texture - // results in access violations). For the linear mip tail, this includes all - // the mip levels stored in it. If the width is bigger than the pitch, this - // will also be taken into account for the last row so all memory actually - // used by the texture will be loaded, and may be bigger than the distance - // between array slices or levels. The purpose of this parameter is to make - // the memory amount that needs to be resident as close to the real amount - // as possible, to make sure all the needed data will be read, but also, if - // possible, unneeded memory pages won't be accessed (since that may trigger - // an access violation on the CPU). + // The exclusive upper bound of blocks needed at this level (this level for + // non-packed levels, or all the packed levels for the packed mip tail). uint32_t x_extent_blocks; uint32_t y_extent_blocks; uint32_t z_extent; + // Estimated amount of memory this level occupies. Not aligned to + // kTextureSubresourceAlignmentBytes. For tiled textures, this will be + // calculated for the extent rounded to 32x32x4 blocks (or 32x32x1 depending + // on the dimensionality), but for linear textures, as well as for mips of + // non-power-of-two tiled textures, this may be significantly (including + // less 4 KB pages) smaller than the aligned size (like for 4E4D083E where + // aligning the height of a 1280x720 linear texture results in access + // violations). For the linear mip tail, this includes all the mip levels + // stored in it. If the width is bigger than the pitch, this will also be + // taken into account for the last row so all memory actually used by the + // texture will be loaded, and may be bigger than the distance between array + // slices or levels. The purpose of this parameter is to make the memory + // amount that needs to be resident as close to the real amount as possible, + // to make sure all the needed data will be read, but also, if possible, + // unneeded memory pages won't be accessed (since that may trigger an access + // violation on the CPU). uint32_t array_slice_data_extent_bytes; // Including all array slices. uint32_t level_data_extent_bytes; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index f1af57a23f..2969699590 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -581,6 +581,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( primitive_processing_result.host_primitive_reset_enabled; description_out.depth_clamp_enable = + device_features.depthClamp && regs.Get().clip_disable; // TODO(Triang3l): Tessellation. @@ -825,6 +826,10 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( return false; } + if (!device_features.depthClamp && description.depth_clamp_enable) { + return false; + } + if (!device_features.fillModeNonSolid && description.polygon_mode != PipelinePolygonMode::kFill) { return false; diff --git a/src/xenia/ui/imgui_drawer.cc b/src/xenia/ui/imgui_drawer.cc index 2454947fd2..5f3075ca56 100644 --- a/src/xenia/ui/imgui_drawer.cc +++ b/src/xenia/ui/imgui_drawer.cc @@ -62,11 +62,11 @@ void ImGuiDrawer::AddDialog(ImGuiDialog* dialog) { dialogs_.cend()) { return; } - if (dialog_loop_next_index_ == SIZE_MAX && dialogs_.empty()) { - // First dialog added. dialog_loop_next_index_ == SIZE_MAX is also checked - // because in a situation of removing the only dialog, then adding a dialog, - // from within a dialog's Draw function, the removal would not cause the - // listener and the drawer to be removed (it's deferred in this case). + if (dialogs_.empty() && !IsDrawingDialogs()) { + // First dialog added. !IsDrawingDialogs() is also checked because in a + // situation of removing the only dialog, then adding a dialog, from within + // a dialog's Draw function, re-registering the ImGuiDrawer may result in + // ImGui being drawn multiple times in the current frame. window_->AddInputListener(this, z_order_); if (presenter_) { presenter_->AddUIDrawerFromUIThread(this, z_order_); @@ -81,7 +81,7 @@ void ImGuiDrawer::RemoveDialog(ImGuiDialog* dialog) { if (it == dialogs_.cend()) { return; } - if (dialog_loop_next_index_ != SIZE_MAX) { + if (IsDrawingDialogs()) { // Actualize the next dialog index after the erasure from the vector. size_t existing_index = size_t(std::distance(dialogs_.cbegin(), it)); if (dialog_loop_next_index_ > existing_index) { @@ -89,17 +89,7 @@ void ImGuiDrawer::RemoveDialog(ImGuiDialog* dialog) { } } dialogs_.erase(it); - if (dialog_loop_next_index_ == SIZE_MAX && dialogs_.empty()) { - if (presenter_) { - presenter_->RemoveUIDrawerFromUIThread(this); - } - window_->RemoveInputListener(this); - // Clear all input since no input will be received anymore, and when the - // drawer becomes active again, it'd have an outdated input state otherwise - // which will be persistent until new events actualize individual input - // properties. - ClearInput(); - } + DetachIfLastDialogRemoved(); } void ImGuiDrawer::Initialize() { @@ -301,7 +291,7 @@ void ImGuiDrawer::Draw(UIDrawContext& ui_draw_context) { ImGui::NewFrame(); - assert_true(dialog_loop_next_index_ == SIZE_MAX); + assert_true(!IsDrawingDialogs()); dialog_loop_next_index_ = 0; while (dialog_loop_next_index_ < dialogs_.size()) { dialogs_[dialog_loop_next_index_++]->Draw(); @@ -319,11 +309,11 @@ void ImGuiDrawer::Draw(UIDrawContext& ui_draw_context) { io.MousePos = ImVec2(-FLT_MAX, -FLT_MAX); } - if (dialogs_.empty()) { - // All dialogs have removed themselves during the draw, detach. - presenter_->RemoveUIDrawerFromUIThread(this); - window_->RemoveInputListener(this); - } else { + // Detaching is deferred if the last dialog is removed during drawing, perform + // it now if needed. + DetachIfLastDialogRemoved(); + + if (!dialogs_.empty()) { // Repaint (and handle input) continuously if still active. presenter_->RequestUIPaintFromUIThread(); } @@ -557,5 +547,24 @@ void ImGuiDrawer::SwitchToPhysicalMouseAndUpdateMousePosition( UpdateMousePosition(float(e.x()), float(e.y())); } +void ImGuiDrawer::DetachIfLastDialogRemoved() { + // IsDrawingDialogs() is also checked because in a situation of removing the + // only dialog, then adding a dialog, from within a dialog's Draw function, + // re-registering the ImGuiDrawer may result in ImGui being drawn multiple + // times in the current frame. + if (!dialogs_.empty() || IsDrawingDialogs()) { + return; + } + if (presenter_) { + presenter_->RemoveUIDrawerFromUIThread(this); + } + window_->RemoveInputListener(this); + // Clear all input since no input will be received anymore, and when the + // drawer becomes active again, it'd have an outdated input state otherwise + // which will be persistent until new events actualize individual input + // properties. + ClearInput(); +} + } // namespace ui } // namespace xe diff --git a/src/xenia/ui/imgui_drawer.h b/src/xenia/ui/imgui_drawer.h index 5563a11519..2a114fce48 100644 --- a/src/xenia/ui/imgui_drawer.h +++ b/src/xenia/ui/imgui_drawer.h @@ -74,6 +74,9 @@ class ImGuiDrawer : public WindowInputListener, public UIDrawer { void UpdateMousePosition(float x, float y); void SwitchToPhysicalMouseAndUpdateMousePosition(const MouseEvent& e); + bool IsDrawingDialogs() const { return dialog_loop_next_index_ != SIZE_MAX; } + void DetachIfLastDialogRemoved(); + Window* window_; size_t z_order_;