D3D12 root element cleanup (#9432)

This commit is contained in:
Teodor Tanasoaia
2026-04-19 21:57:21 +02:00
committed by GitHub
parent ff438f3cad
commit 5eeb484758
4 changed files with 105 additions and 102 deletions

View File

@@ -744,23 +744,23 @@ impl super::Adapter {
// Source: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits#memory-limits-and-costs
//
// Per pipeline layout:
// - RootElement::Constant, (immediates) 32 root constants
// - RootElement::Immediates, 32 root constants
// (bounded by maxImmediateSize) = 32 x 4 bytes = 128 bytes
// - RootElement::SamplerHeap, a root table = 4 bytes
// - RootElement::SamplerHeapDescriptorTable, a descriptor table = 4 bytes
// - RootElement::SpecialConstantBuffer, 3 root constants = 3 x 4 bytes = 12 bytes
// - RootElement::DynamicOffsetsBuffer, a root constant per dynamic storage buffer
// - RootElement::DynamicStorageBufferOffsets, a root constant per dynamic storage buffer
// (bounded by maxDynamicStorageBuffersPerPipelineLayout) = 4 x 4 bytes = 16 bytes
// - RootElement::DynamicUniformBuffer, a root descriptor per dynamic uniform buffer
// (bounded by maxDynamicUniformBuffersPerPipelineLayout) = 8 x 8 bytes = 64 bytes
// Per bind group:
// - RootElement::Table, a root table
// - RootElement::DescriptorTable, a descriptor table
// (bounded by maxBindGroups) = 8 x 4 bytes = 32 bytes
//
// Source: logic in `create_pipeline_layout`
//
// Total: 128 + 4 + 12 + 16 + 64 + 32 = 256 bytes
//
let max_immediate_size = 128;
let max_immediate_size = super::MAX_IMMEDIATE_SIZE;
let max_bind_groups = 8;
let max_dynamic_uniform_buffers_per_pipeline_layout = 8;
let max_dynamic_storage_buffers_per_pipeline_layout = 4;

View File

@@ -139,28 +139,26 @@ impl super::CommandEncoder {
.as_ref()
.map(|sc| sc.root_index)
{
let special_constants = super::SpecialConstants::from_indirect_draw_call_params(
first_vertex,
first_instance,
);
let needs_update = match self.pass.root_elements[root_index as usize] {
super::RootElement::SpecialConstantBuffer {
first_vertex: other_vertex,
first_instance: other_instance,
other: _,
} => first_vertex != other_vertex || first_instance != other_instance,
super::RootElement::SpecialConstants(old_special_constants) => {
old_special_constants != special_constants
}
_ => true,
};
if needs_update {
self.pass.dirty_root_elements |= 1 << root_index;
self.pass.root_elements[root_index as usize] =
super::RootElement::SpecialConstantBuffer {
first_vertex,
first_instance,
other: 0,
};
super::RootElement::SpecialConstants(special_constants);
}
}
self.update_root_elements();
}
fn prepare_dispatch(&mut self, count: [u32; 3]) {
fn prepare_dispatch(&mut self, workgroup_count: [u32; 3]) {
if let Some(root_index) = self
.pass
.layout
@@ -168,22 +166,18 @@ impl super::CommandEncoder {
.as_ref()
.map(|sc| sc.root_index)
{
let special_constants =
super::SpecialConstants::from_compute_dispatch_params(workgroup_count);
let needs_update = match self.pass.root_elements[root_index as usize] {
super::RootElement::SpecialConstantBuffer {
first_vertex,
first_instance,
other,
} => [first_vertex as u32, first_instance, other] != count,
super::RootElement::SpecialConstants(old_special_constants) => {
old_special_constants != special_constants
}
_ => true,
};
if needs_update {
self.pass.dirty_root_elements |= 1 << root_index;
self.pass.root_elements[root_index as usize] =
super::RootElement::SpecialConstantBuffer {
first_vertex: count[0] as i32,
first_instance: count[1],
other: count[2],
};
super::RootElement::SpecialConstants(special_constants);
}
}
self.update_root_elements();
@@ -200,12 +194,14 @@ impl super::CommandEncoder {
self.pass.dirty_root_elements ^= 1 << index;
match self.pass.root_elements[index as usize] {
super::RootElement::Empty => log::error!("Root index {index} is not bound"),
super::RootElement::Constant => {
let info = self.pass.layout.root_constant_info.as_ref().unwrap();
super::RootElement::Empty => unreachable!(
"Empty root element at index {index} should not have been marked as dirty"
),
super::RootElement::Immediates => {
let info = self.pass.layout.immediates_info.as_ref().unwrap();
for offset in info.range.clone() {
let val = self.pass.constant_data[offset as usize];
for offset in 0..info.size {
let val = self.pass.immediates[offset as usize];
match self.pass.kind {
Pk::Render => unsafe {
list.SetGraphicsRoot32BitConstant(index, val, offset)
@@ -217,23 +213,27 @@ impl super::CommandEncoder {
}
}
}
super::RootElement::SpecialConstantBuffer {
first_vertex,
first_instance,
other,
} => match self.pass.kind {
super::RootElement::SpecialConstants(super::SpecialConstants {
first_vertex_or_x,
first_instance_or_y,
unused_or_z,
}) => match self.pass.kind {
Pk::Render => {
unsafe { list.SetGraphicsRoot32BitConstant(index, first_vertex as u32, 0) };
unsafe { list.SetGraphicsRoot32BitConstant(index, first_instance, 1) };
unsafe {
list.SetGraphicsRoot32BitConstant(index, first_vertex_or_x as u32, 0)
};
unsafe { list.SetGraphicsRoot32BitConstant(index, first_instance_or_y, 1) };
}
Pk::Compute => {
unsafe { list.SetComputeRoot32BitConstant(index, first_vertex as u32, 0) };
unsafe { list.SetComputeRoot32BitConstant(index, first_instance, 1) };
unsafe { list.SetComputeRoot32BitConstant(index, other, 2) };
unsafe {
list.SetComputeRoot32BitConstant(index, first_vertex_or_x as u32, 0)
};
unsafe { list.SetComputeRoot32BitConstant(index, first_instance_or_y, 1) };
unsafe { list.SetComputeRoot32BitConstant(index, unused_or_z, 2) };
}
Pk::Transfer => (),
},
super::RootElement::Table(descriptor) => match self.pass.kind {
super::RootElement::DescriptorTable(descriptor) => match self.pass.kind {
Pk::Render => unsafe { list.SetGraphicsRootDescriptorTable(index, descriptor) },
Pk::Compute => unsafe { list.SetComputeRootDescriptorTable(index, descriptor) },
Pk::Transfer => (),
@@ -250,7 +250,7 @@ impl super::CommandEncoder {
Pk::Transfer => (),
}
}
super::RootElement::DynamicOffsetsBuffer { start, end } => {
super::RootElement::DynamicStorageBufferOffsets { start, end } => {
let values = &self.pass.dynamic_storage_buffer_offsets[start..end];
for (offset, &value) in values.iter().enumerate() {
@@ -265,7 +265,7 @@ impl super::CommandEncoder {
}
}
}
super::RootElement::SamplerHeap => match self.pass.kind {
super::RootElement::SamplerHeapDescriptorTable => match self.pass.kind {
Pk::Render => unsafe {
list.SetGraphicsRootDescriptorTable(
index,
@@ -287,14 +287,11 @@ impl super::CommandEncoder {
fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) {
if let Some(root_index) = layout.special_constants.as_ref().map(|sc| sc.root_index) {
self.pass.root_elements[root_index as usize] =
super::RootElement::SpecialConstantBuffer {
first_vertex: 0,
first_instance: 0,
other: 0,
};
super::RootElement::SpecialConstants(super::SpecialConstants::default());
}
if let Some(root_index) = layout.sampler_heap_root_index {
self.pass.root_elements[root_index as usize] = super::RootElement::SamplerHeap;
self.pass.root_elements[root_index as usize] =
super::RootElement::SamplerHeapDescriptorTable;
}
self.pass.layout = layout.clone();
self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1;
@@ -1133,7 +1130,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
// Bind CBV/SRV/UAV descriptor tables
if info.tables.contains(super::TableTypes::SRV_CBV_UAV) {
self.pass.root_elements[root_index] =
super::RootElement::Table(group.handle_views.unwrap().gpu);
super::RootElement::DescriptorTable(group.handle_views.unwrap().gpu);
root_index += 1;
}
@@ -1151,7 +1148,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
offsets_index += range.start;
self.pass.root_elements[root_index as usize] =
super::RootElement::DynamicOffsetsBuffer {
super::RootElement::DynamicStorageBufferOffsets {
start: range.start,
end: range.end,
};
@@ -1199,11 +1196,11 @@ impl crate::CommandEncoder for super::CommandEncoder {
) {
let offset_words = offset_bytes as usize / 4;
let info = layout.shared.root_constant_info.as_ref().unwrap();
let info = layout.shared.immediates_info.as_ref().unwrap();
self.pass.root_elements[info.root_index as usize] = super::RootElement::Constant;
self.pass.root_elements[info.root_index as usize] = super::RootElement::Immediates;
self.pass.constant_data[offset_words..(offset_words + data.len())].copy_from_slice(data);
self.pass.immediates[offset_words..(offset_words + data.len())].copy_from_slice(data);
if self.pass.layout.signature == layout.shared.signature {
self.pass.dirty_root_elements |= 1 << info.root_index;

View File

@@ -936,7 +936,7 @@ impl crate::Device for super::Device {
let mut bind_uav = hlsl::BindTarget::default();
let mut parameters = Vec::new();
let mut immediates_target = None;
let mut root_constant_info = None;
let mut immediates_info = None;
if desc.immediate_size != 0 {
let parameter_index = parameters.len();
@@ -954,9 +954,9 @@ impl crate::Device for super::Device {
});
let binding = bind_cbv;
bind_cbv.register += 1;
root_constant_info = Some(super::RootConstantInfo {
immediates_info = Some(super::ImmediatesInfo {
root_index: parameter_index as u32,
range: 0..size,
size,
});
immediates_target = Some(binding);
@@ -1400,23 +1400,7 @@ impl crate::Device for super::Device {
},
},
};
let special_constant_buffer_args_len = {
// Hack: construct a dummy value of the special constants buffer value we need to
// fill, and calculate the size of each member.
let super::RootElement::SpecialConstantBuffer {
first_vertex,
first_instance,
other,
} = (super::RootElement::SpecialConstantBuffer {
first_vertex: 0,
first_instance: 0,
other: 0,
})
else {
unreachable!();
};
size_of_val(&first_vertex) + size_of_val(&first_instance) + size_of_val(&other)
};
let special_constant_buffer_args_len = size_of::<super::SpecialConstants>();
let draw_mesh = if self
.features
@@ -1505,7 +1489,7 @@ impl crate::Device for super::Device {
signature: Some(raw),
total_root_elements: parameters.len() as super::RootIndex,
special_constants,
root_constant_info,
immediates_info,
sampler_heap_root_index,
},
bind_group_infos,

View File

@@ -509,6 +509,10 @@ crate::impl_dyn_resource!(
// Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries.
const MAX_ROOT_ELEMENTS: usize = 64;
/// Maximum size, in bytes, of the immediates data exposed per pipeline layout.
///
/// See comment in [`Adapter::expose`].
/// You must change the math in the comment before you update this value.
const MAX_IMMEDIATE_SIZE: u32 = 128;
/// Number of `u32` words needed to hold [`MAX_IMMEDIATE_SIZE`] bytes
/// (4 bytes per word); used as the length of `PassState::immediates`.
const MAX_IMMEDIATES: usize = MAX_IMMEDIATE_SIZE as usize / 4;
const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10;
pub struct Instance {
@@ -821,31 +825,51 @@ struct PassResolve {
format: Dxgi::Common::DXGI_FORMAT,
}
/// The three 32-bit values carried by the "special constants" root element.
///
/// The same slots are shared between two kinds of work: indirect draw calls
/// (first vertex / first instance) and compute dispatches (workgroup count).
/// The default value has every slot set to zero.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
struct SpecialConstants {
    /// Draw: index of the first vertex. Dispatch: workgroup count along `x`.
    first_vertex_or_x: i32,
    /// Draw: index of the first instance. Dispatch: workgroup count along `y`.
    first_instance_or_y: u32,
    /// Draw: not used (kept at zero). Dispatch: workgroup count along `z`.
    unused_or_z: u32,
}

impl SpecialConstants {
    /// Builds the constants for an indirect draw call.
    ///
    /// The third slot has no meaning for draws and is left at its default of zero.
    fn from_indirect_draw_call_params(first_vertex: i32, first_instance: u32) -> Self {
        Self {
            first_vertex_or_x: first_vertex,
            first_instance_or_y: first_instance,
            ..Self::default()
        }
    }

    /// Builds the constants for a compute dispatch from its `[x, y, z]` workgroup count.
    fn from_compute_dispatch_params(workgroup_count: [u32; 3]) -> Self {
        let [x, y, z] = workgroup_count;
        Self {
            // Same-width `as` cast: the bit pattern is preserved, so casting the
            // stored value back to `u32` yields the original `x`.
            first_vertex_or_x: x as i32,
            first_instance_or_y: y,
            unused_or_z: z,
        }
    }
}
#[derive(Clone, Copy, Debug)]
enum RootElement {
Empty,
Constant,
SpecialConstantBuffer {
/// The first vertex in an indirect draw call, _or_ the `x` of a compute dispatch.
first_vertex: i32,
/// The first instance in an indirect draw call, _or_ the `y` of a compute dispatch.
first_instance: u32,
/// Unused in an indirect draw call, _or_ the `z` of a compute dispatch.
other: u32,
},
/// Descriptor table.
Table(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE),
/// Descriptor for an uniform buffer that has dynamic offset.
Immediates,
SpecialConstants(SpecialConstants),
DescriptorTable(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE),
/// Descriptor table referring to the entire sampler heap.
SamplerHeapDescriptorTable,
/// Root descriptor for a uniform buffer binding that has a dynamic offset.
DynamicUniformBuffer {
address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE,
},
/// Descriptor table referring to the entire sampler heap.
SamplerHeap,
/// Root constants for dynamic offsets.
/// Root constants for storage buffer bindings with dynamic offsets.
///
/// start..end is the range of values in [`PassState::dynamic_storage_buffer_offsets`]
/// that will be used to update the root constants.
DynamicOffsetsBuffer {
DynamicStorageBufferOffsets {
start: usize,
end: usize,
},
@@ -863,7 +887,7 @@ struct PassState {
resolves: ArrayVec<PassResolve, { crate::MAX_COLOR_ATTACHMENTS }>,
layout: PipelineLayoutShared,
root_elements: [RootElement; MAX_ROOT_ELEMENTS],
constant_data: [u32; MAX_ROOT_ELEMENTS],
immediates: [u32; MAX_IMMEDIATES],
dynamic_storage_buffer_offsets: Vec<u32>,
dirty_root_elements: u64,
vertex_buffers: [Direct3D12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS],
@@ -871,10 +895,8 @@ struct PassState {
kind: PassKind,
}
#[test]
fn test_dirty_mask() {
assert_eq!(MAX_ROOT_ELEMENTS, u64::BITS as usize);
}
// `root_elements` size must match `dirty_root_elements` bit size
const _: () = assert!(MAX_ROOT_ELEMENTS == u64::BITS as usize);
impl PassState {
fn new() -> Self {
@@ -885,11 +907,11 @@ impl PassState {
signature: None,
total_root_elements: 0,
special_constants: None,
root_constant_info: None,
immediates_info: None,
sampler_heap_root_index: None,
},
root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS],
constant_data: [0; MAX_ROOT_ELEMENTS],
immediates: [0; MAX_IMMEDIATES],
dynamic_storage_buffer_offsets: Vec::new(),
dirty_root_elements: 0,
vertex_buffers: [Default::default(); crate::MAX_VERTEX_BUFFERS],
@@ -1148,9 +1170,9 @@ struct BindGroupInfo {
}
#[derive(Debug, Clone)]
struct RootConstantInfo {
struct ImmediatesInfo {
root_index: RootIndex,
range: core::ops::Range<u32>,
size: u32,
}
#[derive(Debug, Clone)]
@@ -1164,7 +1186,7 @@ struct PipelineLayoutShared {
signature: Option<Direct3D12::ID3D12RootSignature>,
total_root_elements: RootIndex,
special_constants: Option<PipelineLayoutSpecialConstants>,
root_constant_info: Option<RootConstantInfo>,
immediates_info: Option<ImmediatesInfo>,
sampler_heap_root_index: Option<RootIndex>,
}