mirror of
https://github.com/gfx-rs/wgpu.git
synced 2026-06-01 09:09:48 +09:00
D3D12 root element cleanup (#9432)
This commit is contained in:
@@ -744,23 +744,23 @@ impl super::Adapter {
|
||||
// Source: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits#memory-limits-and-costs
|
||||
//
|
||||
// Per pipeline layout:
|
||||
// - RootElement::Constant, (immediates) 32 root constants
|
||||
// - RootElement::Immediates, 32 root constants
|
||||
// (bounded by maxImmediateSize) = 32 x 4 bytes = 128 bytes
|
||||
// - RootElement::SamplerHeap, a root table = 4 bytes
|
||||
// - RootElement::SamplerHeapDescriptorTable, a descriptor table = 4 bytes
|
||||
// - RootElement::SpecialConstantBuffer, 3 root constants = 3 x 4 bytes = 12 bytes
|
||||
// - RootElement::DynamicOffsetsBuffer, a root constant per dynamic storage buffer
|
||||
// - RootElement::DynamicStorageBufferOffsets, a root constant per dynamic storage buffer
|
||||
// (bounded by maxDynamicStorageBuffersPerPipelineLayout) = 4 x 4 bytes = 16 bytes
|
||||
// - RootElement::DynamicUniformBuffer, a root descriptor per dynamic uniform buffer
|
||||
// (bounded by maxDynamicUniformBuffersPerPipelineLayout) = 8 x 8 bytes = 64 bytes
|
||||
// Per bind group:
|
||||
// - RootElement::Table, a root table
|
||||
// - RootElement::DescriptorTable, a descriptor table
|
||||
// (bounded by maxBindGroups) = 8 x 4 bytes = 32 bytes
|
||||
//
|
||||
// Source: logic in `create_pipeline_layout`
|
||||
//
|
||||
// Total: 128 + 4 + 12 + 16 + 64 + 32 = 256 bytes
|
||||
//
|
||||
let max_immediate_size = 128;
|
||||
let max_immediate_size = super::MAX_IMMEDIATE_SIZE;
|
||||
let max_bind_groups = 8;
|
||||
let max_dynamic_uniform_buffers_per_pipeline_layout = 8;
|
||||
let max_dynamic_storage_buffers_per_pipeline_layout = 4;
|
||||
|
||||
@@ -139,28 +139,26 @@ impl super::CommandEncoder {
|
||||
.as_ref()
|
||||
.map(|sc| sc.root_index)
|
||||
{
|
||||
let special_constants = super::SpecialConstants::from_indirect_draw_call_params(
|
||||
first_vertex,
|
||||
first_instance,
|
||||
);
|
||||
let needs_update = match self.pass.root_elements[root_index as usize] {
|
||||
super::RootElement::SpecialConstantBuffer {
|
||||
first_vertex: other_vertex,
|
||||
first_instance: other_instance,
|
||||
other: _,
|
||||
} => first_vertex != other_vertex || first_instance != other_instance,
|
||||
super::RootElement::SpecialConstants(old_special_constants) => {
|
||||
old_special_constants != special_constants
|
||||
}
|
||||
_ => true,
|
||||
};
|
||||
if needs_update {
|
||||
self.pass.dirty_root_elements |= 1 << root_index;
|
||||
self.pass.root_elements[root_index as usize] =
|
||||
super::RootElement::SpecialConstantBuffer {
|
||||
first_vertex,
|
||||
first_instance,
|
||||
other: 0,
|
||||
};
|
||||
super::RootElement::SpecialConstants(special_constants);
|
||||
}
|
||||
}
|
||||
self.update_root_elements();
|
||||
}
|
||||
|
||||
fn prepare_dispatch(&mut self, count: [u32; 3]) {
|
||||
fn prepare_dispatch(&mut self, workgroup_count: [u32; 3]) {
|
||||
if let Some(root_index) = self
|
||||
.pass
|
||||
.layout
|
||||
@@ -168,22 +166,18 @@ impl super::CommandEncoder {
|
||||
.as_ref()
|
||||
.map(|sc| sc.root_index)
|
||||
{
|
||||
let special_constants =
|
||||
super::SpecialConstants::from_compute_dispatch_params(workgroup_count);
|
||||
let needs_update = match self.pass.root_elements[root_index as usize] {
|
||||
super::RootElement::SpecialConstantBuffer {
|
||||
first_vertex,
|
||||
first_instance,
|
||||
other,
|
||||
} => [first_vertex as u32, first_instance, other] != count,
|
||||
super::RootElement::SpecialConstants(old_special_constants) => {
|
||||
old_special_constants != special_constants
|
||||
}
|
||||
_ => true,
|
||||
};
|
||||
if needs_update {
|
||||
self.pass.dirty_root_elements |= 1 << root_index;
|
||||
self.pass.root_elements[root_index as usize] =
|
||||
super::RootElement::SpecialConstantBuffer {
|
||||
first_vertex: count[0] as i32,
|
||||
first_instance: count[1],
|
||||
other: count[2],
|
||||
};
|
||||
super::RootElement::SpecialConstants(special_constants);
|
||||
}
|
||||
}
|
||||
self.update_root_elements();
|
||||
@@ -200,12 +194,14 @@ impl super::CommandEncoder {
|
||||
self.pass.dirty_root_elements ^= 1 << index;
|
||||
|
||||
match self.pass.root_elements[index as usize] {
|
||||
super::RootElement::Empty => log::error!("Root index {index} is not bound"),
|
||||
super::RootElement::Constant => {
|
||||
let info = self.pass.layout.root_constant_info.as_ref().unwrap();
|
||||
super::RootElement::Empty => unreachable!(
|
||||
"Empty root element at index {index} should not have been marked as dirty"
|
||||
),
|
||||
super::RootElement::Immediates => {
|
||||
let info = self.pass.layout.immediates_info.as_ref().unwrap();
|
||||
|
||||
for offset in info.range.clone() {
|
||||
let val = self.pass.constant_data[offset as usize];
|
||||
for offset in 0..info.size {
|
||||
let val = self.pass.immediates[offset as usize];
|
||||
match self.pass.kind {
|
||||
Pk::Render => unsafe {
|
||||
list.SetGraphicsRoot32BitConstant(index, val, offset)
|
||||
@@ -217,23 +213,27 @@ impl super::CommandEncoder {
|
||||
}
|
||||
}
|
||||
}
|
||||
super::RootElement::SpecialConstantBuffer {
|
||||
first_vertex,
|
||||
first_instance,
|
||||
other,
|
||||
} => match self.pass.kind {
|
||||
super::RootElement::SpecialConstants(super::SpecialConstants {
|
||||
first_vertex_or_x,
|
||||
first_instance_or_y,
|
||||
unused_or_z,
|
||||
}) => match self.pass.kind {
|
||||
Pk::Render => {
|
||||
unsafe { list.SetGraphicsRoot32BitConstant(index, first_vertex as u32, 0) };
|
||||
unsafe { list.SetGraphicsRoot32BitConstant(index, first_instance, 1) };
|
||||
unsafe {
|
||||
list.SetGraphicsRoot32BitConstant(index, first_vertex_or_x as u32, 0)
|
||||
};
|
||||
unsafe { list.SetGraphicsRoot32BitConstant(index, first_instance_or_y, 1) };
|
||||
}
|
||||
Pk::Compute => {
|
||||
unsafe { list.SetComputeRoot32BitConstant(index, first_vertex as u32, 0) };
|
||||
unsafe { list.SetComputeRoot32BitConstant(index, first_instance, 1) };
|
||||
unsafe { list.SetComputeRoot32BitConstant(index, other, 2) };
|
||||
unsafe {
|
||||
list.SetComputeRoot32BitConstant(index, first_vertex_or_x as u32, 0)
|
||||
};
|
||||
unsafe { list.SetComputeRoot32BitConstant(index, first_instance_or_y, 1) };
|
||||
unsafe { list.SetComputeRoot32BitConstant(index, unused_or_z, 2) };
|
||||
}
|
||||
Pk::Transfer => (),
|
||||
},
|
||||
super::RootElement::Table(descriptor) => match self.pass.kind {
|
||||
super::RootElement::DescriptorTable(descriptor) => match self.pass.kind {
|
||||
Pk::Render => unsafe { list.SetGraphicsRootDescriptorTable(index, descriptor) },
|
||||
Pk::Compute => unsafe { list.SetComputeRootDescriptorTable(index, descriptor) },
|
||||
Pk::Transfer => (),
|
||||
@@ -250,7 +250,7 @@ impl super::CommandEncoder {
|
||||
Pk::Transfer => (),
|
||||
}
|
||||
}
|
||||
super::RootElement::DynamicOffsetsBuffer { start, end } => {
|
||||
super::RootElement::DynamicStorageBufferOffsets { start, end } => {
|
||||
let values = &self.pass.dynamic_storage_buffer_offsets[start..end];
|
||||
|
||||
for (offset, &value) in values.iter().enumerate() {
|
||||
@@ -265,7 +265,7 @@ impl super::CommandEncoder {
|
||||
}
|
||||
}
|
||||
}
|
||||
super::RootElement::SamplerHeap => match self.pass.kind {
|
||||
super::RootElement::SamplerHeapDescriptorTable => match self.pass.kind {
|
||||
Pk::Render => unsafe {
|
||||
list.SetGraphicsRootDescriptorTable(
|
||||
index,
|
||||
@@ -287,14 +287,11 @@ impl super::CommandEncoder {
|
||||
fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) {
|
||||
if let Some(root_index) = layout.special_constants.as_ref().map(|sc| sc.root_index) {
|
||||
self.pass.root_elements[root_index as usize] =
|
||||
super::RootElement::SpecialConstantBuffer {
|
||||
first_vertex: 0,
|
||||
first_instance: 0,
|
||||
other: 0,
|
||||
};
|
||||
super::RootElement::SpecialConstants(super::SpecialConstants::default());
|
||||
}
|
||||
if let Some(root_index) = layout.sampler_heap_root_index {
|
||||
self.pass.root_elements[root_index as usize] = super::RootElement::SamplerHeap;
|
||||
self.pass.root_elements[root_index as usize] =
|
||||
super::RootElement::SamplerHeapDescriptorTable;
|
||||
}
|
||||
self.pass.layout = layout.clone();
|
||||
self.pass.dirty_root_elements = (1 << layout.total_root_elements) - 1;
|
||||
@@ -1133,7 +1130,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
|
||||
// Bind CBV/SRV/UAV descriptor tables
|
||||
if info.tables.contains(super::TableTypes::SRV_CBV_UAV) {
|
||||
self.pass.root_elements[root_index] =
|
||||
super::RootElement::Table(group.handle_views.unwrap().gpu);
|
||||
super::RootElement::DescriptorTable(group.handle_views.unwrap().gpu);
|
||||
root_index += 1;
|
||||
}
|
||||
|
||||
@@ -1151,7 +1148,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
|
||||
offsets_index += range.start;
|
||||
|
||||
self.pass.root_elements[root_index as usize] =
|
||||
super::RootElement::DynamicOffsetsBuffer {
|
||||
super::RootElement::DynamicStorageBufferOffsets {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
};
|
||||
@@ -1199,11 +1196,11 @@ impl crate::CommandEncoder for super::CommandEncoder {
|
||||
) {
|
||||
let offset_words = offset_bytes as usize / 4;
|
||||
|
||||
let info = layout.shared.root_constant_info.as_ref().unwrap();
|
||||
let info = layout.shared.immediates_info.as_ref().unwrap();
|
||||
|
||||
self.pass.root_elements[info.root_index as usize] = super::RootElement::Constant;
|
||||
self.pass.root_elements[info.root_index as usize] = super::RootElement::Immediates;
|
||||
|
||||
self.pass.constant_data[offset_words..(offset_words + data.len())].copy_from_slice(data);
|
||||
self.pass.immediates[offset_words..(offset_words + data.len())].copy_from_slice(data);
|
||||
|
||||
if self.pass.layout.signature == layout.shared.signature {
|
||||
self.pass.dirty_root_elements |= 1 << info.root_index;
|
||||
|
||||
@@ -936,7 +936,7 @@ impl crate::Device for super::Device {
|
||||
let mut bind_uav = hlsl::BindTarget::default();
|
||||
let mut parameters = Vec::new();
|
||||
let mut immediates_target = None;
|
||||
let mut root_constant_info = None;
|
||||
let mut immediates_info = None;
|
||||
|
||||
if desc.immediate_size != 0 {
|
||||
let parameter_index = parameters.len();
|
||||
@@ -954,9 +954,9 @@ impl crate::Device for super::Device {
|
||||
});
|
||||
let binding = bind_cbv;
|
||||
bind_cbv.register += 1;
|
||||
root_constant_info = Some(super::RootConstantInfo {
|
||||
immediates_info = Some(super::ImmediatesInfo {
|
||||
root_index: parameter_index as u32,
|
||||
range: 0..size,
|
||||
size,
|
||||
});
|
||||
immediates_target = Some(binding);
|
||||
|
||||
@@ -1400,23 +1400,7 @@ impl crate::Device for super::Device {
|
||||
},
|
||||
},
|
||||
};
|
||||
let special_constant_buffer_args_len = {
|
||||
// Hack: construct a dummy value of the special constants buffer value we need to
|
||||
// fill, and calculate the size of each member.
|
||||
let super::RootElement::SpecialConstantBuffer {
|
||||
first_vertex,
|
||||
first_instance,
|
||||
other,
|
||||
} = (super::RootElement::SpecialConstantBuffer {
|
||||
first_vertex: 0,
|
||||
first_instance: 0,
|
||||
other: 0,
|
||||
})
|
||||
else {
|
||||
unreachable!();
|
||||
};
|
||||
size_of_val(&first_vertex) + size_of_val(&first_instance) + size_of_val(&other)
|
||||
};
|
||||
let special_constant_buffer_args_len = size_of::<super::SpecialConstants>();
|
||||
|
||||
let draw_mesh = if self
|
||||
.features
|
||||
@@ -1505,7 +1489,7 @@ impl crate::Device for super::Device {
|
||||
signature: Some(raw),
|
||||
total_root_elements: parameters.len() as super::RootIndex,
|
||||
special_constants,
|
||||
root_constant_info,
|
||||
immediates_info,
|
||||
sampler_heap_root_index,
|
||||
},
|
||||
bind_group_infos,
|
||||
|
||||
@@ -509,6 +509,10 @@ crate::impl_dyn_resource!(
|
||||
|
||||
// Limited by D3D12's root signature size of 64 DWORDs. Every root element costs at least 1 DWORD, so there can never be more than 64 of them.
|
||||
const MAX_ROOT_ELEMENTS: usize = 64;
|
||||
/// See comment in [`Adapter::expose`].
|
||||
/// You must change the math in the comment before you update this value.
|
||||
const MAX_IMMEDIATE_SIZE: u32 = 128;
|
||||
const MAX_IMMEDIATES: usize = MAX_IMMEDIATE_SIZE as usize / 4;
|
||||
const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10;
|
||||
|
||||
pub struct Instance {
|
||||
@@ -821,31 +825,51 @@ struct PassResolve {
|
||||
format: Dxgi::Common::DXGI_FORMAT,
|
||||
}
|
||||
|
||||
/// Values uploaded through the "special constants" root element, one 32-bit root constant
/// per field.
///
/// The same three slots serve two purposes: the first vertex / first instance of an
/// indirect draw call, or the `x` / `y` / `z` workgroup counts of a compute dispatch.
///
/// `#[repr(C)]` pins the layout to three consecutive 32-bit words, so
/// `size_of::<SpecialConstants>()` is guaranteed to be 12 bytes. The default Rust
/// representation makes no such promise, and that size is used as the byte length of the
/// special-constants arguments.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct SpecialConstants {
    /// The first vertex in an indirect draw call, _or_ the `x` of a compute dispatch.
    first_vertex_or_x: i32,
    /// The first instance in an indirect draw call, _or_ the `y` of a compute dispatch.
    first_instance_or_y: u32,
    /// Unused in an indirect draw call, _or_ the `z` of a compute dispatch.
    unused_or_z: u32,
}

// The root signature budget counts this element as 3 root constants (3 x 4 bytes).
const _: () = assert!(core::mem::size_of::<SpecialConstants>() == 12);

impl SpecialConstants {
    /// Builds the constants from the first vertex and first instance of a draw call.
    ///
    /// The third slot has no meaning for draws and is zeroed.
    fn from_indirect_draw_call_params(first_vertex: i32, first_instance: u32) -> Self {
        Self {
            first_vertex_or_x: first_vertex,
            first_instance_or_y: first_instance,
            unused_or_z: 0,
        }
    }

    /// Builds the constants from the workgroup counts of a compute dispatch.
    ///
    /// `x` lands in an `i32` field, so the cast reinterprets the bits rather than
    /// converting the value; casting the field back with `as u32` recovers `x` exactly,
    /// including counts above `i32::MAX`.
    fn from_compute_dispatch_params(workgroup_count: [u32; 3]) -> Self {
        let [x, y, z] = workgroup_count;
        Self {
            first_vertex_or_x: x as i32,
            first_instance_or_y: y,
            unused_or_z: z,
        }
    }
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
enum RootElement {
|
||||
Empty,
|
||||
Constant,
|
||||
SpecialConstantBuffer {
|
||||
/// The first vertex in an indirect draw call, _or_ the `x` of a compute dispatch.
|
||||
first_vertex: i32,
|
||||
/// The first instance in an indirect draw call, _or_ the `y` of a compute dispatch.
|
||||
first_instance: u32,
|
||||
/// Unused in an indirect draw call, _or_ the `z` of a compute dispatch.
|
||||
other: u32,
|
||||
},
|
||||
/// Descriptor table.
|
||||
Table(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE),
|
||||
/// Descriptor for an uniform buffer that has dynamic offset.
|
||||
Immediates,
|
||||
SpecialConstants(SpecialConstants),
|
||||
DescriptorTable(Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE),
|
||||
/// Descriptor table referring to the entire sampler heap.
|
||||
SamplerHeapDescriptorTable,
|
||||
/// Root descriptor for a uniform buffer binding that has a dynamic offset.
|
||||
DynamicUniformBuffer {
|
||||
address: Direct3D12::D3D12_GPU_DESCRIPTOR_HANDLE,
|
||||
},
|
||||
/// Descriptor table referring to the entire sampler heap.
|
||||
SamplerHeap,
|
||||
/// Root constants for dynamic offsets.
|
||||
/// Root constants for storage buffer bindings with dynamic offsets.
|
||||
///
|
||||
/// start..end is the range of values in [`PassState::dynamic_storage_buffer_offsets`]
|
||||
/// that will be used to update the root constants.
|
||||
DynamicOffsetsBuffer {
|
||||
DynamicStorageBufferOffsets {
|
||||
start: usize,
|
||||
end: usize,
|
||||
},
|
||||
@@ -863,7 +887,7 @@ struct PassState {
|
||||
resolves: ArrayVec<PassResolve, { crate::MAX_COLOR_ATTACHMENTS }>,
|
||||
layout: PipelineLayoutShared,
|
||||
root_elements: [RootElement; MAX_ROOT_ELEMENTS],
|
||||
constant_data: [u32; MAX_ROOT_ELEMENTS],
|
||||
immediates: [u32; MAX_IMMEDIATES],
|
||||
dynamic_storage_buffer_offsets: Vec<u32>,
|
||||
dirty_root_elements: u64,
|
||||
vertex_buffers: [Direct3D12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS],
|
||||
@@ -871,10 +895,8 @@ struct PassState {
|
||||
kind: PassKind,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_dirty_mask() {
|
||||
assert_eq!(MAX_ROOT_ELEMENTS, u64::BITS as usize);
|
||||
}
|
||||
// `root_elements` size must match `dirty_root_elements` bit size
|
||||
const _: () = assert!(MAX_ROOT_ELEMENTS == u64::BITS as usize);
|
||||
|
||||
impl PassState {
|
||||
fn new() -> Self {
|
||||
@@ -885,11 +907,11 @@ impl PassState {
|
||||
signature: None,
|
||||
total_root_elements: 0,
|
||||
special_constants: None,
|
||||
root_constant_info: None,
|
||||
immediates_info: None,
|
||||
sampler_heap_root_index: None,
|
||||
},
|
||||
root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS],
|
||||
constant_data: [0; MAX_ROOT_ELEMENTS],
|
||||
immediates: [0; MAX_IMMEDIATES],
|
||||
dynamic_storage_buffer_offsets: Vec::new(),
|
||||
dirty_root_elements: 0,
|
||||
vertex_buffers: [Default::default(); crate::MAX_VERTEX_BUFFERS],
|
||||
@@ -1148,9 +1170,9 @@ struct BindGroupInfo {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct RootConstantInfo {
|
||||
struct ImmediatesInfo {
|
||||
root_index: RootIndex,
|
||||
range: core::ops::Range<u32>,
|
||||
size: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -1164,7 +1186,7 @@ struct PipelineLayoutShared {
|
||||
signature: Option<Direct3D12::ID3D12RootSignature>,
|
||||
total_root_elements: RootIndex,
|
||||
special_constants: Option<PipelineLayoutSpecialConstants>,
|
||||
root_constant_info: Option<RootConstantInfo>,
|
||||
immediates_info: Option<ImmediatesInfo>,
|
||||
sampler_heap_root_index: Option<RootIndex>,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user