-
-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add GpuArrayBuffer and BatchedUniformBuffer (#8204)
# Objective - Add a type for uploading a Rust `Vec<T>` to a GPU `array<T>`. - Makes progress towards #89. ## Solution - Port @superdump's `BatchedUniformBuffer` to bevy main, as a fallback for WebGL2, which doesn't support storage buffers. - Rather than getting an `array<T>` in a shader, you get an `array<T, N>`, and have to rebind every N elements via dynamic offsets. - Add `GpuArrayBuffer` to abstract over `StorageBuffer<Vec<T>>`/`BatchedUniformBuffer`. ## Future Work Add a shader macro kinda thing to abstract over the following automatically: #8204 (review) --- ## Changelog * Added `GpuArrayBuffer`, `GpuComponentArrayBufferPlugin`, `GpuArrayBufferable`, and `GpuArrayBufferIndex` types. * Added `DynamicUniformBuffer::new_with_alignment()`. --------- Co-authored-by: Robert Swain <[email protected]> Co-authored-by: François <[email protected]> Co-authored-by: Teodor Tanasoaia <[email protected]> Co-authored-by: IceSentry <[email protected]> Co-authored-by: Vincent <[email protected]> Co-authored-by: robtfm <[email protected]>
- Loading branch information
1 parent
264195e
commit ad011d0
Showing
8 changed files
with
359 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
use crate::{ | ||
render_resource::{GpuArrayBuffer, GpuArrayBufferable}, | ||
renderer::{RenderDevice, RenderQueue}, | ||
Render, RenderApp, RenderSet, | ||
}; | ||
use bevy_app::{App, Plugin}; | ||
use bevy_ecs::{ | ||
prelude::{Component, Entity}, | ||
schedule::IntoSystemConfigs, | ||
system::{Commands, Query, Res, ResMut}, | ||
}; | ||
use std::marker::PhantomData; | ||
|
||
/// This plugin prepares the components of the corresponding type for the GPU | ||
/// by storing them in a [`GpuArrayBuffer`]. | ||
pub struct GpuComponentArrayBufferPlugin<C: Component + GpuArrayBufferable>(PhantomData<C>); | ||
|
||
impl<C: Component + GpuArrayBufferable> Plugin for GpuComponentArrayBufferPlugin<C> { | ||
fn build(&self, app: &mut App) { | ||
if let Ok(render_app) = app.get_sub_app_mut(RenderApp) { | ||
render_app | ||
.insert_resource(GpuArrayBuffer::<C>::new( | ||
render_app.world.resource::<RenderDevice>(), | ||
)) | ||
.add_systems( | ||
Render, | ||
prepare_gpu_component_array_buffers::<C>.in_set(RenderSet::Prepare), | ||
); | ||
} | ||
} | ||
} | ||
|
||
impl<C: Component + GpuArrayBufferable> Default for GpuComponentArrayBufferPlugin<C> { | ||
fn default() -> Self { | ||
Self(PhantomData::<C>) | ||
} | ||
} | ||
|
||
fn prepare_gpu_component_array_buffers<C: Component + GpuArrayBufferable>( | ||
mut commands: Commands, | ||
render_device: Res<RenderDevice>, | ||
render_queue: Res<RenderQueue>, | ||
mut gpu_array_buffer: ResMut<GpuArrayBuffer<C>>, | ||
components: Query<(Entity, &C)>, | ||
) { | ||
gpu_array_buffer.clear(); | ||
|
||
let entities = components | ||
.iter() | ||
.map(|(entity, component)| (entity, gpu_array_buffer.push(component.clone()))) | ||
.collect::<Vec<_>>(); | ||
commands.insert_or_spawn_batch(entities); | ||
|
||
gpu_array_buffer.write_buffer(&render_device, &render_queue); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
152 changes: 152 additions & 0 deletions
152
crates/bevy_render/src/render_resource/batched_uniform_buffer.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
use super::{GpuArrayBufferIndex, GpuArrayBufferable}; | ||
use crate::{ | ||
render_resource::DynamicUniformBuffer, | ||
renderer::{RenderDevice, RenderQueue}, | ||
}; | ||
use encase::{ | ||
private::{ArrayMetadata, BufferMut, Metadata, RuntimeSizedArray, WriteInto, Writer}, | ||
ShaderType, | ||
}; | ||
use std::{marker::PhantomData, num::NonZeroU64}; | ||
use wgpu::{BindingResource, Limits}; | ||
|
||
// Everywhere except WebGL2 the cap is 1MB. Without it we would make really
// large arrays on macOS, which reports a very large
// `max_uniform_buffer_binding_size`. On macOS this value ends up being the
// minimum size of the uniform buffer as well as the size of each chunk of data
// at a dynamic offset.
#[cfg(not(all(feature = "webgl", target_arch = "wasm32")))]
const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 1024 * 1024;

// WebGL2 quirk: the cap is 4KB, because uniform buffers larger than that cause
// extremely long shader compilation times. Older shader compilers/GPUs do not
// support dynamically indexing uniform buffers and emulate it with large
// switch statements over buffer indices, which take a long time to compile.
#[cfg(all(feature = "webgl", target_arch = "wasm32"))]
const MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE: u32 = 4 * 1024;
|
||
/// Similar to [`DynamicUniformBuffer`], except every N elements (depending on size) | ||
/// are grouped into a batch as an `array<T, N>` in WGSL. | ||
/// | ||
/// This reduces the number of rebindings required due to having to pass dynamic | ||
/// offsets to bind group commands, and if indices into the array can be passed | ||
/// in via other means, it enables batching of draw commands. | ||
pub struct BatchedUniformBuffer<T: GpuArrayBufferable> { | ||
// Batches of fixed-size arrays of T are written to this buffer so that | ||
// each batch in a fixed-size array can be bound at a dynamic offset. | ||
uniforms: DynamicUniformBuffer<MaxCapacityArray<Vec<T>>>, | ||
// A batch of T are gathered into this `MaxCapacityArray` until it is full, | ||
// then it is written into the `DynamicUniformBuffer`, cleared, and new T | ||
// are gathered here, and so on for each batch. | ||
temp: MaxCapacityArray<Vec<T>>, | ||
current_offset: u32, | ||
dynamic_offset_alignment: u32, | ||
} | ||
|
||
impl<T: GpuArrayBufferable> BatchedUniformBuffer<T> { | ||
pub fn batch_size(limits: &Limits) -> usize { | ||
(limits | ||
.max_uniform_buffer_binding_size | ||
.min(MAX_REASONABLE_UNIFORM_BUFFER_BINDING_SIZE) as u64 | ||
/ T::min_size().get()) as usize | ||
} | ||
|
||
pub fn new(limits: &Limits) -> Self { | ||
let capacity = Self::batch_size(limits); | ||
let alignment = limits.min_uniform_buffer_offset_alignment; | ||
|
||
Self { | ||
uniforms: DynamicUniformBuffer::new_with_alignment(alignment as u64), | ||
temp: MaxCapacityArray(Vec::with_capacity(capacity), capacity), | ||
current_offset: 0, | ||
dynamic_offset_alignment: alignment, | ||
} | ||
} | ||
|
||
#[inline] | ||
pub fn size(&self) -> NonZeroU64 { | ||
self.temp.size() | ||
} | ||
|
||
pub fn clear(&mut self) { | ||
self.uniforms.clear(); | ||
self.current_offset = 0; | ||
self.temp.0.clear(); | ||
} | ||
|
||
pub fn push(&mut self, component: T) -> GpuArrayBufferIndex<T> { | ||
let result = GpuArrayBufferIndex { | ||
index: self.temp.0.len() as u32, | ||
dynamic_offset: Some(self.current_offset), | ||
element_type: PhantomData, | ||
}; | ||
self.temp.0.push(component); | ||
if self.temp.0.len() == self.temp.1 { | ||
self.flush(); | ||
} | ||
result | ||
} | ||
|
||
pub fn flush(&mut self) { | ||
self.uniforms.push(self.temp.clone()); | ||
|
||
self.current_offset += | ||
align_to_next(self.temp.size().get(), self.dynamic_offset_alignment as u64) as u32; | ||
|
||
self.temp.0.clear(); | ||
} | ||
|
||
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) { | ||
if !self.temp.0.is_empty() { | ||
self.flush(); | ||
} | ||
self.uniforms.write_buffer(device, queue); | ||
} | ||
|
||
#[inline] | ||
pub fn binding(&self) -> Option<BindingResource> { | ||
let mut binding = self.uniforms.binding(); | ||
if let Some(BindingResource::Buffer(binding)) = &mut binding { | ||
// MaxCapacityArray is runtime-sized so can't use T::min_size() | ||
binding.size = Some(self.size()); | ||
} | ||
binding | ||
} | ||
} | ||
|
||
/// Rounds `value` up to the nearest multiple of `alignment`.
///
/// `alignment` must be a power of two; this is checked in debug builds only.
/// A `value` that is already a multiple of `alignment`, including `0`, is
/// returned unchanged.
#[inline]
fn align_to_next(value: u64, alignment: u64) -> u64 {
    // `is_power_of_two` also rejects 0, which would underflow in `alignment - 1`.
    debug_assert!(alignment.is_power_of_two());
    // Zero is aligned to everything; handling it here avoids underflowing
    // `value - 1` below.
    if value == 0 {
        return 0;
    }
    // Setting all bits below the alignment on `value - 1` and adding one lands
    // on the next multiple, or on `value` itself when it is already aligned.
    ((value - 1) | (alignment - 1)) + 1
}
|
||
// ----------------------------------------------------------------------------
// `MaxCapacityArray` was implemented by Teodor Tanasoaia for encase. It lives
// here as a copy because it was not yet part of an encase release, and it is
// unclear whether it is the correct long-term solution for encase.

/// A collection (field `0`) paired with a maximum capacity (field `1`).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct MaxCapacityArray<T>(T, usize);
|
||
impl<T> ShaderType for MaxCapacityArray<T> | ||
where | ||
T: ShaderType<ExtraMetadata = ArrayMetadata>, | ||
{ | ||
type ExtraMetadata = ArrayMetadata; | ||
|
||
const METADATA: Metadata<Self::ExtraMetadata> = T::METADATA; | ||
|
||
fn size(&self) -> ::core::num::NonZeroU64 { | ||
Self::METADATA.stride().mul(self.1.max(1) as u64).0 | ||
} | ||
} | ||
|
||
impl<T> WriteInto for MaxCapacityArray<T> | ||
where | ||
T: WriteInto + RuntimeSizedArray, | ||
{ | ||
fn write_into<B: BufferMut>(&self, writer: &mut Writer<B>) { | ||
debug_assert!(self.0.len() <= self.1); | ||
self.0.write_into(writer); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
129 changes: 129 additions & 0 deletions
129
crates/bevy_render/src/render_resource/gpu_array_buffer.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
use super::StorageBuffer; | ||
use crate::{ | ||
render_resource::batched_uniform_buffer::BatchedUniformBuffer, | ||
renderer::{RenderDevice, RenderQueue}, | ||
}; | ||
use bevy_ecs::{prelude::Component, system::Resource}; | ||
use encase::{private::WriteInto, ShaderSize, ShaderType}; | ||
use std::{marker::PhantomData, mem}; | ||
use wgpu::{BindGroupLayoutEntry, BindingResource, BindingType, BufferBindingType, ShaderStages}; | ||
|
||
/// Trait for types able to go in a [`GpuArrayBuffer`]. | ||
pub trait GpuArrayBufferable: ShaderType + ShaderSize + WriteInto + Clone {} | ||
impl<T: ShaderType + ShaderSize + WriteInto + Clone> GpuArrayBufferable for T {} | ||
|
||
/// Stores an array of elements to be transferred to the GPU and made accessible to shaders as a read-only array. | ||
/// | ||
/// On platforms that support storage buffers, this is equivalent to [`StorageBuffer<Vec<T>>`]. | ||
/// Otherwise, this falls back to a dynamic offset uniform buffer with the largest | ||
/// array of T that fits within a uniform buffer binding (within reasonable limits). | ||
/// | ||
/// Other options for storing GPU-accessible data are: | ||
/// * [`StorageBuffer`](crate::render_resource::StorageBuffer) | ||
/// * [`DynamicStorageBuffer`](crate::render_resource::DynamicStorageBuffer) | ||
/// * [`UniformBuffer`](crate::render_resource::UniformBuffer) | ||
/// * [`DynamicUniformBuffer`](crate::render_resource::DynamicUniformBuffer) | ||
/// * [`BufferVec`](crate::render_resource::BufferVec) | ||
/// * [`Texture`](crate::render_resource::Texture) | ||
#[derive(Resource)] | ||
pub enum GpuArrayBuffer<T: GpuArrayBufferable> { | ||
Uniform(BatchedUniformBuffer<T>), | ||
Storage((StorageBuffer<Vec<T>>, Vec<T>)), | ||
} | ||
|
||
impl<T: GpuArrayBufferable> GpuArrayBuffer<T> { | ||
pub fn new(device: &RenderDevice) -> Self { | ||
let limits = device.limits(); | ||
if limits.max_storage_buffers_per_shader_stage == 0 { | ||
GpuArrayBuffer::Uniform(BatchedUniformBuffer::new(&limits)) | ||
} else { | ||
GpuArrayBuffer::Storage((StorageBuffer::default(), Vec::new())) | ||
} | ||
} | ||
|
||
pub fn clear(&mut self) { | ||
match self { | ||
GpuArrayBuffer::Uniform(buffer) => buffer.clear(), | ||
GpuArrayBuffer::Storage((_, buffer)) => buffer.clear(), | ||
} | ||
} | ||
|
||
pub fn push(&mut self, value: T) -> GpuArrayBufferIndex<T> { | ||
match self { | ||
GpuArrayBuffer::Uniform(buffer) => buffer.push(value), | ||
GpuArrayBuffer::Storage((_, buffer)) => { | ||
let index = buffer.len() as u32; | ||
buffer.push(value); | ||
GpuArrayBufferIndex { | ||
index, | ||
dynamic_offset: None, | ||
element_type: PhantomData, | ||
} | ||
} | ||
} | ||
} | ||
|
||
pub fn write_buffer(&mut self, device: &RenderDevice, queue: &RenderQueue) { | ||
match self { | ||
GpuArrayBuffer::Uniform(buffer) => buffer.write_buffer(device, queue), | ||
GpuArrayBuffer::Storage((buffer, vec)) => { | ||
buffer.set(mem::take(vec)); | ||
buffer.write_buffer(device, queue); | ||
} | ||
} | ||
} | ||
|
||
pub fn binding_layout( | ||
binding: u32, | ||
visibility: ShaderStages, | ||
device: &RenderDevice, | ||
) -> BindGroupLayoutEntry { | ||
BindGroupLayoutEntry { | ||
binding, | ||
visibility, | ||
ty: if device.limits().max_storage_buffers_per_shader_stage == 0 { | ||
BindingType::Buffer { | ||
ty: BufferBindingType::Uniform, | ||
has_dynamic_offset: true, | ||
// BatchedUniformBuffer uses a MaxCapacityArray that is runtime-sized, so we use | ||
// None here and let wgpu figure out the size. | ||
min_binding_size: None, | ||
} | ||
} else { | ||
BindingType::Buffer { | ||
ty: BufferBindingType::Storage { read_only: true }, | ||
has_dynamic_offset: false, | ||
min_binding_size: Some(T::min_size()), | ||
} | ||
}, | ||
count: None, | ||
} | ||
} | ||
|
||
pub fn binding(&self) -> Option<BindingResource> { | ||
match self { | ||
GpuArrayBuffer::Uniform(buffer) => buffer.binding(), | ||
GpuArrayBuffer::Storage((buffer, _)) => buffer.binding(), | ||
} | ||
} | ||
|
||
pub fn batch_size(device: &RenderDevice) -> Option<u32> { | ||
let limits = device.limits(); | ||
if limits.max_storage_buffers_per_shader_stage == 0 { | ||
Some(BatchedUniformBuffer::<T>::batch_size(&limits) as u32) | ||
} else { | ||
None | ||
} | ||
} | ||
} | ||
|
||
/// An index into a [`GpuArrayBuffer`] for a given element. | ||
#[derive(Component)] | ||
pub struct GpuArrayBufferIndex<T: GpuArrayBufferable> { | ||
/// The index to use in a shader into the array. | ||
pub index: u32, | ||
/// The dynamic offset to use when setting the bind group in a pass. | ||
/// Only used on platforms that don't support storage buffers. | ||
pub dynamic_offset: Option<u32>, | ||
pub element_type: PhantomData<T>, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.