From 5dd0200be1c4bea7c4063e95289f52b01d5e26d6 Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Mon, 10 Jun 2024 18:53:58 +0200 Subject: [PATCH 01/15] subgroup: add trait VectorOrScalar, representing either a vector or a scalar type --- crates/spirv-std/src/float.rs | 4 +++ crates/spirv-std/src/scalar.rs | 49 +++++++++++++++++++++++++++++++++- crates/spirv-std/src/vector.rs | 48 +++++++++++++++++++++++++++++++-- tests/ui/arch/all.rs | 5 +++- tests/ui/arch/any.rs | 5 +++- 5 files changed, 106 insertions(+), 5 deletions(-) diff --git a/crates/spirv-std/src/float.rs b/crates/spirv-std/src/float.rs index 0a685b2468..be9133ee0a 100644 --- a/crates/spirv-std/src/float.rs +++ b/crates/spirv-std/src/float.rs @@ -1,5 +1,6 @@ //! Traits and helper functions related to floats. +use crate::scalar::VectorOrScalar; use crate::vector::Vector; #[cfg(target_arch = "spirv")] use core::arch::asm; @@ -71,6 +72,9 @@ struct F32x2 { x: f32, y: f32, } +unsafe impl VectorOrScalar for F32x2 { + type Scalar = f32; +} unsafe impl Vector for F32x2 {} /// Converts an f32 (float) into an f16 (half). The result is a u32, not a u16, due to GPU support diff --git a/crates/spirv-std/src/scalar.rs b/crates/spirv-std/src/scalar.rs index 34d1f5db8c..9747cc995e 100644 --- a/crates/spirv-std/src/scalar.rs +++ b/crates/spirv-std/src/scalar.rs @@ -1,11 +1,58 @@ //! Traits related to scalars. +/// Abstract trait representing either a vector or a scalar type. +/// +/// # Safety +/// Implementing this trait on non-scalar or non-vector types may break assumptions about other +/// unsafe code, and should not be done. +pub unsafe trait VectorOrScalar: Default { + /// Either the scalar component type of the vector or the scalar itself. 
+ type Scalar: Scalar; +} + +unsafe impl VectorOrScalar for bool { + type Scalar = bool; +} +unsafe impl VectorOrScalar for f32 { + type Scalar = f32; +} +unsafe impl VectorOrScalar for f64 { + type Scalar = f64; +} +unsafe impl VectorOrScalar for u8 { + type Scalar = u8; +} +unsafe impl VectorOrScalar for u16 { + type Scalar = u16; +} +unsafe impl VectorOrScalar for u32 { + type Scalar = u32; +} +unsafe impl VectorOrScalar for u64 { + type Scalar = u64; +} +unsafe impl VectorOrScalar for i8 { + type Scalar = i8; +} +unsafe impl VectorOrScalar for i16 { + type Scalar = i16; +} +unsafe impl VectorOrScalar for i32 { + type Scalar = i32; +} +unsafe impl VectorOrScalar for i64 { + type Scalar = i64; +} + /// Abstract trait representing a SPIR-V scalar type. /// /// # Safety /// Implementing this trait on non-scalar types breaks assumptions of other unsafe code, and should /// not be done. -pub unsafe trait Scalar: Copy + Default + crate::sealed::Sealed {} +pub unsafe trait Scalar: + VectorOrScalar + Copy + Default + crate::sealed::Sealed +{ +} unsafe impl Scalar for bool {} unsafe impl Scalar for f32 {} diff --git a/crates/spirv-std/src/vector.rs b/crates/spirv-std/src/vector.rs index 19cdb144b9..7510953034 100644 --- a/crates/spirv-std/src/vector.rs +++ b/crates/spirv-std/src/vector.rs @@ -1,13 +1,57 @@ //! Traits related to vectors. 
+use crate::scalar::{Scalar, VectorOrScalar}; use glam::{Vec3Swizzles, Vec4Swizzles}; +unsafe impl VectorOrScalar for glam::Vec2 { + type Scalar = f32; +} +unsafe impl VectorOrScalar for glam::Vec3 { + type Scalar = f32; +} +unsafe impl VectorOrScalar for glam::Vec3A { + type Scalar = f32; +} +unsafe impl VectorOrScalar for glam::Vec4 { + type Scalar = f32; +} + +unsafe impl VectorOrScalar for glam::DVec2 { + type Scalar = f64; +} +unsafe impl VectorOrScalar for glam::DVec3 { + type Scalar = f64; +} +unsafe impl VectorOrScalar for glam::DVec4 { + type Scalar = f64; +} + +unsafe impl VectorOrScalar for glam::UVec2 { + type Scalar = u32; +} +unsafe impl VectorOrScalar for glam::UVec3 { + type Scalar = u32; +} +unsafe impl VectorOrScalar for glam::UVec4 { + type Scalar = u32; +} + +unsafe impl VectorOrScalar for glam::IVec2 { + type Scalar = i32; +} +unsafe impl VectorOrScalar for glam::IVec3 { + type Scalar = i32; +} +unsafe impl VectorOrScalar for glam::IVec4 { + type Scalar = i32; +} + /// Abstract trait representing a SPIR-V vector type. /// /// # Safety /// Implementing this trait on non-simd-vector types breaks assumptions of other unsafe code, and /// should not be done. 
-pub unsafe trait Vector: Default {} +pub unsafe trait Vector: VectorOrScalar {} unsafe impl Vector for glam::Vec2 {} unsafe impl Vector for glam::Vec3 {} @@ -27,7 +71,7 @@ unsafe impl Vector for glam::IVec3 {} unsafe impl Vector for glam::IVec4 {} /// Trait that implements slicing of a vector into a scalar or vector of lower dimensions, by -/// ignoring the highter dimensions +/// ignoring the higter dimensions pub trait VectorTruncateInto { /// Slices the vector into a lower dimensional type by ignoring the higher components fn truncate_into(self) -> T; diff --git a/tests/ui/arch/all.rs b/tests/ui/arch/all.rs index fd0d5e51db..fbedae03c4 100644 --- a/tests/ui/arch/all.rs +++ b/tests/ui/arch/all.rs @@ -3,7 +3,7 @@ #![feature(repr_simd)] use spirv_std::spirv; -use spirv_std::{scalar::Scalar, vector::Vector}; +use spirv_std::{scalar::Scalar, scalar::VectorOrScalar, vector::Vector}; /// HACK(shesp). Rust doesn't allow us to declare regular (tuple-)structs containing `bool` members /// as `#[repl(simd)]`. But we need this for `spirv_std::arch::any()` and `spirv_std::arch::all()` @@ -12,6 +12,9 @@ use spirv_std::{scalar::Scalar, vector::Vector}; /// it (for now at least) #[repr(simd)] struct Vec2(T, T); +unsafe impl VectorOrScalar for Vec2 { + type Scalar = T; +} unsafe impl Vector for Vec2 {} impl Default for Vec2 { diff --git a/tests/ui/arch/any.rs b/tests/ui/arch/any.rs index 29cdc14626..5f4caed88f 100644 --- a/tests/ui/arch/any.rs +++ b/tests/ui/arch/any.rs @@ -3,7 +3,7 @@ #![feature(repr_simd)] use spirv_std::spirv; -use spirv_std::{scalar::Scalar, vector::Vector}; +use spirv_std::{scalar::Scalar, scalar::VectorOrScalar, vector::Vector}; /// HACK(shesp). Rust doesn't allow us to declare regular (tuple-)structs containing `bool` members /// as `#[repl(simd)]`. 
But we need this for `spirv_std::arch::any()` and `spirv_std::arch::all()` @@ -12,6 +12,9 @@ use spirv_std::{scalar::Scalar, vector::Vector}; /// it (for now at least) #[repr(simd)] struct Vec2(T, T); +unsafe impl VectorOrScalar for Vec2 { + type Scalar = T; +} unsafe impl Vector for Vec2 {} impl Default for Vec2 { From a951735d836e06c03bbbd029f21d3d536f351acb Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Mon, 10 Jun 2024 22:22:44 +0200 Subject: [PATCH 02/15] subgroup: added all non-uniform subgroup operations --- crates/spirv-std/src/arch.rs | 2 + crates/spirv-std/src/arch/subgroup.rs | 2068 +++++++++++++++++++++++++ crates/spirv-std/src/vector.rs | 2 +- 3 files changed, 2071 insertions(+), 1 deletion(-) create mode 100644 crates/spirv-std/src/arch/subgroup.rs diff --git a/crates/spirv-std/src/arch.rs b/crates/spirv-std/src/arch.rs index 0fa43fea0f..dc061c7a00 100644 --- a/crates/spirv-std/src/arch.rs +++ b/crates/spirv-std/src/arch.rs @@ -19,6 +19,7 @@ mod demote_to_helper_invocation_ext; mod derivative; mod primitive; mod ray_tracing; +mod subgroup; pub use atomics::*; pub use barrier::*; @@ -26,6 +27,7 @@ pub use demote_to_helper_invocation_ext::*; pub use derivative::*; pub use primitive::*; pub use ray_tracing::*; +pub use subgroup::*; /// Result is true if any component of `vector` is true, otherwise result is /// false. diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs new file mode 100644 index 0000000000..e1b38d2946 --- /dev/null +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -0,0 +1,2068 @@ +use crate::float::Float; +use crate::integer::{Integer, SignedInteger, UnsignedInteger}; +use crate::memory::Scope; +use crate::scalar::VectorOrScalar; +#[cfg(target_arch = "spirv")] +use core::arch::asm; + +const SUBGROUP: u32 = Scope::Subgroup as u32; + +/// GroupMask is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup. 
+/// Mostly used in group ballot operations. +pub type SubgroupMask = glam::UVec4; + +/// Defines the class of group operation. +#[non_exhaustive] +#[derive(Debug, PartialEq, Eq)] +pub enum GroupOperation { + /// A reduction operation for all values of a specific value X specified by invocations within a workgroup. + Reduce = 0, + /// A binary operation with an identity I and n (where n is the size of the workgroup) + /// elements[a0, a1, … an-1] resulting in [a0, (a0 op a1), …(a0 op a1 op … op an-1)] + InclusiveScan = 1, + /// A binary operation with an identity I and n (where n is the size of the workgroup) + /// elements[a0, a1, … an-1] resulting in [I, a0, (a0 op a1), … (a0 op a1 op … op an-2)]. + ExclusiveScan = 2, + + // /// See [`GROUP_OPERATION_CLUSTERED_REDUCE`] + // ClusteredReduce = 3, + /// Reserved. + #[cfg(target_feature = "GroupNonUniformPartitionedNV")] + PartitionedReduceNV = 6, + /// Reserved. + #[cfg(target_feature = "GroupNonUniformPartitionedNV")] + PartitionedInclusiveScanNV = 7, + /// Reserved. + #[cfg(target_feature = "GroupNonUniformPartitionedNV")] + PartitionedExclusiveScanNV = 8, +} + +/// The [`GroupOperation`] `ClusteredReduce`. +/// +/// All instructions with a [`GroupOperation`] require an additional `ClusterSize` parameter when [`GroupOperation`] is +/// `ClusteredReduce`. To map this requirement into rust, all function have a base version accepting [`GroupOperation`] +/// as a const generic, and a `_clustered` variant that is fixed to `ClusteredReduce` and takes the additional +/// `ClusterSize` parameter as a const generic. To not accidentally use a `ClusteredReduce` in the base variant of the +/// function, it was removed from the [`GroupOperation`] enum and instead resides individually. +pub const GROUP_OPERATION_CLUSTERED_REDUCE: u32 = 3; + +// TODO barriers + +/// Result is true only in the active invocation with the lowest id in the group, otherwise result is false. +/// +/// Result Type must be a Boolean type. 
+/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +#[cfg(target_feature = "GroupNonUniform")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformElect")] +#[inline] +pub unsafe fn subgroup_non_uniform_elect() -> bool { + let mut result = false; + + unsafe { + asm! { + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%result = OpGroupNonUniformElect %bool %subgroup", + "OpStore {result} %result", + subgroup = const SUBGROUP, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a predicate for all active invocations in the group, resulting in true if predicate evaluates to true for all active invocations in the group, otherwise the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Predicate must be a Boolean type. +#[cfg(target_feature = "GroupNonUniformVote")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformAll")] +#[inline] +pub unsafe fn subgroup_non_uniform_all(predicate: bool) -> bool { + let mut result = false; + + unsafe { + asm! { + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%predicate = OpLoad _ {predicate}", + "%result = OpGroupNonUniformAll %bool %subgroup %predicate", + "OpStore {result} %result", + subgroup = const SUBGROUP, + predicate = in(reg) &predicate, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a predicate for all active invocations in the group, resulting in true if predicate evaluates to true for any active invocation in the group, otherwise the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Predicate must be a Boolean type. 
+#[cfg(target_feature = "GroupNonUniformVote")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformAny")] +#[inline] +pub unsafe fn subgroup_non_uniform_any(predicate: bool) -> bool { + let mut result = false; + + unsafe { + asm! { + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%predicate = OpLoad _ {predicate}", + "%result = OpGroupNonUniformAny %bool %subgroup %predicate", + "OpStore {result} %result", + subgroup = const SUBGROUP, + predicate = in(reg) &predicate, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a value for all active invocations in the group. The result is true if Value is equal for all active invocations in the group. Otherwise, the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a scalar or vector of floating-point type, integer type, or Boolean type. The compare operation is based on this type, and if it is a floating-point type, an ordered-and-equal compare is used. +#[cfg(target_feature = "GroupNonUniformVote")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformAllEqual")] +#[inline] +pub unsafe fn subgroup_non_uniform_all_equal(value: T) -> bool { + let mut result = false; + + unsafe { + asm! { + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformAllEqual %bool %subgroup %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the id Id to all active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. 
+/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Id must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Before version 1.5, Id must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when Id is not dynamically uniform. +/// +/// The resulting value is undefined if Id is an inactive invocation, or is greater than or equal to the size of the group. +#[cfg(target_feature = "GroupNonUniformBallot")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBroadcast")] +#[inline] +pub unsafe fn subgroup_non_uniform_broadcast(value: T, id: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%id = OpLoad _ {id}", + "%result = OpGroupNonUniformBroadcast _ %subgroup %value %id", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + id = in(reg) &id, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation from the active invocation with the lowest id in the group to all active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformBallot")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBroadcastFirst")] +#[inline] +pub unsafe fn subgroup_non_uniform_broadcast_first(value: T) -> T { + let mut result = T::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformBroadcastFirst _ %subgroup %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is a bitfield value combining the Predicate value from all invocations in the group that execute the same dynamic instance of this instruction. The bit is set to one if the corresponding invocation is active and the Predicate for that invocation evaluated to true; otherwise, it is set to zero. +/// +/// Result Type must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Result is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. +/// +/// Predicate must be a Boolean type. +#[cfg(target_feature = "GroupNonUniformBallot")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallot")] +#[inline] +pub unsafe fn subgroup_non_uniform_ballot(predicate: bool) -> SubgroupMask { + let mut result = SubgroupMask::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%groupmask = OpTypeVector %u32 4", + "%subgroup = OpConstant %u32 {subgroup}", + "%predicate = OpLoad _ {predicate}", + "%result = OpGroupNonUniformBallot %groupmask %subgroup %predicate", + "OpStore {result} %result", + subgroup = const SUBGROUP, + predicate = in(reg) &predicate, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a value for all active invocations in the group, resulting in true if the bit in Value for the corresponding invocation is set to one, otherwise the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Behavior is undefined unless Value is the same for all invocations that execute the same dynamic instance of this instruction. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +#[cfg(target_feature = "GroupNonUniformBallot")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformInverseBallot")] +#[inline] +pub unsafe fn subgroup_non_uniform_inverse_ballot(subgroup_mask: SubgroupMask) -> bool { + let mut result = false; + + unsafe { + asm! 
{ + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%result = OpGroupNonUniformInverseBallot %bool %subgroup %subgroup_mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + subgroup_mask = in(reg) &subgroup_mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Evaluates a value for all active invocations in the group, resulting in true if the bit in Value that corresponds to Index is set to one, otherwise the result is false. +/// +/// Result Type must be a Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +/// +/// Index must be a scalar of integer type, whose Signedness operand is 0. +/// +/// The resulting value is undefined if Index is greater than or equal to the size of the group. +#[cfg(target_feature = "GroupNonUniformBallot")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallotBitExtract")] +#[inline] +pub unsafe fn subgroup_non_uniform_ballot_bit_extract( + subgroup_mask: SubgroupMask, + id: u32, +) -> bool { + let mut result = false; + + unsafe { + asm! 
{ + "%bool = OpTypeBool", + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%id = OpLoad _ {id}", + "%result = OpGroupNonUniformBallotBitExtract %bool %subgroup %subgroup_mask %id", + "OpStore {result} %result", + subgroup = const SUBGROUP, + subgroup_mask = in(reg) &subgroup_mask, + id = in(reg) &id, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the number of bits that are set to 1 in Value, considering only the bits in Value required to represent all bits of the group's invocations. +/// +/// Result Type must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +#[cfg(target_feature = "GroupNonUniformBallot")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallotBitCount")] +#[inline] +pub unsafe fn subgroup_non_uniform_ballot_bit_count( + subgroup_mask: SubgroupMask, +) -> u32 { + let mut result = 0; + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup %groupop %subgroup_mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + subgroup_mask = in(reg) &subgroup_mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Find the least significant bit set to 1 in Value, considering only the bits in Value required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined. +/// +/// Result Type must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +#[cfg(target_feature = "GroupNonUniformBallot")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallotFindLSB")] +#[inline] +pub unsafe fn subgroup_non_uniform_ballot_find_lsb(subgroup_mask: SubgroupMask) -> u32 { + let mut result = 0; + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%result = OpGroupNonUniformBallotFindLSB %u32 %subgroup %subgroup_mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + subgroup_mask = in(reg) &subgroup_mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Find the most significant bit set to 1 in Value, considering only the bits in Value required to represent all bits of the group's invocations. If none of the considered bits is set to 1, the resulting value is undefined. +/// +/// Result Type must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. +/// +/// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. +#[cfg(target_feature = "GroupNonUniformBallot")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBallotFindMSB")] +#[inline] +pub unsafe fn subgroup_non_uniform_ballot_find_msb(subgroup_mask: SubgroupMask) -> u32 { + let mut result = 0; + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%subgroup_mask = OpLoad _ {subgroup_mask}", + "%result = OpGroupNonUniformBallotFindMSB %u32 %subgroup %subgroup_mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + subgroup_mask = in(reg) &subgroup_mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the id Id. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. 
+/// +/// Execution is a Scope that identifies the group of invocations affected by this command. +/// +/// The type of Value must be the same as Result Type. +/// +/// Id must be a scalar of integer type, whose Signedness operand is 0. +/// +/// The resulting value is undefined if Id is an inactive invocation, or is greater than or equal to the size of the group. +#[cfg(target_feature = "GroupNonUniformShuffle")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformShuffle")] +#[inline] +pub unsafe fn subgroup_non_uniform_shuffle(value: T, id: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%id = OpLoad _ {id}", + "%result = OpGroupNonUniformShuffle _ %subgroup %value %id", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + id = in(reg) &id, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the current invocation’s id within the group xor’ed with Mask. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Mask must be a scalar of integer type, whose Signedness operand is 0. +/// +/// The resulting value is undefined if current invocation’s id within the group xor’ed with Mask is an inactive invocation, or is greater than or equal to the size of the group. +#[cfg(target_feature = "GroupNonUniformShuffle")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformShuffleXor")] +#[inline] +pub unsafe fn subgroup_non_uniform_shuffle_xor(value: T, mask: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%mask = OpLoad _ {mask}", + "%result = OpGroupNonUniformShuffleXor _ %subgroup %value %mask", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + mask = in(reg) &mask, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the current invocation’s id within the group - Delta. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Delta must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than the current invocation’s id within the group or if the selected lane is inactive. +#[cfg(target_feature = "GroupNonUniformShuffleRelative")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformShuffleUp")] +#[inline] +pub unsafe fn subgroup_non_uniform_shuffle_up(value: T, delta: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%delta = OpLoad _ {delta}", + "%result = OpGroupNonUniformShuffleUp _ %subgroup %value %delta", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + delta = in(reg) &delta, + result = in(reg) &mut result, + } + } + + result +} + +/// Result is the Value of the invocation identified by the current invocation’s id within the group + Delta. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. 
It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Delta must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than or equal to the size of the group, or if the current invocation’s id within the group + Delta is either an inactive invocation or greater than or equal to the size of the group. +#[cfg(target_feature = "GroupNonUniformShuffleRelative")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformShuffleDown")] +#[inline] +pub unsafe fn subgroup_non_uniform_shuffle_down(value: T, delta: u32) -> T { + let mut result = T::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%value = OpLoad _ {value}", + "%delta = OpLoad _ {delta}", + "%result = OpGroupNonUniformShuffleDown _ %subgroup %value %delta", + "OpStore {result} %result", + subgroup = const SUBGROUP, + value = in(reg) &value, + delta = in(reg) &delta, + result = in(reg) &mut result, + } + } + + result +} + +/// An integer add group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformIAdd")] +#[inline] +pub unsafe fn subgroup_non_uniform_i_add< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformIAdd _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An integer add group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformIAdd")] +#[inline] +pub unsafe fn subgroup_non_uniform_i_add_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformIAdd _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point add group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFAdd")] +#[inline] +pub unsafe fn subgroup_non_uniform_f_add< + const GROUP_OP: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformFAdd _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point add group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. 
+/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFAdd")] +#[inline] +pub unsafe fn subgroup_non_uniform_f_add_clustered< + const CLUSTER_SIZE: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformFAdd _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An integer multiply group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 1. 
+/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformIMul")] +#[inline] +pub unsafe fn subgroup_non_uniform_i_mul< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformIMul _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An integer multiply group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 1. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformIMul")] +#[inline] +pub unsafe fn subgroup_non_uniform_i_mul_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformIMul _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point multiply group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 1. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMul")] +#[inline] +pub unsafe fn subgroup_non_uniform_f_mul< + const GROUP_OP: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformFMul _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point multiply group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. 
+/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 1. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMul")] +#[inline] +pub unsafe fn subgroup_non_uniform_f_mul_clustered< + const CLUSTER_SIZE: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformFMul _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A signed integer minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. 
+/// +/// The identity I for Operation is INT_MAX. +/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformSMin")] +#[inline] +pub unsafe fn subgroup_non_uniform_s_min< + const GROUP_OP: u32, + S: VectorOrScalar, +>( + value: S, +) -> S { + let mut result = S::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformSMin _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A signed integer minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is INT_MAX. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. 
+#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformSMin")] +#[inline] +pub unsafe fn subgroup_non_uniform_s_min_clustered< + const CLUSTER_SIZE: u32, + S: VectorOrScalar, +>( + value: S, +) -> S { + let mut result = S::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformSMin _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An unsigned integer minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is UINT_MAX. +/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformUMin")] +#[inline] +pub unsafe fn subgroup_non_uniform_u_min< + const GROUP_OP: u32, + U: VectorOrScalar, +>( + value: U, +) -> U { + let mut result = U::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformUMin _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An unsigned integer minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is UINT_MAX. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformUMin")] +#[inline] +pub unsafe fn subgroup_non_uniform_u_min_clustered< + const CLUSTER_SIZE: u32, + U: VectorOrScalar, +>( + value: U, +) -> U { + let mut result = U::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformUMin _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is +INF. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMin")] +#[inline] +pub unsafe fn subgroup_non_uniform_f_min< + const GROUP_OP: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformFMin _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A floating point minimum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is +INF. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMin")] +#[inline] +pub unsafe fn subgroup_non_uniform_f_min_clustered< + const CLUSTER_SIZE: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformFMin _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A signed integer maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is INT_MIN. +/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformSMax")] +#[inline] +pub unsafe fn subgroup_non_uniform_s_max< + const GROUP_OP: u32, + S: VectorOrScalar, +>( + value: S, +) -> S { + let mut result = S::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformSMax _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A signed integer maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is INT_MIN. 
If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformSMax")] +#[inline] +pub unsafe fn subgroup_non_uniform_s_max_clustered< + const CLUSTER_SIZE: u32, + S: VectorOrScalar, +>( + value: S, +) -> S { + let mut result = S::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformSMax _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An unsigned integer maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. 
+#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformUMax")] +#[inline] +pub unsafe fn subgroup_non_uniform_u_max< + const GROUP_OP: u32, + U: VectorOrScalar, +>( + value: U, +) -> U { + let mut result = U::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformUMax _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// An unsigned integer maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type, whose Signedness operand is 0. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformUMax")] +#[inline] +pub unsafe fn subgroup_non_uniform_u_max_clustered< + const CLUSTER_SIZE: u32, + U: VectorOrScalar, +>( + value: U, +) -> U { + let mut result = U::default(); + + unsafe { + asm! 
{ +    "%u32 = OpTypeInt 32 0", +    "%subgroup = OpConstant %u32 {subgroup}", +    "%groupop = OpConstant %u32 {groupop}", +    "%value = OpLoad _ {value}", +    "%clustersize = OpConstant %u32 {clustersize}", +    "%result = OpGroupNonUniformUMax _ %subgroup %groupop %value %clustersize", +    "OpStore {result} %result", +    subgroup = const SUBGROUP, +    groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, +    clustersize = const CLUSTER_SIZE, +    value = in(reg) &value, +    result = in(reg) &mut result, +    } + } + + result +} + +/// A floating point maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is -INF. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMax")] +#[inline] +pub unsafe fn subgroup_non_uniform_f_max< + const GROUP_OP: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm!
{ +    "%u32 = OpTypeInt 32 0", +    "%subgroup = OpConstant %u32 {subgroup}", +    "%groupop = OpConstant %u32 {groupop}", +    "%value = OpLoad _ {value}", +    "%result = OpGroupNonUniformFMax _ %subgroup %groupop %value", +    "OpStore {result} %result", +    subgroup = const SUBGROUP, +    groupop = const GROUP_OP, +    value = in(reg) &value, +    result = in(reg) &mut result, +    } + } + + result +} + +/// A floating point maximum group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of floating-point type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is -INF. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformFMax")] +#[inline] +pub unsafe fn subgroup_non_uniform_f_max_clustered< + const CLUSTER_SIZE: u32, + F: VectorOrScalar, +>( + value: F, +) -> F { + let mut result = F::default(); + + unsafe { + asm!
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformFMax _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise and group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is ~0. +/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseAnd")] +#[inline] +pub unsafe fn subgroup_non_uniform_bitwise_and< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformBitwiseAnd _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise and group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is ~0. If Operation is ClusteredReduce, ClusterSize must be present. 
+/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseAnd")] +#[inline] +pub unsafe fn subgroup_non_uniform_bitwise_and_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformBitwiseAnd _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise or group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. 
+#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseOr")] +#[inline] +pub unsafe fn subgroup_non_uniform_bitwise_or< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformBitwiseOr _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise or group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseOr")] +#[inline] +pub unsafe fn subgroup_non_uniform_bitwise_or_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformBitwiseOr _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise xor group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseXor")] +#[inline] +pub unsafe fn subgroup_non_uniform_bitwise_xor< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformBitwiseXor _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A bitwise xor group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of integer type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. 
If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformBitwiseXor")] +#[inline] +pub unsafe fn subgroup_non_uniform_bitwise_xor_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformBitwiseXor _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical and group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is ~0. +/// +/// The type of Value must be the same as Result Type. 
+#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalAnd")] +#[inline] +pub unsafe fn subgroup_non_uniform_logical_and< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformLogicalAnd _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical and group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is ~0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalAnd")] +#[inline] +pub unsafe fn subgroup_non_uniform_logical_and_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{ + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformLogicalAnd _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical or group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. +#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalOr")] +#[inline] +pub unsafe fn subgroup_non_uniform_logical_or< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformLogicalOr _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical or group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. 
+/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalOr")] +#[inline] +pub unsafe fn subgroup_non_uniform_logical_or_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%clustersize = OpConstant %u32 {clustersize}", + "%result = OpGroupNonUniformLogicalOr _ %subgroup %groupop %value %clustersize", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, + clustersize = const CLUSTER_SIZE, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical xor group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. +/// +/// The type of Value must be the same as Result Type. 
+#[cfg(target_feature = "GroupNonUniformArithmetic")] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalXor")] +#[inline] +pub unsafe fn subgroup_non_uniform_logical_xor< + const GROUP_OP: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! { + "%u32 = OpTypeInt 32 0", + "%subgroup = OpConstant %u32 {subgroup}", + "%groupop = OpConstant %u32 {groupop}", + "%value = OpLoad _ {value}", + "%result = OpGroupNonUniformLogicalXor _ %subgroup %groupop %value", + "OpStore {result} %result", + subgroup = const SUBGROUP, + groupop = const GROUP_OP, + value = in(reg) &value, + result = in(reg) &mut result, + } + } + + result +} + +/// A logical xor group operation of all Value operands contributed by active invocations in the group. +/// +/// Result Type must be a scalar or vector of Boolean type. +/// +/// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. +/// +/// The identity I for Operation is 0. If Operation is ClusteredReduce, ClusterSize must be present. +/// +/// The type of Value must be the same as Result Type. +/// +/// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. +#[cfg(all( + target_feature = "GroupNonUniformArithmetic", + target_feature = "GroupNonUniformClustered", +))] +#[spirv_std_macros::gpu_only] +#[doc(alias = "OpGroupNonUniformLogicalXor")] +#[inline] +pub unsafe fn subgroup_non_uniform_logical_xor_clustered< + const CLUSTER_SIZE: u32, + I: VectorOrScalar, +>( + value: I, +) -> I { + let mut result = I::default(); + + unsafe { + asm! 
{
+ "%u32 = OpTypeInt 32 0",
+ "%subgroup = OpConstant %u32 {subgroup}",
+ "%groupop = OpConstant %u32 {groupop}",
+ "%value = OpLoad _ {value}",
+ "%clustersize = OpConstant %u32 {clustersize}",
+ "%result = OpGroupNonUniformLogicalXor _ %subgroup %groupop %value %clustersize",
+ "OpStore {result} %result",
+ subgroup = const SUBGROUP,
+ groupop = const GROUP_OPERATION_CLUSTERED_REDUCE,
+ clustersize = const CLUSTER_SIZE,
+ value = in(reg) &value,
+ result = in(reg) &mut result,
+ }
+ }
+
+ result
+}
+
+/// Result is the Value of the invocation within the quad with a quad index equal to Index.
+///
+/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type.
+///
+/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup.
+///
+/// The type of Value must be the same as Result Type.
+///
+/// Index must be a scalar of integer type, whose Signedness operand is 0.
+///
+/// Before version 1.5, Index must come from a constant instruction. Starting with version 1.5, Index must be dynamically uniform.
+///
+/// If the value of Index is greater than or equal to 4, or refers to an inactive invocation, the resulting value is undefined.
+#[cfg(target_feature = "GroupNonUniformQuad")]
+#[spirv_std_macros::gpu_only]
+#[doc(alias = "OpGroupNonUniformQuadBroadcast")]
+#[inline]
+pub unsafe fn subgroup_non_uniform_quad_broadcast<T: VectorOrScalar>(value: T, id: u32) -> T {
+ let mut result = T::default();
+
+ unsafe {
+ asm! {
+ "%u32 = OpTypeInt 32 0",
+ "%subgroup = OpConstant %u32 {subgroup}",
+ "%value = OpLoad _ {value}",
+ "%id = OpLoad _ {id}",
+ "%result = OpGroupNonUniformQuadBroadcast _ %subgroup %value %id",
+ "OpStore {result} %result",
+ subgroup = const SUBGROUP,
+ value = in(reg) &value,
+ id = in(reg) &id,
+ result = in(reg) &mut result,
+ }
+ }
+
+ result
+}
+
+/// Direction is the kind of swap to perform.
+///
+/// Direction must be a scalar of integer type, whose Signedness operand is 0.
+/// +/// Direction must come from a constant instruction. +/// +/// The value returned in Result is the value provided to Value by another invocation in the same quad scope instance. The invocation providing this value is determined according to Direction. +#[cfg(target_feature = "GroupNonUniformQuad")] +pub enum QuadDirection { + /// A Direction of 0 indicates a horizontal swap; + /// - Invocations with quad indices of 0 and 1 swap values + /// - Invocations with quad indices of 2 and 3 swap values + Horizontal = 0, + /// A Direction of 1 indicates a vertical swap; + /// - Invocations with quad indices of 0 and 2 swap values + /// - Invocations with quad indices of 1 and 3 swap values + Vertical = 1, + /// A Direction of 2 indicates a diagonal swap; + /// - Invocations with quad indices of 0 and 3 swap values + /// - Invocations with quad indices of 1 and 2 swap values + Diagonal = 2, +} + +/// Swap the Value of the invocation within the quad with another invocation in the quad using Direction. +/// +/// Result Type must be a scalar or vector of floating-point type, integer type, or Boolean type. +/// +/// Execution is a Scope, but has no effect on the behavior of this instruction. It must be Subgroup. +/// +/// The type of Value must be the same as Result Type. +/// +/// Direction is the kind of swap to perform. +/// +/// Direction must be a scalar of integer type, whose Signedness operand is 0. +/// +/// Direction must come from a constant instruction. +/// +/// The value returned in Result is the value provided to Value by another invocation in the same quad scope instance. The invocation providing this value is determined according to Direction. 
+///
+/// A Direction of 0 indicates a horizontal swap;
+/// - Invocations with quad indices of 0 and 1 swap values
+/// - Invocations with quad indices of 2 and 3 swap values
+/// A Direction of 1 indicates a vertical swap;
+/// - Invocations with quad indices of 0 and 2 swap values
+/// - Invocations with quad indices of 1 and 3 swap values
+/// A Direction of 2 indicates a diagonal swap;
+/// - Invocations with quad indices of 0 and 3 swap values
+/// - Invocations with quad indices of 1 and 2 swap values
+///
+/// Direction must be one of the above values.
+///
+/// If an active invocation reads Value from an inactive invocation, the resulting value is undefined.
+#[cfg(target_feature = "GroupNonUniformQuad")]
+#[spirv_std_macros::gpu_only]
+#[doc(alias = "OpGroupNonUniformQuadSwap")]
+#[inline]
+pub unsafe fn subgroup_non_uniform_quad_swap<const DIRECTION: u32, T: VectorOrScalar>(
+ value: T,
+) -> T {
+ let mut result = T::default();
+
+ unsafe {
+ asm! {
+ "%u32 = OpTypeInt 32 0",
+ "%subgroup = OpConstant %u32 {subgroup}",
+ "%direction = OpConstant %u32 {direction}",
+ "%value = OpLoad _ {value}",
+ "%result = OpGroupNonUniformQuadSwap _ %subgroup %value %direction",
+ "OpStore {result} %result",
+ subgroup = const SUBGROUP,
+ direction = const DIRECTION,
+ value = in(reg) &value,
+ result = in(reg) &mut result,
+ }
+ }
+
+ result
+}
diff --git a/crates/spirv-std/src/vector.rs b/crates/spirv-std/src/vector.rs
index 7510953034..2ad544cb9a 100644
--- a/crates/spirv-std/src/vector.rs
+++ b/crates/spirv-std/src/vector.rs
@@ -71,7 +71,7 @@ unsafe impl Vector for glam::IVec3 {}
unsafe impl Vector for glam::IVec4 {}

/// Trait that implements slicing of a vector into a scalar or vector of lower dimensions, by
-/// ignoring the higter dimensions
+/// ignoring the higher dimensions
pub trait VectorTruncateInto<T> {
/// Slices the vector into a lower dimensional type by ignoring the higher components
fn truncate_into(self) -> T;
From 34b269ddb4ad67c65c7f69988499b875c074b0c6 Mon Sep 17 00:00:00 2001
From:
Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Tue, 11 Jun 2024 12:36:52 +0200 Subject: [PATCH 03/15] subgroup: remove all target_feature cfgs, replaced with docs --- crates/spirv-std/src/arch/subgroup.rs | 210 +++++++++++++------------- 1 file changed, 108 insertions(+), 102 deletions(-) diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs index e1b38d2946..52c75731c4 100644 --- a/crates/spirv-std/src/arch/subgroup.rs +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -27,13 +27,16 @@ pub enum GroupOperation { // /// See [`GROUP_OPERATION_CLUSTERED_REDUCE`] // ClusteredReduce = 3, /// Reserved. - #[cfg(target_feature = "GroupNonUniformPartitionedNV")] + /// + /// Requires Capability `GroupNonUniformPartitionedNV`. PartitionedReduceNV = 6, /// Reserved. - #[cfg(target_feature = "GroupNonUniformPartitionedNV")] + /// + /// Requires Capability `GroupNonUniformPartitionedNV`. PartitionedInclusiveScanNV = 7, /// Reserved. - #[cfg(target_feature = "GroupNonUniformPartitionedNV")] + /// + /// Requires Capability `GroupNonUniformPartitionedNV`. PartitionedExclusiveScanNV = 8, } @@ -53,7 +56,8 @@ pub const GROUP_OPERATION_CLUSTERED_REDUCE: u32 = 3; /// Result Type must be a Boolean type. /// /// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. -#[cfg(target_feature = "GroupNonUniform")] +/// +/// Requires Capability `GroupNonUniform`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformElect")] #[inline] @@ -82,7 +86,8 @@ pub unsafe fn subgroup_non_uniform_elect() -> bool { /// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. /// /// Predicate must be a Boolean type. -#[cfg(target_feature = "GroupNonUniformVote")] +/// +/// Requires Capability `GroupNonUniformVote`. 
#[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformAll")] #[inline] @@ -113,7 +118,8 @@ pub unsafe fn subgroup_non_uniform_all(predicate: bool) -> bool { /// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. /// /// Predicate must be a Boolean type. -#[cfg(target_feature = "GroupNonUniformVote")] +/// +/// Requires Capability `GroupNonUniformVote`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformAny")] #[inline] @@ -144,7 +150,8 @@ pub unsafe fn subgroup_non_uniform_any(predicate: bool) -> bool { /// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. /// /// Value must be a scalar or vector of floating-point type, integer type, or Boolean type. The compare operation is based on this type, and if it is a floating-point type, an ordered-and-equal compare is used. -#[cfg(target_feature = "GroupNonUniformVote")] +/// +/// Requires Capability `GroupNonUniformVote`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformAllEqual")] #[inline] @@ -181,7 +188,8 @@ pub unsafe fn subgroup_non_uniform_all_equal(value: T) -> boo /// Before version 1.5, Id must come from a constant instruction. Starting with version 1.5, this restriction is lifted. However, behavior is undefined when Id is not dynamically uniform. /// /// The resulting value is undefined if Id is an inactive invocation, or is greater than or equal to the size of the group. -#[cfg(target_feature = "GroupNonUniformBallot")] +/// +/// Requires Capability `GroupNonUniformBallot`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBroadcast")] #[inline] @@ -213,7 +221,8 @@ pub unsafe fn subgroup_non_uniform_broadcast(value: T, id: u3 /// Execution is a Scope that identifies the group of invocations affected by this command. It must be Subgroup. /// /// The type of Value must be the same as Result Type. 
-#[cfg(target_feature = "GroupNonUniformBallot")] +/// +/// Requires Capability `GroupNonUniformBallot`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBroadcastFirst")] #[inline] @@ -245,7 +254,8 @@ pub unsafe fn subgroup_non_uniform_broadcast_first(value: T) /// Execution is a Scope that identifies the group of invocations affected by this command. /// /// Predicate must be a Boolean type. -#[cfg(target_feature = "GroupNonUniformBallot")] +/// +/// Requires Capability `GroupNonUniformBallot`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallot")] #[inline] @@ -280,7 +290,8 @@ pub unsafe fn subgroup_non_uniform_ballot(predicate: bool) -> SubgroupMask { /// Behavior is undefined unless Value is the same for all invocations that execute the same dynamic instance of this instruction. /// /// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. -#[cfg(target_feature = "GroupNonUniformBallot")] +/// +/// Requires Capability `GroupNonUniformBallot`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformInverseBallot")] #[inline] @@ -317,7 +328,8 @@ pub unsafe fn subgroup_non_uniform_inverse_ballot(subgroup_mask: SubgroupMask) - /// Index must be a scalar of integer type, whose Signedness operand is 0. /// /// The resulting value is undefined if Index is greater than or equal to the size of the group. -#[cfg(target_feature = "GroupNonUniformBallot")] +/// +/// Requires Capability `GroupNonUniformBallot`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallotBitExtract")] #[inline] @@ -357,7 +369,8 @@ pub unsafe fn subgroup_non_uniform_ballot_bit_extract( /// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. 
/// /// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. -#[cfg(target_feature = "GroupNonUniformBallot")] +/// +/// Requires Capability `GroupNonUniformBallot`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallotBitCount")] #[inline] @@ -393,7 +406,8 @@ pub unsafe fn subgroup_non_uniform_ballot_bit_count( /// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. /// /// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. -#[cfg(target_feature = "GroupNonUniformBallot")] +/// +/// Requires Capability `GroupNonUniformBallot`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallotFindLSB")] #[inline] @@ -425,7 +439,8 @@ pub unsafe fn subgroup_non_uniform_ballot_find_lsb(subgroup_mask: SubgroupMask) /// Value must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0. /// /// Value is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the group) is the higher bit number of the last bitmask needed to represent all bits of the group invocations. -#[cfg(target_feature = "GroupNonUniformBallot")] +/// +/// Requires Capability `GroupNonUniformBallot`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallotFindMSB")] #[inline] @@ -459,7 +474,8 @@ pub unsafe fn subgroup_non_uniform_ballot_find_msb(subgroup_mask: SubgroupMask) /// Id must be a scalar of integer type, whose Signedness operand is 0. 
/// /// The resulting value is undefined if Id is an inactive invocation, or is greater than or equal to the size of the group. -#[cfg(target_feature = "GroupNonUniformShuffle")] +/// +/// Requires Capability `GroupNonUniformShuffle`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformShuffle")] #[inline] @@ -495,7 +511,8 @@ pub unsafe fn subgroup_non_uniform_shuffle(value: T, id: u32) /// Mask must be a scalar of integer type, whose Signedness operand is 0. /// /// The resulting value is undefined if current invocation’s id within the group xor’ed with Mask is an inactive invocation, or is greater than or equal to the size of the group. -#[cfg(target_feature = "GroupNonUniformShuffle")] +/// +/// Requires Capability `GroupNonUniformShuffle`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformShuffleXor")] #[inline] @@ -531,7 +548,8 @@ pub unsafe fn subgroup_non_uniform_shuffle_xor(value: T, mask /// Delta must be a scalar of integer type, whose Signedness operand is 0. /// /// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than the current invocation’s id within the group or if the selected lane is inactive. -#[cfg(target_feature = "GroupNonUniformShuffleRelative")] +/// +/// Requires Capability `GroupNonUniformShuffleRelative`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformShuffleUp")] #[inline] @@ -567,7 +585,8 @@ pub unsafe fn subgroup_non_uniform_shuffle_up(value: T, delta /// Delta must be a scalar of integer type, whose Signedness operand is 0. /// /// Delta is treated as unsigned and the resulting value is undefined if Delta is greater than or equal to the size of the group, or if the current invocation’s id within the group + Delta is either an inactive invocation or greater than or equal to the size of the group. -#[cfg(target_feature = "GroupNonUniformShuffleRelative")] +/// +/// Requires Capability `GroupNonUniformShuffleRelative`. 
#[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformShuffleDown")] #[inline] @@ -601,7 +620,8 @@ pub unsafe fn subgroup_non_uniform_shuffle_down(value: T, del /// The identity I for Operation is 0. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformIAdd")] #[inline] @@ -642,10 +662,8 @@ pub unsafe fn subgroup_non_uniform_i_add< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformIAdd")] #[inline] @@ -686,7 +704,8 @@ pub unsafe fn subgroup_non_uniform_i_add_clustered< /// The identity I for Operation is 0. /// /// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFAdd")] #[inline] @@ -727,10 +746,8 @@ pub unsafe fn subgroup_non_uniform_f_add< /// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. 
/// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFAdd")] #[inline] @@ -771,7 +788,8 @@ pub unsafe fn subgroup_non_uniform_f_add_clustered< /// The identity I for Operation is 1. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformIMul")] #[inline] @@ -812,10 +830,8 @@ pub unsafe fn subgroup_non_uniform_i_mul< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformIMul")] #[inline] @@ -856,7 +872,8 @@ pub unsafe fn subgroup_non_uniform_i_mul_clustered< /// The identity I for Operation is 1. /// /// The type of Value must be the same as Result Type. 
The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMul")] #[inline] @@ -897,10 +914,8 @@ pub unsafe fn subgroup_non_uniform_f_mul< /// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMul")] #[inline] @@ -941,7 +956,8 @@ pub unsafe fn subgroup_non_uniform_f_mul_clustered< /// The identity I for Operation is INT_MAX. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformSMin")] #[inline] @@ -982,10 +998,8 @@ pub unsafe fn subgroup_non_uniform_s_min< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. 
If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformSMin")] #[inline] @@ -1026,7 +1040,8 @@ pub unsafe fn subgroup_non_uniform_s_min_clustered< /// The identity I for Operation is UINT_MAX. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformUMin")] #[inline] @@ -1067,10 +1082,8 @@ pub unsafe fn subgroup_non_uniform_u_min< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformUMin")] #[inline] @@ -1111,7 +1124,8 @@ pub unsafe fn subgroup_non_uniform_u_min_clustered< /// The identity I for Operation is +INF. /// /// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. 
If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMin")] #[inline] @@ -1152,10 +1166,8 @@ pub unsafe fn subgroup_non_uniform_f_min< /// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMin")] #[inline] @@ -1196,7 +1208,8 @@ pub unsafe fn subgroup_non_uniform_f_min_clustered< /// The identity I for Operation is INT_MIN. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformSMax")] #[inline] @@ -1237,10 +1250,8 @@ pub unsafe fn subgroup_non_uniform_s_max< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. 
ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformSMax")] #[inline] @@ -1281,7 +1292,8 @@ pub unsafe fn subgroup_non_uniform_s_max_clustered< /// The identity I for Operation is 0. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformUMax")] #[inline] @@ -1322,10 +1334,8 @@ pub unsafe fn subgroup_non_uniform_u_max< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformUMax")] #[inline] @@ -1366,7 +1376,8 @@ pub unsafe fn subgroup_non_uniform_u_max_clustered< /// The identity I for Operation is -INF. /// /// The type of Value must be the same as Result Type. 
The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMax")] #[inline] @@ -1405,10 +1416,8 @@ pub unsafe fn subgroup_non_uniform_f_max< /// The identity I for Operation is -INF. /// /// The type of Value must be the same as Result Type. The method used to perform the group operation on the contributed Value(s) from active invocations is implementation defined. From the set of Value(s) provided by active invocations within a subgroup, if for any two Values one of them is a NaN, the other is chosen. If all Value(s) that are used by the current invocation are NaN, then the result is an undefined value. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMax")] #[inline] @@ -1449,7 +1458,8 @@ pub unsafe fn subgroup_non_uniform_f_max_clustered< /// The identity I for Operation is ~0. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseAnd")] #[inline] @@ -1490,10 +1500,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_and< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. 
ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseAnd")] #[inline] @@ -1534,7 +1542,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_and_clustered< /// The identity I for Operation is 0. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseOr")] #[inline] @@ -1575,10 +1584,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_or< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseOr")] #[inline] @@ -1619,7 +1626,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_or_clustered< /// The identity I for Operation is 0. /// /// The type of Value must be the same as Result Type. 
-#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseXor")] #[inline] @@ -1660,10 +1668,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_xor< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseXor")] #[inline] @@ -1704,7 +1710,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_xor_clustered< /// The identity I for Operation is ~0. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalAnd")] #[inline] @@ -1745,10 +1752,8 @@ pub unsafe fn subgroup_non_uniform_logical_and< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. 
-#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalAnd")] #[inline] @@ -1789,7 +1794,8 @@ pub unsafe fn subgroup_non_uniform_logical_and_clustered< /// The identity I for Operation is 0. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalOr")] #[inline] @@ -1830,10 +1836,8 @@ pub unsafe fn subgroup_non_uniform_logical_or< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalOr")] #[inline] @@ -1874,7 +1878,8 @@ pub unsafe fn subgroup_non_uniform_logical_or_clustered< /// The identity I for Operation is 0. /// /// The type of Value must be the same as Result Type. -#[cfg(target_feature = "GroupNonUniformArithmetic")] +/// +/// Requires Capability `GroupNonUniformArithmetic`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalXor")] #[inline] @@ -1915,10 +1920,8 @@ pub unsafe fn subgroup_non_uniform_logical_xor< /// The type of Value must be the same as Result Type. /// /// ClusterSize is the size of cluster to use. 
ClusterSize must be a scalar of integer type, whose Signedness operand is 0. ClusterSize must come from a constant instruction. Behavior is undefined unless ClusterSize is at least 1 and a power of 2. If ClusterSize is greater than the size of the group, executing this instruction results in undefined behavior. -#[cfg(all( - target_feature = "GroupNonUniformArithmetic", - target_feature = "GroupNonUniformClustered", -))] +/// +/// Requires Capability `GroupNonUniformArithmetic` and `GroupNonUniformClustered`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalXor")] #[inline] @@ -1963,7 +1966,8 @@ pub unsafe fn subgroup_non_uniform_logical_xor_clustered< /// Before version 1.5, Index must come from a constant instruction. Starting with version 1.5, Index must be dynamically uniform. /// /// If the value of Index is greater than or equal to 4, or refers to an inactive invocation, the resulting value is undefined. -#[cfg(target_feature = "GroupNonUniformQuad")] +/// +/// Requires Capability `GroupNonUniformQuad`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformQuadBroadcast")] #[inline] @@ -1995,7 +1999,8 @@ pub unsafe fn subgroup_non_uniform_quad_broadcast(value: T, i /// Direction must come from a constant instruction. /// /// The value returned in Result is the value provided to Value by another invocation in the same quad scope instance. The invocation providing this value is determined according to Direction. -#[cfg(target_feature = "GroupNonUniformQuad")] +/// +/// Requires Capability `GroupNonUniformQuad`. pub enum QuadDirection { /// A Direction of 0 indicates a horizontal swap; /// - Invocations with quad indices of 0 and 1 swap values @@ -2040,7 +2045,8 @@ pub enum QuadDirection { /// Direction must be one of the above values. /// /// If an active invocation reads Value from an inactive invocation, the resulting value is undefined. 
-#[cfg(target_feature = "GroupNonUniformQuad")] +/// +/// Requires Capability `GroupNonUniformQuad`. #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformQuadSwap")] #[inline] From d3a91ee9791c46530e282337ee0ddb09d23ecc8e Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Mon, 10 Jun 2024 22:25:52 +0200 Subject: [PATCH 04/15] subgroup: added all subgroupBarrier*() functions from glsl --- crates/spirv-std/src/arch/subgroup.rs | 114 +++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs index 52c75731c4..4031576350 100644 --- a/crates/spirv-std/src/arch/subgroup.rs +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -1,6 +1,7 @@ +use crate::arch::barrier; use crate::float::Float; use crate::integer::{Integer, SignedInteger, UnsignedInteger}; -use crate::memory::Scope; +use crate::memory::{Scope, Semantics}; use crate::scalar::VectorOrScalar; #[cfg(target_arch = "spirv")] use core::arch::asm; @@ -49,7 +50,116 @@ pub enum GroupOperation { /// function, it was removed from the [`GroupOperation`] enum and instead resides individually. pub const GROUP_OPERATION_CLUSTERED_REDUCE: u32 = 3; -// TODO barriers +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupBarrier() enforces that all active invocations within a +/// subgroup must execute this function before any are allowed to continue their +/// execution, and the results of any memory stores performed using coherent +/// variables performed prior to the call will be visible to any future +/// coherent access to the same memory performed by any other shader invocation +/// within the same subgroup. +/// +/// Requires Capability `GroupNonUniform`. 
+#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupBarrier")] +#[inline] +pub unsafe fn subgroup_barrier() { + unsafe { + barrier::control_barrier::< + SUBGROUP, + SUBGROUP, + { + Semantics::ACQUIRE_RELEASE.bits() + | Semantics::UNIFORM_MEMORY.bits() + | Semantics::WORKGROUP_MEMORY.bits() + | Semantics::IMAGE_MEMORY.bits() + }, + >(); + } +} + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupMemoryBarrier() enforces the ordering of all memory +/// transactions issued within a single shader invocation, as viewed by other +/// invocations in the same subgroup. +/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupMemoryBarrier")] +#[inline] +pub unsafe fn subgroup_memory_barrier() { + unsafe { + barrier::memory_barrier::< + SUBGROUP, + { + Semantics::ACQUIRE_RELEASE.bits() + | Semantics::UNIFORM_MEMORY.bits() + | Semantics::WORKGROUP_MEMORY.bits() + | Semantics::IMAGE_MEMORY.bits() + }, + >(); + } +} + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupMemoryBarrierBuffer() enforces the ordering of all +/// memory transactions to buffer variables issued within a single shader +/// invocation, as viewed by other invocations in the same subgroup. +/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupMemoryBarrierBuffer")] +#[inline] +pub unsafe fn subgroup_memory_barrier_buffer() { + unsafe { + barrier::memory_barrier::< + SUBGROUP, + { Semantics::ACQUIRE_RELEASE.bits() | Semantics::UNIFORM_MEMORY.bits() }, + >(); + } +} + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupMemoryBarrierShared() enforces the ordering of all +/// memory transactions to shared variables issued within a single shader +/// invocation, as viewed by other invocations in the same subgroup. 
+/// +/// Only available in compute shaders. +/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupMemoryBarrierShared")] +#[inline] +pub unsafe fn subgroup_memory_barrier_shared() { + unsafe { + barrier::memory_barrier::< + SUBGROUP, + { Semantics::ACQUIRE_RELEASE.bits() | Semantics::WORKGROUP_MEMORY.bits() }, + >(); + } +} + +/// Only usable if the extension GL_KHR_shader_subgroup_basic is enabled. +/// +/// The function subgroupMemoryBarrierImage() enforces the ordering of all +/// memory transactions to images issued within a single shader invocation, as +/// viewed by other invocations in the same subgroup. +/// +/// Requires Capability `GroupNonUniform`. +#[spirv_std_macros::gpu_only] +#[doc(alias = "subgroupMemoryBarrierImage")] +#[inline] +pub unsafe fn subgroup_memory_barrier_image() { + unsafe { + barrier::memory_barrier::< + SUBGROUP, + { Semantics::ACQUIRE_RELEASE.bits() | Semantics::IMAGE_MEMORY.bits() }, + >(); + } +} /// Result is true only in the active invocation with the lowest id in the group, otherwise result is false. 
/// From 7f72f251f62a9ef3bfc0c75cf429a05ae2ada45f Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Tue, 11 Jun 2024 13:45:49 +0200 Subject: [PATCH 05/15] subgroup: added non group-op tests --- .../subgroup/subgroup_non_uniform_ballot.rs | 17 +++++++++++++++++ .../subgroup/subgroup_non_uniform_ballot.stderr | 10 ++++++++++ .../subgroup_non_uniform_broadcast_first.rs | 17 +++++++++++++++++ .../subgroup_non_uniform_broadcast_first.stderr | 8 ++++++++ .../arch/subgroup/subgroup_non_uniform_elect.rs | 16 ++++++++++++++++ .../subgroup/subgroup_non_uniform_elect.stderr | 7 +++++++ 6 files changed, 75 insertions(+) create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_ballot.rs create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.rs create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_elect.rs create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.rs new file mode 100644 index 0000000000..9a59677134 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_ballot::subgroup_non_uniform_ballot + +use spirv_std::spirv; + +unsafe fn subgroup_non_uniform_ballot(predicate: bool) -> bool { + let ballot = spirv_std::arch::subgroup_non_uniform_ballot(predicate); + spirv_std::arch::subgroup_non_uniform_inverse_ballot(ballot) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_non_uniform_ballot(true); + } +} diff --git 
a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr new file mode 100644 index 0000000000..559b07c304 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr @@ -0,0 +1,10 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 376 8 +%7 = OpGroupNonUniformBallot %8 %9 %4 +OpLine %6 412 8 +%10 = OpGroupNonUniformInverseBallot %2 %9 %7 +OpNoLine +OpReturnValue %10 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.rs new file mode 100644 index 0000000000..720d215f77 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_broadcast_first::subgroup_non_uniform_broadcast_first + +use glam::Vec3; +use spirv_std::spirv; + +unsafe fn subgroup_non_uniform_broadcast_first(vec: Vec3) -> Vec3 { + spirv_std::arch::subgroup_non_uniform_broadcast_first::(vec) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_non_uniform_broadcast_first(Vec3::new(1., 2., 3.)); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr new file mode 100644 index 0000000000..829d283178 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 343 8 +%7 = OpGroupNonUniformBroadcastFirst %2 %8 %4 +OpNoLine +OpReturnValue %7 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_elect.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_elect.rs new file mode 100644 index 
0000000000..35e75a6e32 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_elect.rs @@ -0,0 +1,16 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_elect::subgroup_non_uniform_elect + +use spirv_std::spirv; + +unsafe fn subgroup_non_uniform_elect() -> bool { + spirv_std::arch::subgroup_non_uniform_elect() +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_non_uniform_elect(); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr new file mode 100644 index 0000000000..09efa455b0 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr @@ -0,0 +1,7 @@ +%1 = OpFunction %2 None %3 +%4 = OpLabel +OpLine %5 178 8 +%6 = OpGroupNonUniformElect %2 %7 +OpNoLine +OpReturnValue %6 +OpFunctionEnd From 21fa383c0f6697400b6859c49331bd3a138fdb6e Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Tue, 11 Jun 2024 13:44:20 +0200 Subject: [PATCH 06/15] subgroup: fixed asm for instructions taking GROUP_OP generic --- .../src/builder/spirv_asm.rs | 19 ++-- crates/spirv-std/src/arch/subgroup.rs | 99 +++++++------------ 2 files changed, 46 insertions(+), 72 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/spirv_asm.rs b/crates/rustc_codegen_spirv/src/builder/spirv_asm.rs index 623893e465..a8328dbec0 100644 --- a/crates/rustc_codegen_spirv/src/builder/spirv_asm.rs +++ b/crates/rustc_codegen_spirv/src/builder/spirv_asm.rs @@ -2,11 +2,13 @@ use super::Builder; use crate::builder_spirv::{BuilderCursor, SpirvValue}; use crate::codegen_cx::CodegenCx; use crate::spirv_type::SpirvType; +use num_traits::FromPrimitive; use rspirv::dr; use rspirv::grammar::{reflect, LogicalOperand, OperandKind, OperandQuantifier}; use rspirv::spirv::{ - FPFastMathMode, FragmentShadingRate, 
FunctionControl, ImageOperands, KernelProfilingInfo, - LoopControl, MemoryAccess, MemorySemantics, Op, RayFlags, SelectionControl, StorageClass, Word, + FPFastMathMode, FragmentShadingRate, FunctionControl, GroupOperation, ImageOperands, + KernelProfilingInfo, LoopControl, MemoryAccess, MemorySemantics, Op, RayFlags, + SelectionControl, StorageClass, Word, }; use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece}; use rustc_codegen_ssa::mir::place::PlaceRef; @@ -1347,10 +1349,15 @@ impl<'cx, 'tcx> Builder<'cx, 'tcx> { Ok(x) => inst.operands.push(dr::Operand::Scope(x)), Err(()) => self.err(format!("unknown Scope {word}")), }, - (OperandKind::GroupOperation, Some(word)) => match word.parse() { - Ok(x) => inst.operands.push(dr::Operand::GroupOperation(x)), - Err(()) => self.err(format!("unknown GroupOperation {word}")), - }, + (OperandKind::GroupOperation, Some(word)) => { + match word.parse::().ok().and_then(GroupOperation::from_u32) { + Some(id) => inst.operands.push(dr::Operand::GroupOperation(id)), + None => match word.parse() { + Ok(x) => inst.operands.push(dr::Operand::GroupOperation(x)), + Err(()) => self.err(format!("unknown GroupOperation {word}")), + }, + } + } (OperandKind::KernelEnqueueFlags, Some(word)) => match word.parse() { Ok(x) => inst.operands.push(dr::Operand::KernelEnqueueFlags(x)), Err(()) => self.err(format!("unknown KernelEnqueueFlags {word}")), diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs index 4031576350..3a13a4f595 100644 --- a/crates/spirv-std/src/arch/subgroup.rs +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -493,9 +493,8 @@ pub unsafe fn subgroup_non_uniform_ballot_bit_count( asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%subgroup_mask = OpLoad _ {subgroup_mask}", - "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup %groupop %subgroup_mask", + "%result = OpGroupNonUniformBallotBitCount %u32 %subgroup {groupop} %subgroup_mask", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -747,9 +746,8 @@ pub unsafe fn subgroup_non_uniform_i_add< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformIAdd _ %subgroup %groupop %value", + "%result = OpGroupNonUniformIAdd _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -789,10 +787,9 @@ pub unsafe fn subgroup_non_uniform_i_add_clustered< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformIAdd _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformIAdd _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -831,9 +828,8 @@ pub unsafe fn subgroup_non_uniform_f_add< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformFAdd _ %subgroup %groupop %value", + "%result = OpGroupNonUniformFAdd _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -873,10 +869,9 @@ pub unsafe fn subgroup_non_uniform_f_add_clustered< asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformFAdd _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformFAdd _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -915,9 +910,8 @@ pub unsafe fn subgroup_non_uniform_i_mul< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformIMul _ %subgroup %groupop %value", + "%result = OpGroupNonUniformIMul _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -957,10 +951,9 @@ pub unsafe fn subgroup_non_uniform_i_mul_clustered< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformIMul _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformIMul _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -999,9 +992,8 @@ pub unsafe fn subgroup_non_uniform_f_mul< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformFMul _ %subgroup %groupop %value", + "%result = OpGroupNonUniformFMul _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1041,10 +1033,9 @@ pub unsafe fn subgroup_non_uniform_f_mul_clustered< asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformFMul _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformFMul _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1083,9 +1074,8 @@ pub unsafe fn subgroup_non_uniform_s_min< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformSMin _ %subgroup %groupop %value", + "%result = OpGroupNonUniformSMin _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1125,10 +1115,9 @@ pub unsafe fn subgroup_non_uniform_s_min_clustered< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformSMin _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformSMin _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1167,9 +1156,8 @@ pub unsafe fn subgroup_non_uniform_u_min< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformUMin _ %subgroup %groupop %value", + "%result = OpGroupNonUniformUMin _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1209,10 +1197,9 @@ pub unsafe fn subgroup_non_uniform_u_min_clustered< asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformUMin _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformUMin _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1251,9 +1238,8 @@ pub unsafe fn subgroup_non_uniform_f_min< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformFMin _ %subgroup %groupop %value", + "%result = OpGroupNonUniformFMin _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1293,10 +1279,9 @@ pub unsafe fn subgroup_non_uniform_f_min_clustered< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformFMin _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformFMin _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1335,9 +1320,8 @@ pub unsafe fn subgroup_non_uniform_s_max< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformSMax _ %subgroup %groupop %value", + "%result = OpGroupNonUniformSMax _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1377,10 +1361,9 @@ pub unsafe fn subgroup_non_uniform_s_max_clustered< asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformSMax _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformSMax _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1419,9 +1402,8 @@ pub unsafe fn subgroup_non_uniform_u_max< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformUMax _ %subgroup %groupop %value", + "%result = OpGroupNonUniformUMax _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1461,10 +1443,9 @@ pub unsafe fn subgroup_non_uniform_u_max_clustered< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformUMax _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformUMax _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1503,9 +1484,8 @@ pub unsafe fn subgroup_non_uniform_f_max< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformFMax _ %subgroup %groupop %value", + "%result = OpGroupNonUniformFMax _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1543,10 +1523,9 @@ pub unsafe fn subgroup_non_uniform_f_max_clustered< asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformFMax _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformFMax _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1585,9 +1564,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_and< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformBitwiseAnd _ %subgroup %groupop %value", + "%result = OpGroupNonUniformBitwiseAnd _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1627,10 +1605,9 @@ pub unsafe fn subgroup_non_uniform_bitwise_and_clustered< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformBitwiseAnd _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformBitwiseAnd _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1669,9 +1646,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_or< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformBitwiseOr _ %subgroup %groupop %value", + "%result = OpGroupNonUniformBitwiseOr _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1711,10 +1687,9 @@ pub unsafe fn subgroup_non_uniform_bitwise_or_clustered< asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformBitwiseOr _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformBitwiseOr _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1753,9 +1728,8 @@ pub unsafe fn subgroup_non_uniform_bitwise_xor< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformBitwiseXor _ %subgroup %groupop %value", + "%result = OpGroupNonUniformBitwiseXor _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1795,10 +1769,9 @@ pub unsafe fn subgroup_non_uniform_bitwise_xor_clustered< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformBitwiseXor _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformBitwiseXor _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1837,9 +1810,8 @@ pub unsafe fn subgroup_non_uniform_logical_and< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformLogicalAnd _ %subgroup %groupop %value", + "%result = OpGroupNonUniformLogicalAnd _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1879,10 +1851,9 @@ pub unsafe fn subgroup_non_uniform_logical_and_clustered< asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformLogicalAnd _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformLogicalAnd _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -1921,9 +1892,8 @@ pub unsafe fn subgroup_non_uniform_logical_or< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformLogicalOr _ %subgroup %groupop %value", + "%result = OpGroupNonUniformLogicalOr _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -1963,10 +1933,9 @@ pub unsafe fn subgroup_non_uniform_logical_or_clustered< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformLogicalOr _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformLogicalOr _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, @@ -2005,9 +1974,8 @@ pub unsafe fn subgroup_non_uniform_logical_xor< asm! { "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", - "%result = OpGroupNonUniformLogicalXor _ %subgroup %groupop %value", + "%result = OpGroupNonUniformLogicalXor _ %subgroup {groupop} %value", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OP, @@ -2047,10 +2015,9 @@ pub unsafe fn subgroup_non_uniform_logical_xor_clustered< asm! 
{ "%u32 = OpTypeInt 32 0", "%subgroup = OpConstant %u32 {subgroup}", - "%groupop = OpConstant %u32 {groupop}", "%value = OpLoad _ {value}", "%clustersize = OpConstant %u32 {clustersize}", - "%result = OpGroupNonUniformLogicalXor _ %subgroup %groupop %value %clustersize", + "%result = OpGroupNonUniformLogicalXor _ %subgroup {groupop} %value %clustersize", "OpStore {result} %result", subgroup = const SUBGROUP, groupop = const GROUP_OPERATION_CLUSTERED_REDUCE, From a5881792347c0fa75d61a65954fa379ba940b2cd Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Tue, 11 Jun 2024 13:46:43 +0200 Subject: [PATCH 07/15] subgroup: added tests for group-op instructions --- .../subgroup_non_uniform_ballot_bit_count.rs | 19 ++++++++++++++++++ ...bgroup_non_uniform_ballot_bit_count.stderr | 8 ++++++++ .../subgroup_non_uniform_i_add_clustered.rs | 18 +++++++++++++++++ ...ubgroup_non_uniform_i_add_clustered.stderr | 8 ++++++++ ...bgroup_non_uniform_i_add_exclusive_scan.rs | 20 +++++++++++++++++++ ...up_non_uniform_i_add_exclusive_scan.stderr | 8 ++++++++ ...bgroup_non_uniform_i_add_inclusive_scan.rs | 20 +++++++++++++++++++ ...up_non_uniform_i_add_inclusive_scan.stderr | 8 ++++++++ .../subgroup_non_uniform_i_add_reduce.rs | 18 +++++++++++++++++ .../subgroup_non_uniform_i_add_reduce.stderr | 8 ++++++++ 10 files changed, 135 insertions(+) create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.rs create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.rs create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.rs create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.rs create mode 
100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.rs create mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.rs new file mode 100644 index 0000000000..205869dbbd --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.rs @@ -0,0 +1,19 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_ballot_bit_count::subgroup_non_uniform_ballot_bit_count + +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_non_uniform_ballot_bit_count(ballot: SubgroupMask) -> u32 { + spirv_std::arch::subgroup_non_uniform_ballot_bit_count::<{ GroupOperation::Reduce as u32 }>( + ballot, + ) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_non_uniform_ballot_bit_count(spirv_std::arch::subgroup_non_uniform_ballot(true)); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr new file mode 100644 index 0000000000..4db8589b64 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %5 +%6 = OpLabel +OpLine %7 493 8 +%8 = OpGroupNonUniformBallotBitCount %2 %9 Reduce %4 +OpNoLine +OpReturnValue %8 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.rs new file mode 100644 index 0000000000..6d7e9901b9 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.rs @@ -0,0 
+1,18 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+GroupNonUniformClustered,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_i_add_clustered::subgroup_non_uniform_i_add_clustered + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_non_uniform_i_add_clustered(value: u32) -> u32 { + spirv_std::arch::subgroup_non_uniform_i_add_clustered::<8, _>(value) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_non_uniform_i_add_clustered(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr new file mode 100644 index 0000000000..e154835797 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 787 8 +%7 = OpGroupNonUniformIAdd %2 %8 ClusteredReduce %4 %9 +OpNoLine +OpReturnValue %7 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.rs new file mode 100644 index 0000000000..bbc93d7894 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.rs @@ -0,0 +1,20 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_i_add_exclusive_scan::subgroup_non_uniform_i_add_exclusive_scan + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_non_uniform_i_add_exclusive_scan(value: u32) -> u32 { + 
spirv_std::arch::subgroup_non_uniform_i_add::<{ GroupOperation::ExclusiveScan as u32 }, _>( + value, + ) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_non_uniform_i_add_exclusive_scan(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr new file mode 100644 index 0000000000..27b0aeab5e --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 746 8 +%7 = OpGroupNonUniformIAdd %2 %8 ExclusiveScan %4 +OpNoLine +OpReturnValue %7 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.rs new file mode 100644 index 0000000000..1c2ed2d2c0 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.rs @@ -0,0 +1,20 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_i_add_inclusive_scan::subgroup_non_uniform_i_add_inclusive_scan + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_non_uniform_i_add_inclusive_scan(value: u32) -> u32 { + spirv_std::arch::subgroup_non_uniform_i_add::<{ GroupOperation::InclusiveScan as u32 }, _>( + value, + ) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_non_uniform_i_add_inclusive_scan(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr 
b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr new file mode 100644 index 0000000000..fe0b5ea63d --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 746 8 +%7 = OpGroupNonUniformIAdd %2 %8 InclusiveScan %4 +OpNoLine +OpReturnValue %7 +OpFunctionEnd diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.rs new file mode 100644 index 0000000000..ef0d072411 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.rs @@ -0,0 +1,18 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_i_add_reduce::subgroup_non_uniform_i_add_reduce + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_non_uniform_i_add_reduce(value: u32) -> u32 { + spirv_std::arch::subgroup_non_uniform_i_add::<{ GroupOperation::Reduce as u32 }, _>(value) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_non_uniform_i_add_reduce(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr new file mode 100644 index 0000000000..45f7e660b5 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr @@ -0,0 +1,8 @@ +%1 = OpFunction %2 None %3 +%4 = OpFunctionParameter %2 +%5 = OpLabel +OpLine %6 746 8 +%7 = OpGroupNonUniformIAdd %2 %8 Reduce %4 +OpNoLine +OpReturnValue %7 +OpFunctionEnd From 37dd512681d6a47fae66a55a2a1b4b8bff374e47 Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> 
Date: Tue, 11 Jun 2024 12:56:11 +0200 Subject: [PATCH 08/15] gitignore: added rustc-ice* error reports --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d93c2cc44c..410633c8d7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ target/ .vim/ tests/Cargo.lock .github/install-spirv-tools/Cargo.lock +rustc-ice-*.txt From 04b8288868fcb4aca8d268c974a3b9c59391565c Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Tue, 11 Jun 2024 13:07:10 +0200 Subject: [PATCH 09/15] subgroup: added test for subgroup buildins --- tests/ui/arch/subgroup/subgroup_builtins.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/ui/arch/subgroup/subgroup_builtins.rs diff --git a/tests/ui/arch/subgroup/subgroup_builtins.rs b/tests/ui/arch/subgroup/subgroup_builtins.rs new file mode 100644 index 0000000000..7524fa7b5f --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_builtins.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model + +use spirv_std::arch::SubgroupMask; +use spirv_std::spirv; + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main( + #[spirv(subgroup_id)] subgroup_id: u32, + #[spirv(subgroup_local_invocation_id)] subgroup_local_invocation_id: u32, + #[spirv(subgroup_eq_mask)] subgroup_eq_mask: SubgroupMask, + #[spirv(subgroup_ge_mask)] subgroup_ge_mask: SubgroupMask, + #[spirv(subgroup_gt_mask)] subgroup_gt_mask: SubgroupMask, + #[spirv(subgroup_le_mask)] subgroup_le_mask: SubgroupMask, + #[spirv(subgroup_lt_mask)] subgroup_lt_mask: SubgroupMask, +) { +} From 3ee9b786f1e71690e1c8ab93f1f4c8cac74e6657 Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Tue, 11 Jun 2024 13:11:09 +0200 Subject: [PATCH 10/15] subgroup: make SubgroupMask a struct to prevent implicit casts to and from UVec4 --- crates/spirv-std/src/arch/subgroup.rs | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs index 3a13a4f595..c35f734211 100644 --- a/crates/spirv-std/src/arch/subgroup.rs +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -8,9 +8,10 @@ use core::arch::asm; const SUBGROUP: u32 = Scope::Subgroup as u32; -/// GroupMask is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup. +/// SubgroupMask is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup. /// Mostly used in group ballot operations. -pub type SubgroupMask = glam::UVec4; +#[derive(Copy, Clone, Default, Eq, PartialEq)] +pub struct SubgroupMask(pub glam::UVec4); /// Defines the class of group operation. #[non_exhaustive] @@ -24,7 +25,6 @@ pub enum GroupOperation { /// A binary operation with an identity I and n (where n is the size of the workgroup) /// elements[a0, a1, … an-1] resulting in [I, a0, (a0 op a1), … (a0 op a1 op … op an-2)]. ExclusiveScan = 2, - // /// See [`GROUP_OPERATION_CLUSTERED_REDUCE`] // ClusteredReduce = 3, /// Reserved. 
From 64c0e6883bf9bb470e48b1893ff729f02516719f Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Wed, 12 Jun 2024 15:41:36 +0200 Subject: [PATCH 11/15] subgroup: fixed clippy lints --- crates/spirv-std/src/arch/subgroup.rs | 5 ++++- tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr | 4 ++-- .../subgroup/subgroup_non_uniform_ballot_bit_count.stderr | 2 +- .../subgroup/subgroup_non_uniform_broadcast_first.stderr | 2 +- tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr | 2 +- .../subgroup/subgroup_non_uniform_i_add_clustered.stderr | 2 +- .../subgroup_non_uniform_i_add_exclusive_scan.stderr | 2 +- .../subgroup_non_uniform_i_add_inclusive_scan.stderr | 2 +- .../arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr | 2 +- 9 files changed, 13 insertions(+), 10 deletions(-) diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs index c35f734211..78d629657e 100644 --- a/crates/spirv-std/src/arch/subgroup.rs +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -1,14 +1,17 @@ +#[cfg(target_arch = "spirv")] use crate::arch::barrier; use crate::float::Float; use crate::integer::{Integer, SignedInteger, UnsignedInteger}; +#[cfg(target_arch = "spirv")] use crate::memory::{Scope, Semantics}; use crate::scalar::VectorOrScalar; #[cfg(target_arch = "spirv")] use core::arch::asm; +#[cfg(target_arch = "spirv")] const SUBGROUP: u32 = Scope::Subgroup as u32; -/// SubgroupMask is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup. +/// `SubgroupMask` is a [`glam::UVec4`] representing a bitmask of all invocations within a subgroup. /// Mostly used in group ballot operations. 
#[derive(Copy, Clone, Default, Eq, PartialEq)] pub struct SubgroupMask(pub glam::UVec4); diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr index 559b07c304..13676166e7 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr @@ -1,9 +1,9 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 376 8 +OpLine %6 379 8 %7 = OpGroupNonUniformBallot %8 %9 %4 -OpLine %6 412 8 +OpLine %6 415 8 %10 = OpGroupNonUniformInverseBallot %2 %9 %7 OpNoLine OpReturnValue %10 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr index 4db8589b64..b3614d4eed 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %5 %6 = OpLabel -OpLine %7 493 8 +OpLine %7 496 8 %8 = OpGroupNonUniformBallotBitCount %2 %9 Reduce %4 OpNoLine OpReturnValue %8 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr index 829d283178..84f784d58e 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 343 8 +OpLine %6 346 8 %7 = OpGroupNonUniformBroadcastFirst %2 %8 %4 OpNoLine OpReturnValue %7 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr index 09efa455b0..73bf0b2778 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr @@ -1,6 
+1,6 @@ %1 = OpFunction %2 None %3 %4 = OpLabel -OpLine %5 178 8 +OpLine %5 181 8 %6 = OpGroupNonUniformElect %2 %7 OpNoLine OpReturnValue %6 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr index e154835797..6a1216d607 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 787 8 +OpLine %6 790 8 %7 = OpGroupNonUniformIAdd %2 %8 ClusteredReduce %4 %9 OpNoLine OpReturnValue %7 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr index 27b0aeab5e..7efe91dbb5 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 746 8 +OpLine %6 749 8 %7 = OpGroupNonUniformIAdd %2 %8 ExclusiveScan %4 OpNoLine OpReturnValue %7 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr index fe0b5ea63d..8c771f386e 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 746 8 +OpLine %6 749 8 %7 = OpGroupNonUniformIAdd %2 %8 InclusiveScan %4 OpNoLine OpReturnValue %7 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr index 45f7e660b5..40942f30fc 100644 --- 
a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr +++ b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 746 8 +OpLine %6 749 8 %7 = OpGroupNonUniformIAdd %2 %8 Reduce %4 OpNoLine OpReturnValue %7 From 8ba5e865ff452d82b86898a932f616bebecb4bdc Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Fri, 20 Sep 2024 13:23:03 +0200 Subject: [PATCH 12/15] subgroup: drop the `non_uniform` from all subgroup functions, matching glsl --- crates/spirv-std/src/arch/subgroup.rs | 137 +++++++----------- tests/ui/arch/subgroup/subgroup_ballot.rs | 17 +++ ...m_ballot.stderr => subgroup_ballot.stderr} | 0 .../subgroup/subgroup_ballot_bit_count.rs | 17 +++ ...tderr => subgroup_ballot_bit_count.stderr} | 2 +- .../arch/subgroup/subgroup_broadcast_first.rs | 17 +++ ...stderr => subgroup_broadcast_first.stderr} | 0 tests/ui/arch/subgroup/subgroup_elect.rs | 16 ++ ...orm_elect.stderr => subgroup_elect.stderr} | 0 ...ustered.rs => subgroup_i_add_clustered.rs} | 8 +- ...stderr => subgroup_i_add_clustered.stderr} | 2 +- ...ce.rs => subgroup_i_add_exclusive_scan.rs} | 8 +- ...r => subgroup_i_add_exclusive_scan.stderr} | 2 +- .../subgroup/subgroup_i_add_inclusive_scan.rs | 18 +++ ...r => subgroup_i_add_inclusive_scan.stderr} | 2 +- .../ui/arch/subgroup/subgroup_i_add_reduce.rs | 18 +++ ...ce.stderr => subgroup_i_add_reduce.stderr} | 2 +- .../subgroup/subgroup_non_uniform_ballot.rs | 17 --- .../subgroup_non_uniform_ballot_bit_count.rs | 19 --- .../subgroup_non_uniform_broadcast_first.rs | 17 --- .../subgroup/subgroup_non_uniform_elect.rs | 16 -- ...bgroup_non_uniform_i_add_exclusive_scan.rs | 20 --- ...bgroup_non_uniform_i_add_inclusive_scan.rs | 20 --- 23 files changed, 166 insertions(+), 209 deletions(-) create mode 100644 tests/ui/arch/subgroup/subgroup_ballot.rs rename tests/ui/arch/subgroup/{subgroup_non_uniform_ballot.stderr 
=> subgroup_ballot.stderr} (100%) create mode 100644 tests/ui/arch/subgroup/subgroup_ballot_bit_count.rs rename tests/ui/arch/subgroup/{subgroup_non_uniform_ballot_bit_count.stderr => subgroup_ballot_bit_count.stderr} (91%) create mode 100644 tests/ui/arch/subgroup/subgroup_broadcast_first.rs rename tests/ui/arch/subgroup/{subgroup_non_uniform_broadcast_first.stderr => subgroup_broadcast_first.stderr} (100%) create mode 100644 tests/ui/arch/subgroup/subgroup_elect.rs rename tests/ui/arch/subgroup/{subgroup_non_uniform_elect.stderr => subgroup_elect.stderr} (100%) rename tests/ui/arch/subgroup/{subgroup_non_uniform_i_add_clustered.rs => subgroup_i_add_clustered.rs} (53%) rename tests/ui/arch/subgroup/{subgroup_non_uniform_i_add_clustered.stderr => subgroup_i_add_clustered.stderr} (91%) rename tests/ui/arch/subgroup/{subgroup_non_uniform_i_add_reduce.rs => subgroup_i_add_exclusive_scan.rs} (50%) rename tests/ui/arch/subgroup/{subgroup_non_uniform_i_add_exclusive_scan.stderr => subgroup_i_add_exclusive_scan.stderr} (91%) create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.rs rename tests/ui/arch/subgroup/{subgroup_non_uniform_i_add_inclusive_scan.stderr => subgroup_i_add_inclusive_scan.stderr} (91%) create mode 100644 tests/ui/arch/subgroup/subgroup_i_add_reduce.rs rename tests/ui/arch/subgroup/{subgroup_non_uniform_i_add_reduce.stderr => subgroup_i_add_reduce.stderr} (90%) delete mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_ballot.rs delete mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.rs delete mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.rs delete mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_elect.rs delete mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.rs delete mode 100644 tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.rs diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs index 
78d629657e..a8eb38fc2a 100644 --- a/crates/spirv-std/src/arch/subgroup.rs +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -174,7 +174,7 @@ pub unsafe fn subgroup_memory_barrier_image() { #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformElect")] #[inline] -pub unsafe fn subgroup_non_uniform_elect() -> bool { +pub unsafe fn subgroup_elect() -> bool { let mut result = false; unsafe { @@ -204,7 +204,7 @@ pub unsafe fn subgroup_non_uniform_elect() -> bool { #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformAll")] #[inline] -pub unsafe fn subgroup_non_uniform_all(predicate: bool) -> bool { +pub unsafe fn subgroup_all(predicate: bool) -> bool { let mut result = false; unsafe { @@ -236,7 +236,7 @@ pub unsafe fn subgroup_non_uniform_all(predicate: bool) -> bool { #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformAny")] #[inline] -pub unsafe fn subgroup_non_uniform_any(predicate: bool) -> bool { +pub unsafe fn subgroup_any(predicate: bool) -> bool { let mut result = false; unsafe { @@ -268,7 +268,7 @@ pub unsafe fn subgroup_non_uniform_any(predicate: bool) -> bool { #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformAllEqual")] #[inline] -pub unsafe fn subgroup_non_uniform_all_equal(value: T) -> bool { +pub unsafe fn subgroup_all_equal(value: T) -> bool { let mut result = false; unsafe { @@ -306,7 +306,7 @@ pub unsafe fn subgroup_non_uniform_all_equal(value: T) -> boo #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBroadcast")] #[inline] -pub unsafe fn subgroup_non_uniform_broadcast(value: T, id: u32) -> T { +pub unsafe fn subgroup_broadcast(value: T, id: u32) -> T { let mut result = T::default(); unsafe { @@ -339,7 +339,7 @@ pub unsafe fn subgroup_non_uniform_broadcast(value: T, id: u3 #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBroadcastFirst")] #[inline] -pub unsafe fn subgroup_non_uniform_broadcast_first(value: T) -> T { +pub unsafe fn subgroup_broadcast_first(value: T) -> T { let mut result = 
T::default(); unsafe { @@ -372,7 +372,7 @@ pub unsafe fn subgroup_non_uniform_broadcast_first(value: T) #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallot")] #[inline] -pub unsafe fn subgroup_non_uniform_ballot(predicate: bool) -> SubgroupMask { +pub unsafe fn subgroup_ballot(predicate: bool) -> SubgroupMask { let mut result = SubgroupMask::default(); unsafe { @@ -408,7 +408,7 @@ pub unsafe fn subgroup_non_uniform_ballot(predicate: bool) -> SubgroupMask { #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformInverseBallot")] #[inline] -pub unsafe fn subgroup_non_uniform_inverse_ballot(subgroup_mask: SubgroupMask) -> bool { +pub unsafe fn subgroup_inverse_ballot(subgroup_mask: SubgroupMask) -> bool { let mut result = false; unsafe { @@ -446,10 +446,7 @@ pub unsafe fn subgroup_non_uniform_inverse_ballot(subgroup_mask: SubgroupMask) - #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallotBitExtract")] #[inline] -pub unsafe fn subgroup_non_uniform_ballot_bit_extract( - subgroup_mask: SubgroupMask, - id: u32, -) -> bool { +pub unsafe fn subgroup_ballot_bit_extract(subgroup_mask: SubgroupMask, id: u32) -> bool { let mut result = false; unsafe { @@ -487,9 +484,7 @@ pub unsafe fn subgroup_non_uniform_ballot_bit_extract( #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallotBitCount")] #[inline] -pub unsafe fn subgroup_non_uniform_ballot_bit_count( - subgroup_mask: SubgroupMask, -) -> u32 { +pub unsafe fn subgroup_ballot_bit_count(subgroup_mask: SubgroupMask) -> u32 { let mut result = 0; unsafe { @@ -523,7 +518,7 @@ pub unsafe fn subgroup_non_uniform_ballot_bit_count( #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallotFindLSB")] #[inline] -pub unsafe fn subgroup_non_uniform_ballot_find_lsb(subgroup_mask: SubgroupMask) -> u32 { +pub unsafe fn subgroup_ballot_find_lsb(subgroup_mask: SubgroupMask) -> u32 { let mut result = 0; unsafe { @@ -556,7 +551,7 @@ pub unsafe fn 
subgroup_non_uniform_ballot_find_lsb(subgroup_mask: SubgroupMask) #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBallotFindMSB")] #[inline] -pub unsafe fn subgroup_non_uniform_ballot_find_msb(subgroup_mask: SubgroupMask) -> u32 { +pub unsafe fn subgroup_ballot_find_msb(subgroup_mask: SubgroupMask) -> u32 { let mut result = 0; unsafe { @@ -591,7 +586,7 @@ pub unsafe fn subgroup_non_uniform_ballot_find_msb(subgroup_mask: SubgroupMask) #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformShuffle")] #[inline] -pub unsafe fn subgroup_non_uniform_shuffle(value: T, id: u32) -> T { +pub unsafe fn subgroup_shuffle(value: T, id: u32) -> T { let mut result = T::default(); unsafe { @@ -628,7 +623,7 @@ pub unsafe fn subgroup_non_uniform_shuffle(value: T, id: u32) #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformShuffleXor")] #[inline] -pub unsafe fn subgroup_non_uniform_shuffle_xor(value: T, mask: u32) -> T { +pub unsafe fn subgroup_shuffle_xor(value: T, mask: u32) -> T { let mut result = T::default(); unsafe { @@ -665,7 +660,7 @@ pub unsafe fn subgroup_non_uniform_shuffle_xor(value: T, mask #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformShuffleUp")] #[inline] -pub unsafe fn subgroup_non_uniform_shuffle_up(value: T, delta: u32) -> T { +pub unsafe fn subgroup_shuffle_up(value: T, delta: u32) -> T { let mut result = T::default(); unsafe { @@ -702,7 +697,7 @@ pub unsafe fn subgroup_non_uniform_shuffle_up(value: T, delta #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformShuffleDown")] #[inline] -pub unsafe fn subgroup_non_uniform_shuffle_down(value: T, delta: u32) -> T { +pub unsafe fn subgroup_shuffle_down(value: T, delta: u32) -> T { let mut result = T::default(); unsafe { @@ -737,10 +732,7 @@ pub unsafe fn subgroup_non_uniform_shuffle_down(value: T, del #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformIAdd")] #[inline] -pub unsafe fn subgroup_non_uniform_i_add< - const GROUP_OP: u32, - I: 
VectorOrScalar, ->( +pub unsafe fn subgroup_i_add>( value: I, ) -> I { let mut result = I::default(); @@ -778,7 +770,7 @@ pub unsafe fn subgroup_non_uniform_i_add< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformIAdd")] #[inline] -pub unsafe fn subgroup_non_uniform_i_add_clustered< +pub unsafe fn subgroup_i_add_clustered< const CLUSTER_SIZE: u32, I: VectorOrScalar, >( @@ -819,10 +811,7 @@ pub unsafe fn subgroup_non_uniform_i_add_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFAdd")] #[inline] -pub unsafe fn subgroup_non_uniform_f_add< - const GROUP_OP: u32, - F: VectorOrScalar, ->( +pub unsafe fn subgroup_f_add>( value: F, ) -> F { let mut result = F::default(); @@ -860,7 +849,7 @@ pub unsafe fn subgroup_non_uniform_f_add< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFAdd")] #[inline] -pub unsafe fn subgroup_non_uniform_f_add_clustered< +pub unsafe fn subgroup_f_add_clustered< const CLUSTER_SIZE: u32, F: VectorOrScalar, >( @@ -901,10 +890,7 @@ pub unsafe fn subgroup_non_uniform_f_add_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformIMul")] #[inline] -pub unsafe fn subgroup_non_uniform_i_mul< - const GROUP_OP: u32, - I: VectorOrScalar, ->( +pub unsafe fn subgroup_i_mul>( value: I, ) -> I { let mut result = I::default(); @@ -942,7 +928,7 @@ pub unsafe fn subgroup_non_uniform_i_mul< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformIMul")] #[inline] -pub unsafe fn subgroup_non_uniform_i_mul_clustered< +pub unsafe fn subgroup_i_mul_clustered< const CLUSTER_SIZE: u32, I: VectorOrScalar, >( @@ -983,10 +969,7 @@ pub unsafe fn subgroup_non_uniform_i_mul_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMul")] #[inline] -pub unsafe fn subgroup_non_uniform_f_mul< - const GROUP_OP: u32, - F: VectorOrScalar, ->( +pub unsafe fn subgroup_f_mul>( value: F, ) -> F { let mut result = F::default(); @@ -1024,7 +1007,7 @@ pub unsafe fn subgroup_non_uniform_f_mul< 
#[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMul")] #[inline] -pub unsafe fn subgroup_non_uniform_f_mul_clustered< +pub unsafe fn subgroup_f_mul_clustered< const CLUSTER_SIZE: u32, F: VectorOrScalar, >( @@ -1065,7 +1048,7 @@ pub unsafe fn subgroup_non_uniform_f_mul_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformSMin")] #[inline] -pub unsafe fn subgroup_non_uniform_s_min< +pub unsafe fn subgroup_s_min< const GROUP_OP: u32, S: VectorOrScalar, >( @@ -1106,7 +1089,7 @@ pub unsafe fn subgroup_non_uniform_s_min< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformSMin")] #[inline] -pub unsafe fn subgroup_non_uniform_s_min_clustered< +pub unsafe fn subgroup_s_min_clustered< const CLUSTER_SIZE: u32, S: VectorOrScalar, >( @@ -1147,7 +1130,7 @@ pub unsafe fn subgroup_non_uniform_s_min_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformUMin")] #[inline] -pub unsafe fn subgroup_non_uniform_u_min< +pub unsafe fn subgroup_u_min< const GROUP_OP: u32, U: VectorOrScalar, >( @@ -1188,7 +1171,7 @@ pub unsafe fn subgroup_non_uniform_u_min< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformUMin")] #[inline] -pub unsafe fn subgroup_non_uniform_u_min_clustered< +pub unsafe fn subgroup_u_min_clustered< const CLUSTER_SIZE: u32, U: VectorOrScalar, >( @@ -1229,10 +1212,7 @@ pub unsafe fn subgroup_non_uniform_u_min_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMin")] #[inline] -pub unsafe fn subgroup_non_uniform_f_min< - const GROUP_OP: u32, - F: VectorOrScalar, ->( +pub unsafe fn subgroup_f_min>( value: F, ) -> F { let mut result = F::default(); @@ -1270,7 +1250,7 @@ pub unsafe fn subgroup_non_uniform_f_min< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMin")] #[inline] -pub unsafe fn subgroup_non_uniform_f_min_clustered< +pub unsafe fn subgroup_f_min_clustered< const CLUSTER_SIZE: u32, F: VectorOrScalar, >( @@ -1311,7 +1291,7 @@ pub unsafe fn 
subgroup_non_uniform_f_min_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformSMax")] #[inline] -pub unsafe fn subgroup_non_uniform_s_max< +pub unsafe fn subgroup_s_max< const GROUP_OP: u32, S: VectorOrScalar, >( @@ -1352,7 +1332,7 @@ pub unsafe fn subgroup_non_uniform_s_max< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformSMax")] #[inline] -pub unsafe fn subgroup_non_uniform_s_max_clustered< +pub unsafe fn subgroup_s_max_clustered< const CLUSTER_SIZE: u32, S: VectorOrScalar, >( @@ -1393,7 +1373,7 @@ pub unsafe fn subgroup_non_uniform_s_max_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformUMax")] #[inline] -pub unsafe fn subgroup_non_uniform_u_max< +pub unsafe fn subgroup_u_max< const GROUP_OP: u32, U: VectorOrScalar, >( @@ -1434,7 +1414,7 @@ pub unsafe fn subgroup_non_uniform_u_max< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformUMax")] #[inline] -pub unsafe fn subgroup_non_uniform_u_max_clustered< +pub unsafe fn subgroup_u_max_clustered< const CLUSTER_SIZE: u32, U: VectorOrScalar, >( @@ -1475,10 +1455,7 @@ pub unsafe fn subgroup_non_uniform_u_max_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMax")] #[inline] -pub unsafe fn subgroup_non_uniform_f_max< - const GROUP_OP: u32, - F: VectorOrScalar, ->( +pub unsafe fn subgroup_f_max>( value: F, ) -> F { let mut result = F::default(); @@ -1514,7 +1491,7 @@ pub unsafe fn subgroup_non_uniform_f_max< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformFMax")] #[inline] -pub unsafe fn subgroup_non_uniform_f_max_clustered< +pub unsafe fn subgroup_f_max_clustered< const CLUSTER_SIZE: u32, F: VectorOrScalar, >( @@ -1555,7 +1532,7 @@ pub unsafe fn subgroup_non_uniform_f_max_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseAnd")] #[inline] -pub unsafe fn subgroup_non_uniform_bitwise_and< +pub unsafe fn subgroup_bitwise_and< const GROUP_OP: u32, I: VectorOrScalar, >( @@ -1596,7 +1573,7 @@ 
pub unsafe fn subgroup_non_uniform_bitwise_and< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseAnd")] #[inline] -pub unsafe fn subgroup_non_uniform_bitwise_and_clustered< +pub unsafe fn subgroup_bitwise_and_clustered< const CLUSTER_SIZE: u32, I: VectorOrScalar, >( @@ -1637,10 +1614,7 @@ pub unsafe fn subgroup_non_uniform_bitwise_and_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseOr")] #[inline] -pub unsafe fn subgroup_non_uniform_bitwise_or< - const GROUP_OP: u32, - I: VectorOrScalar, ->( +pub unsafe fn subgroup_bitwise_or>( value: I, ) -> I { let mut result = I::default(); @@ -1678,7 +1652,7 @@ pub unsafe fn subgroup_non_uniform_bitwise_or< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseOr")] #[inline] -pub unsafe fn subgroup_non_uniform_bitwise_or_clustered< +pub unsafe fn subgroup_bitwise_or_clustered< const CLUSTER_SIZE: u32, I: VectorOrScalar, >( @@ -1719,7 +1693,7 @@ pub unsafe fn subgroup_non_uniform_bitwise_or_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseXor")] #[inline] -pub unsafe fn subgroup_non_uniform_bitwise_xor< +pub unsafe fn subgroup_bitwise_xor< const GROUP_OP: u32, I: VectorOrScalar, >( @@ -1760,7 +1734,7 @@ pub unsafe fn subgroup_non_uniform_bitwise_xor< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformBitwiseXor")] #[inline] -pub unsafe fn subgroup_non_uniform_bitwise_xor_clustered< +pub unsafe fn subgroup_bitwise_xor_clustered< const CLUSTER_SIZE: u32, I: VectorOrScalar, >( @@ -1801,10 +1775,7 @@ pub unsafe fn subgroup_non_uniform_bitwise_xor_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalAnd")] #[inline] -pub unsafe fn subgroup_non_uniform_logical_and< - const GROUP_OP: u32, - I: VectorOrScalar, ->( +pub unsafe fn subgroup_logical_and>( value: I, ) -> I { let mut result = I::default(); @@ -1842,7 +1813,7 @@ pub unsafe fn subgroup_non_uniform_logical_and< #[spirv_std_macros::gpu_only] 
#[doc(alias = "OpGroupNonUniformLogicalAnd")] #[inline] -pub unsafe fn subgroup_non_uniform_logical_and_clustered< +pub unsafe fn subgroup_logical_and_clustered< const CLUSTER_SIZE: u32, I: VectorOrScalar, >( @@ -1883,10 +1854,7 @@ pub unsafe fn subgroup_non_uniform_logical_and_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalOr")] #[inline] -pub unsafe fn subgroup_non_uniform_logical_or< - const GROUP_OP: u32, - I: VectorOrScalar, ->( +pub unsafe fn subgroup_logical_or>( value: I, ) -> I { let mut result = I::default(); @@ -1924,7 +1892,7 @@ pub unsafe fn subgroup_non_uniform_logical_or< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalOr")] #[inline] -pub unsafe fn subgroup_non_uniform_logical_or_clustered< +pub unsafe fn subgroup_logical_or_clustered< const CLUSTER_SIZE: u32, I: VectorOrScalar, >( @@ -1965,10 +1933,7 @@ pub unsafe fn subgroup_non_uniform_logical_or_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalXor")] #[inline] -pub unsafe fn subgroup_non_uniform_logical_xor< - const GROUP_OP: u32, - I: VectorOrScalar, ->( +pub unsafe fn subgroup_logical_xor>( value: I, ) -> I { let mut result = I::default(); @@ -2006,7 +1971,7 @@ pub unsafe fn subgroup_non_uniform_logical_xor< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformLogicalXor")] #[inline] -pub unsafe fn subgroup_non_uniform_logical_xor_clustered< +pub unsafe fn subgroup_logical_xor_clustered< const CLUSTER_SIZE: u32, I: VectorOrScalar, >( @@ -2051,7 +2016,7 @@ pub unsafe fn subgroup_non_uniform_logical_xor_clustered< #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformQuadBroadcast")] #[inline] -pub unsafe fn subgroup_non_uniform_quad_broadcast(value: T, id: u32) -> T { +pub unsafe fn subgroup_quad_broadcast(value: T, id: u32) -> T { let mut result = T::default(); unsafe { @@ -2130,9 +2095,7 @@ pub enum QuadDirection { #[spirv_std_macros::gpu_only] #[doc(alias = "OpGroupNonUniformQuadSwap")] 
#[inline] -pub unsafe fn subgroup_non_uniform_quad_swap( - value: T, -) -> T { +pub unsafe fn subgroup_quad_swap(value: T) -> T { let mut result = T::default(); unsafe { diff --git a/tests/ui/arch/subgroup/subgroup_ballot.rs b/tests/ui/arch/subgroup/subgroup_ballot.rs new file mode 100644 index 0000000000..aaa0142037 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_ballot.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_ballot::subgroup_ballot + +use spirv_std::spirv; + +unsafe fn subgroup_ballot(predicate: bool) -> bool { + let ballot = spirv_std::arch::subgroup_ballot(predicate); + spirv_std::arch::subgroup_inverse_ballot(ballot) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_ballot(true); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr b/tests/ui/arch/subgroup/subgroup_ballot.stderr similarity index 100% rename from tests/ui/arch/subgroup/subgroup_non_uniform_ballot.stderr rename to tests/ui/arch/subgroup/subgroup_ballot.stderr diff --git a/tests/ui/arch/subgroup/subgroup_ballot_bit_count.rs b/tests/ui/arch/subgroup/subgroup_ballot_bit_count.rs new file mode 100644 index 0000000000..166fdabcbc --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_ballot_bit_count.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_ballot_bit_count::subgroup_ballot_bit_count + +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_ballot_bit_count(ballot: SubgroupMask) -> u32 { + spirv_std::arch::subgroup_ballot_bit_count::<{ GroupOperation::Reduce as u32 }>(ballot) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + 
subgroup_ballot_bit_count(spirv_std::arch::subgroup_ballot(true)); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr b/tests/ui/arch/subgroup/subgroup_ballot_bit_count.stderr similarity index 91% rename from tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr rename to tests/ui/arch/subgroup/subgroup_ballot_bit_count.stderr index b3614d4eed..319877327e 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.stderr +++ b/tests/ui/arch/subgroup/subgroup_ballot_bit_count.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %5 %6 = OpLabel -OpLine %7 496 8 +OpLine %7 491 8 %8 = OpGroupNonUniformBallotBitCount %2 %9 Reduce %4 OpNoLine OpReturnValue %8 diff --git a/tests/ui/arch/subgroup/subgroup_broadcast_first.rs b/tests/ui/arch/subgroup/subgroup_broadcast_first.rs new file mode 100644 index 0000000000..c2544101c2 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_broadcast_first.rs @@ -0,0 +1,17 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_broadcast_first::subgroup_broadcast_first + +use glam::Vec3; +use spirv_std::spirv; + +unsafe fn subgroup_broadcast_first(vec: Vec3) -> Vec3 { + spirv_std::arch::subgroup_broadcast_first::(vec) +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_broadcast_first(Vec3::new(1., 2., 3.)); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr b/tests/ui/arch/subgroup/subgroup_broadcast_first.stderr similarity index 100% rename from tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.stderr rename to tests/ui/arch/subgroup/subgroup_broadcast_first.stderr diff --git a/tests/ui/arch/subgroup/subgroup_elect.rs b/tests/ui/arch/subgroup/subgroup_elect.rs new file mode 100644 index 0000000000..4aee376c6b --- /dev/null +++ 
b/tests/ui/arch/subgroup/subgroup_elect.rs @@ -0,0 +1,16 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_elect::subgroup_elect + +use spirv_std::spirv; + +unsafe fn subgroup_elect() -> bool { + spirv_std::arch::subgroup_elect() +} + +#[spirv(compute(threads(1, 1, 1)))] +pub fn main() { + unsafe { + subgroup_elect(); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr b/tests/ui/arch/subgroup/subgroup_elect.stderr similarity index 100% rename from tests/ui/arch/subgroup/subgroup_non_uniform_elect.stderr rename to tests/ui/arch/subgroup/subgroup_elect.stderr diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.rs b/tests/ui/arch/subgroup/subgroup_i_add_clustered.rs similarity index 53% rename from tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.rs rename to tests/ui/arch/subgroup/subgroup_i_add_clustered.rs index 6d7e9901b9..f4cc511461 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.rs +++ b/tests/ui/arch/subgroup/subgroup_i_add_clustered.rs @@ -1,18 +1,18 @@ // build-pass // compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+GroupNonUniformClustered,+ext:SPV_KHR_vulkan_memory_model -// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_i_add_clustered::subgroup_non_uniform_i_add_clustered +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_i_add_clustered::subgroup_i_add_clustered use glam::UVec3; use spirv_std::arch::{GroupOperation, SubgroupMask}; use spirv_std::spirv; -unsafe fn subgroup_non_uniform_i_add_clustered(value: u32) -> u32 { - spirv_std::arch::subgroup_non_uniform_i_add_clustered::<8, _>(value) +unsafe fn subgroup_i_add_clustered(value: u32) -> u32 { + spirv_std::arch::subgroup_i_add_clustered::<8, _>(value) } #[spirv(compute(threads(32, 1, 1)))] pub fn main(#[spirv(local_invocation_id)] local_invocation_id: 
UVec3) { unsafe { - subgroup_non_uniform_i_add_clustered(local_invocation_id.x); + subgroup_i_add_clustered(local_invocation_id.x); } } diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr b/tests/ui/arch/subgroup/subgroup_i_add_clustered.stderr similarity index 91% rename from tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr rename to tests/ui/arch/subgroup/subgroup_i_add_clustered.stderr index 6a1216d607..f52c1c0632 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_clustered.stderr +++ b/tests/ui/arch/subgroup/subgroup_i_add_clustered.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 790 8 +OpLine %6 782 8 %7 = OpGroupNonUniformIAdd %2 %8 ClusteredReduce %4 %9 OpNoLine OpReturnValue %7 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.rs b/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.rs similarity index 50% rename from tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.rs rename to tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.rs index ef0d072411..a10d0b3682 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.rs +++ b/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.rs @@ -1,18 +1,18 @@ // build-pass // compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model -// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_i_add_reduce::subgroup_non_uniform_i_add_reduce +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_i_add_exclusive_scan::subgroup_i_add_exclusive_scan use glam::UVec3; use spirv_std::arch::{GroupOperation, SubgroupMask}; use spirv_std::spirv; -unsafe fn subgroup_non_uniform_i_add_reduce(value: u32) -> u32 { - spirv_std::arch::subgroup_non_uniform_i_add::<{ GroupOperation::Reduce as u32 }, _>(value) +unsafe fn subgroup_i_add_exclusive_scan(value: u32) -> u32 { + spirv_std::arch::subgroup_i_add::<{ 
GroupOperation::ExclusiveScan as u32 }, _>(value) } #[spirv(compute(threads(32, 1, 1)))] pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { unsafe { - subgroup_non_uniform_i_add_reduce(local_invocation_id.x); + subgroup_i_add_exclusive_scan(local_invocation_id.x); } } diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr b/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.stderr similarity index 91% rename from tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr rename to tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.stderr index 7efe91dbb5..bf7dd9f2b9 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.stderr +++ b/tests/ui/arch/subgroup/subgroup_i_add_exclusive_scan.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 749 8 +OpLine %6 741 8 %7 = OpGroupNonUniformIAdd %2 %8 ExclusiveScan %4 OpNoLine OpReturnValue %7 diff --git a/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.rs b/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.rs new file mode 100644 index 0000000000..ba823eac10 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.rs @@ -0,0 +1,18 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_i_add_inclusive_scan::subgroup_i_add_inclusive_scan + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_i_add_inclusive_scan(value: u32) -> u32 { + spirv_std::arch::subgroup_i_add::<{ GroupOperation::InclusiveScan as u32 }, _>(value) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_i_add_inclusive_scan(local_invocation_id.x); + } +} diff --git 
a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr b/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.stderr similarity index 91% rename from tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr rename to tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.stderr index 8c771f386e..cb69054815 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.stderr +++ b/tests/ui/arch/subgroup/subgroup_i_add_inclusive_scan.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 749 8 +OpLine %6 741 8 %7 = OpGroupNonUniformIAdd %2 %8 InclusiveScan %4 OpNoLine OpReturnValue %7 diff --git a/tests/ui/arch/subgroup/subgroup_i_add_reduce.rs b/tests/ui/arch/subgroup/subgroup_i_add_reduce.rs new file mode 100644 index 0000000000..4a8a42dbf6 --- /dev/null +++ b/tests/ui/arch/subgroup/subgroup_i_add_reduce.rs @@ -0,0 +1,18 @@ +// build-pass +// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model +// compile-flags: -C llvm-args=--disassemble-fn=subgroup_i_add_reduce::subgroup_i_add_reduce + +use glam::UVec3; +use spirv_std::arch::{GroupOperation, SubgroupMask}; +use spirv_std::spirv; + +unsafe fn subgroup_i_add_reduce(value: u32) -> u32 { + spirv_std::arch::subgroup_i_add::<{ GroupOperation::Reduce as u32 }, _>(value) +} + +#[spirv(compute(threads(32, 1, 1)))] +pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { + unsafe { + subgroup_i_add_reduce(local_invocation_id.x); + } +} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr b/tests/ui/arch/subgroup/subgroup_i_add_reduce.stderr similarity index 90% rename from tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr rename to tests/ui/arch/subgroup/subgroup_i_add_reduce.stderr index 40942f30fc..6501d5ce1d 100644 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_reduce.stderr +++ 
b/tests/ui/arch/subgroup/subgroup_i_add_reduce.stderr @@ -1,7 +1,7 @@ %1 = OpFunction %2 None %3 %4 = OpFunctionParameter %2 %5 = OpLabel -OpLine %6 749 8 +OpLine %6 741 8 %7 = OpGroupNonUniformIAdd %2 %8 Reduce %4 OpNoLine OpReturnValue %7 diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.rs deleted file mode 100644 index 9a59677134..0000000000 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot.rs +++ /dev/null @@ -1,17 +0,0 @@ -// build-pass -// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model -// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_ballot::subgroup_non_uniform_ballot - -use spirv_std::spirv; - -unsafe fn subgroup_non_uniform_ballot(predicate: bool) -> bool { - let ballot = spirv_std::arch::subgroup_non_uniform_ballot(predicate); - spirv_std::arch::subgroup_non_uniform_inverse_ballot(ballot) -} - -#[spirv(compute(threads(1, 1, 1)))] -pub fn main() { - unsafe { - subgroup_non_uniform_ballot(true); - } -} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.rs deleted file mode 100644 index 205869dbbd..0000000000 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_ballot_bit_count.rs +++ /dev/null @@ -1,19 +0,0 @@ -// build-pass -// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model -// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_ballot_bit_count::subgroup_non_uniform_ballot_bit_count - -use spirv_std::arch::{GroupOperation, SubgroupMask}; -use spirv_std::spirv; - -unsafe fn subgroup_non_uniform_ballot_bit_count(ballot: SubgroupMask) -> u32 { - spirv_std::arch::subgroup_non_uniform_ballot_bit_count::<{ GroupOperation::Reduce as u32 }>( - ballot, - ) -} - -#[spirv(compute(threads(1, 1, 1)))] -pub fn main() { - unsafe { - 
subgroup_non_uniform_ballot_bit_count(spirv_std::arch::subgroup_non_uniform_ballot(true)); - } -} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.rs deleted file mode 100644 index 720d215f77..0000000000 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_broadcast_first.rs +++ /dev/null @@ -1,17 +0,0 @@ -// build-pass -// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformBallot,+ext:SPV_KHR_vulkan_memory_model -// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_broadcast_first::subgroup_non_uniform_broadcast_first - -use glam::Vec3; -use spirv_std::spirv; - -unsafe fn subgroup_non_uniform_broadcast_first(vec: Vec3) -> Vec3 { - spirv_std::arch::subgroup_non_uniform_broadcast_first::(vec) -} - -#[spirv(compute(threads(1, 1, 1)))] -pub fn main() { - unsafe { - subgroup_non_uniform_broadcast_first(Vec3::new(1., 2., 3.)); - } -} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_elect.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_elect.rs deleted file mode 100644 index 35e75a6e32..0000000000 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_elect.rs +++ /dev/null @@ -1,16 +0,0 @@ -// build-pass -// compile-flags: -C target-feature=+GroupNonUniform,+ext:SPV_KHR_vulkan_memory_model -// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_elect::subgroup_non_uniform_elect - -use spirv_std::spirv; - -unsafe fn subgroup_non_uniform_elect() -> bool { - spirv_std::arch::subgroup_non_uniform_elect() -} - -#[spirv(compute(threads(1, 1, 1)))] -pub fn main() { - unsafe { - subgroup_non_uniform_elect(); - } -} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.rs deleted file mode 100644 index bbc93d7894..0000000000 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_exclusive_scan.rs +++ /dev/null @@ -1,20 +0,0 @@ -// build-pass -// 
compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model -// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_i_add_exclusive_scan::subgroup_non_uniform_i_add_exclusive_scan - -use glam::UVec3; -use spirv_std::arch::{GroupOperation, SubgroupMask}; -use spirv_std::spirv; - -unsafe fn subgroup_non_uniform_i_add_exclusive_scan(value: u32) -> u32 { - spirv_std::arch::subgroup_non_uniform_i_add::<{ GroupOperation::ExclusiveScan as u32 }, _>( - value, - ) -} - -#[spirv(compute(threads(32, 1, 1)))] -pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { - unsafe { - subgroup_non_uniform_i_add_exclusive_scan(local_invocation_id.x); - } -} diff --git a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.rs b/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.rs deleted file mode 100644 index 1c2ed2d2c0..0000000000 --- a/tests/ui/arch/subgroup/subgroup_non_uniform_i_add_inclusive_scan.rs +++ /dev/null @@ -1,20 +0,0 @@ -// build-pass -// compile-flags: -C target-feature=+GroupNonUniform,+GroupNonUniformArithmetic,+ext:SPV_KHR_vulkan_memory_model -// compile-flags: -C llvm-args=--disassemble-fn=subgroup_non_uniform_i_add_inclusive_scan::subgroup_non_uniform_i_add_inclusive_scan - -use glam::UVec3; -use spirv_std::arch::{GroupOperation, SubgroupMask}; -use spirv_std::spirv; - -unsafe fn subgroup_non_uniform_i_add_inclusive_scan(value: u32) -> u32 { - spirv_std::arch::subgroup_non_uniform_i_add::<{ GroupOperation::InclusiveScan as u32 }, _>( - value, - ) -} - -#[spirv(compute(threads(32, 1, 1)))] -pub fn main(#[spirv(local_invocation_id)] local_invocation_id: UVec3) { - unsafe { - subgroup_non_uniform_i_add_inclusive_scan(local_invocation_id.x); - } -} From 055602919670ebbafa6d44c7c4f64751bf42c83b Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Fri, 20 Sep 2024 13:29:55 +0200 Subject: [PATCH 13/15] changelog: add subgroup 
intrinsics PR --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dba2a620c4..2b63ba5323 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Signed for loops like `for _ in 0..4i32 {}` no longer compile. We recommend switching to unsigned for loops and casting back to signed integers in the meanwhile. ### Changed 🛠 +- [PR#14](https://github.com/Rust-GPU/rust-gpu/pull/14) add subgroup intrinsics matching glsl's [`GL_KHR_shader_subgroup`](https://github.com/KhronosGroup/GLSL/blob/main/extensions/khr/GL_KHR_shader_subgroup.txt) - [PR#13](https://github.com/Rust-GPU/rust-gpu/pull/13) allow cargo features to be passed to the shader crate - [PR#12](https://github.com/rust-gpu/rust-gpu/pull/12) updated toolchain to `nightly-2024-04-24` - [PR#9](https://github.com/Rust-GPU/rust-gpu/pull/9) relaxed `glam` version requirements (`>=0.22, <=0.29`) From 43520667a3f7b92ec4c388390d797a1647a6957e Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Mon, 23 Sep 2024 11:23:31 +0200 Subject: [PATCH 14/15] subgroup: make VectorOrScalar trait match discussions in https://github.com/EmbarkStudios/rust-gpu/pull/1030 --- crates/spirv-std/src/arch/subgroup.rs | 2 +- crates/spirv-std/src/float.rs | 4 +- crates/spirv-std/src/scalar.rs | 69 +++++---------------- crates/spirv-std/src/vector.rs | 89 +++++++++++---------------- tests/ui/arch/all.rs | 7 ++- tests/ui/arch/any.rs | 7 ++- 6 files changed, 66 insertions(+), 112 deletions(-) diff --git a/crates/spirv-std/src/arch/subgroup.rs b/crates/spirv-std/src/arch/subgroup.rs index a8eb38fc2a..b587f0e0ad 100644 --- a/crates/spirv-std/src/arch/subgroup.rs +++ b/crates/spirv-std/src/arch/subgroup.rs @@ -4,7 +4,7 @@ use crate::float::Float; use crate::integer::{Integer, SignedInteger, UnsignedInteger}; #[cfg(target_arch = "spirv")] use 
crate::memory::{Scope, Semantics}; -use crate::scalar::VectorOrScalar; +use crate::vector::VectorOrScalar; #[cfg(target_arch = "spirv")] use core::arch::asm; diff --git a/crates/spirv-std/src/float.rs b/crates/spirv-std/src/float.rs index be9133ee0a..ce441e99b5 100644 --- a/crates/spirv-std/src/float.rs +++ b/crates/spirv-std/src/float.rs @@ -1,9 +1,10 @@ //! Traits and helper functions related to floats. -use crate::scalar::VectorOrScalar; use crate::vector::Vector; +use crate::vector::{create_dim, VectorOrScalar}; #[cfg(target_arch = "spirv")] use core::arch::asm; +use core::num::NonZeroUsize; /// Abstract trait representing a SPIR-V floating point type. /// @@ -74,6 +75,7 @@ struct F32x2 { } unsafe impl VectorOrScalar for F32x2 { type Scalar = f32; + const DIM: NonZeroUsize = create_dim(2); } unsafe impl Vector for F32x2 {} diff --git a/crates/spirv-std/src/scalar.rs b/crates/spirv-std/src/scalar.rs index 9747cc995e..e9ab3ae758 100644 --- a/crates/spirv-std/src/scalar.rs +++ b/crates/spirv-std/src/scalar.rs @@ -1,48 +1,7 @@ //! Traits related to scalars. -/// Abstract trait representing either a vector or a scalar type. -/// -/// # Safety -/// Implementing this trait on non-scalar or non-vector types may break assumptions about other -/// unsafe code, and should not be done. -pub unsafe trait VectorOrScalar: Default { - /// Either the scalar component type of the vector or the scalar itself. 
- type Scalar: Scalar; -} - -unsafe impl VectorOrScalar for bool { - type Scalar = bool; -} -unsafe impl VectorOrScalar for f32 { - type Scalar = f32; -} -unsafe impl VectorOrScalar for f64 { - type Scalar = f64; -} -unsafe impl VectorOrScalar for u8 { - type Scalar = u8; -} -unsafe impl VectorOrScalar for u16 { - type Scalar = u16; -} -unsafe impl VectorOrScalar for u32 { - type Scalar = u32; -} -unsafe impl VectorOrScalar for u64 { - type Scalar = u64; -} -unsafe impl VectorOrScalar for i8 { - type Scalar = i8; -} -unsafe impl VectorOrScalar for i16 { - type Scalar = i16; -} -unsafe impl VectorOrScalar for i32 { - type Scalar = i32; -} -unsafe impl VectorOrScalar for i64 { - type Scalar = i64; -} +use crate::vector::{create_dim, VectorOrScalar}; +use core::num::NonZeroUsize; /// Abstract trait representing a SPIR-V scalar type. /// @@ -54,14 +13,16 @@ pub unsafe trait Scalar: { } -unsafe impl Scalar for bool {} -unsafe impl Scalar for f32 {} -unsafe impl Scalar for f64 {} -unsafe impl Scalar for u8 {} -unsafe impl Scalar for u16 {} -unsafe impl Scalar for u32 {} -unsafe impl Scalar for u64 {} -unsafe impl Scalar for i8 {} -unsafe impl Scalar for i16 {} -unsafe impl Scalar for i32 {} -unsafe impl Scalar for i64 {} +macro_rules! impl_scalar { + ($($ty:ty),+) => { + $( + unsafe impl VectorOrScalar for $ty { + type Scalar = Self; + const DIM: NonZeroUsize = create_dim(1); + } + unsafe impl Scalar for $ty {} + )+ + }; +} + +impl_scalar!(bool, f32, f64, u8, u16, u32, u64, i8, i16, i32, i64); diff --git a/crates/spirv-std/src/vector.rs b/crates/spirv-std/src/vector.rs index 2ad544cb9a..668f40bd32 100644 --- a/crates/spirv-std/src/vector.rs +++ b/crates/spirv-std/src/vector.rs @@ -1,49 +1,28 @@ //! Traits related to vectors. 
-use crate::scalar::{Scalar, VectorOrScalar}; +use crate::scalar::Scalar; +use core::num::NonZeroUsize; use glam::{Vec3Swizzles, Vec4Swizzles}; -unsafe impl VectorOrScalar for glam::Vec2 { - type Scalar = f32; -} -unsafe impl VectorOrScalar for glam::Vec3 { - type Scalar = f32; -} -unsafe impl VectorOrScalar for glam::Vec3A { - type Scalar = f32; -} -unsafe impl VectorOrScalar for glam::Vec4 { - type Scalar = f32; -} - -unsafe impl VectorOrScalar for glam::DVec2 { - type Scalar = f64; -} -unsafe impl VectorOrScalar for glam::DVec3 { - type Scalar = f64; -} -unsafe impl VectorOrScalar for glam::DVec4 { - type Scalar = f64; -} +/// Abstract trait representing either a vector or a scalar type. +/// +/// # Safety +/// Implementing this trait on non-scalar or non-vector types may break assumptions about other +/// unsafe code, and should not be done. +pub unsafe trait VectorOrScalar: Default { + /// Either the scalar component type of the vector or the scalar itself. + type Scalar: Scalar; -unsafe impl VectorOrScalar for glam::UVec2 { - type Scalar = u32; -} -unsafe impl VectorOrScalar for glam::UVec3 { - type Scalar = u32; -} -unsafe impl VectorOrScalar for glam::UVec4 { - type Scalar = u32; + /// The dimension of the vector, or 1 if it is a scalar + const DIM: NonZeroUsize; } -unsafe impl VectorOrScalar for glam::IVec2 { - type Scalar = i32; -} -unsafe impl VectorOrScalar for glam::IVec3 { - type Scalar = i32; -} -unsafe impl VectorOrScalar for glam::IVec4 { - type Scalar = i32; +/// replace with `NonZeroUsize::new(n).unwrap()` once `unwrap()` is const stabilized +pub(crate) const fn create_dim(n: usize) -> NonZeroUsize { + match NonZeroUsize::new(n) { + None => panic!("dim must not be 0"), + Some(n) => n, + } } /// Abstract trait representing a SPIR-V vector type. @@ -53,22 +32,24 @@ unsafe impl VectorOrScalar for glam::IVec4 { /// should not be done. 
pub unsafe trait Vector: VectorOrScalar {} -unsafe impl Vector for glam::Vec2 {} -unsafe impl Vector for glam::Vec3 {} -unsafe impl Vector for glam::Vec3A {} -unsafe impl Vector for glam::Vec4 {} - -unsafe impl Vector for glam::DVec2 {} -unsafe impl Vector for glam::DVec3 {} -unsafe impl Vector for glam::DVec4 {} - -unsafe impl Vector for glam::UVec2 {} -unsafe impl Vector for glam::UVec3 {} -unsafe impl Vector for glam::UVec4 {} +macro_rules! impl_vector { + ($($scalar:ty: $($vec:ty => $dim:literal),+;)+) => { + $($( + unsafe impl VectorOrScalar for $vec { + type Scalar = $scalar; + const DIM: NonZeroUsize = create_dim($dim); + } + unsafe impl Vector<$scalar, $dim> for $vec {} + )+)+ + }; +} -unsafe impl Vector for glam::IVec2 {} -unsafe impl Vector for glam::IVec3 {} -unsafe impl Vector for glam::IVec4 {} +impl_vector! { + f32: glam::Vec2 => 2, glam::Vec3 => 3, glam::Vec3A => 3, glam::Vec4 => 4; + f64: glam::DVec2 => 2, glam::DVec3 => 3, glam::DVec4 => 4; + u32: glam::UVec2 => 2, glam::UVec3 => 3, glam::UVec4 => 4; + i32: glam::IVec2 => 2, glam::IVec3 => 3, glam::IVec4 => 4; +} /// Trait that implements slicing of a vector into a scalar or vector of lower dimensions, by /// ignoring the higher dimensions diff --git a/tests/ui/arch/all.rs b/tests/ui/arch/all.rs index fbedae03c4..472a2d82a0 100644 --- a/tests/ui/arch/all.rs +++ b/tests/ui/arch/all.rs @@ -2,8 +2,9 @@ #![feature(repr_simd)] +use core::num::NonZeroUsize; use spirv_std::spirv; -use spirv_std::{scalar::Scalar, scalar::VectorOrScalar, vector::Vector}; +use spirv_std::{scalar::Scalar, vector::Vector, vector::VectorOrScalar}; /// HACK(shesp). Rust doesn't allow us to declare regular (tuple-)structs containing `bool` members /// as `#[repl(simd)]`. 
But we need this for `spirv_std::arch::any()` and `spirv_std::arch::all()` @@ -14,6 +15,10 @@ use spirv_std::{scalar::Scalar, scalar::VectorOrScalar, vector::Vector}; struct Vec2(T, T); unsafe impl VectorOrScalar for Vec2 { type Scalar = T; + const DIM: NonZeroUsize = match NonZeroUsize::new(2) { + None => panic!(), + Some(n) => n, + }; } unsafe impl Vector for Vec2 {} diff --git a/tests/ui/arch/any.rs b/tests/ui/arch/any.rs index 5f4caed88f..c61928fed9 100644 --- a/tests/ui/arch/any.rs +++ b/tests/ui/arch/any.rs @@ -2,8 +2,9 @@ #![feature(repr_simd)] +use core::num::NonZeroUsize; use spirv_std::spirv; -use spirv_std::{scalar::Scalar, scalar::VectorOrScalar, vector::Vector}; +use spirv_std::{scalar::Scalar, vector::Vector, vector::VectorOrScalar}; /// HACK(shesp). Rust doesn't allow us to declare regular (tuple-)structs containing `bool` members /// as `#[repl(simd)]`. But we need this for `spirv_std::arch::any()` and `spirv_std::arch::all()` @@ -14,6 +15,10 @@ use spirv_std::{scalar::Scalar, scalar::VectorOrScalar, vector::Vector}; struct Vec2(T, T); unsafe impl VectorOrScalar for Vec2 { type Scalar = T; + const DIM: NonZeroUsize = match NonZeroUsize::new(2) { + None => panic!(), + Some(n) => n, + }; } unsafe impl Vector for Vec2 {} From 528fd47b092a47810835a671422906c7c46c9164 Mon Sep 17 00:00:00 2001 From: Firestar99 <4696087-firestar99@users.noreply.gitlab.com> Date: Mon, 23 Sep 2024 11:25:55 +0200 Subject: [PATCH 15/15] cleanup: remove internal type F32x2 for glam::Vec2 --- crates/spirv-std/src/float.rs | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/crates/spirv-std/src/float.rs b/crates/spirv-std/src/float.rs index ce441e99b5..aaf85c5051 100644 --- a/crates/spirv-std/src/float.rs +++ b/crates/spirv-std/src/float.rs @@ -1,10 +1,8 @@ //! Traits and helper functions related to floats. 
use crate::vector::Vector; -use crate::vector::{create_dim, VectorOrScalar}; #[cfg(target_arch = "spirv")] use core::arch::asm; -use core::num::NonZeroUsize; /// Abstract trait representing a SPIR-V floating point type. /// @@ -63,34 +61,18 @@ pub fn f16x2_to_vec2>(int: u32) -> V { result } -// We don't have access to a concrete vector type (cfg(feature = "glam") might not be enabled), so -// synth up one manually. -#[cfg_attr(target_arch = "spirv", repr(simd))] -// sometimes dead because on cpu, the `gpu_only` macro nukes the method bodies -#[allow(dead_code)] -#[derive(Default)] -struct F32x2 { - x: f32, - y: f32, -} -unsafe impl VectorOrScalar for F32x2 { - type Scalar = f32; - const DIM: NonZeroUsize = create_dim(2); -} -unsafe impl Vector for F32x2 {} - /// Converts an f32 (float) into an f16 (half). The result is a u32, not a u16, due to GPU support /// for u16 not being universal - the upper 16 bits will always be zero. #[spirv_std_macros::gpu_only] pub fn f32_to_f16(float: f32) -> u32 { - vec2_to_f16x2(F32x2 { x: float, y: 0.0 }) + vec2_to_f16x2(glam::Vec2::new(float, 0.)) } /// Converts an f16 (half) into an f32 (float). The parameter is a u32, due to GPU support for u16 /// not being universal - the upper 16 bits are ignored. #[spirv_std_macros::gpu_only] pub fn f16_to_f32(packed: u32) -> f32 { - f16x2_to_vec2::(packed).x + f16x2_to_vec2::(packed).x } /// Packs a vec4 into 4 8-bit signed integers. See