From a4fcef9ed1ebd5190cb7c5d5f998ce33c8d120e3 Mon Sep 17 00:00:00 2001
From: Simon Ewing
Date: Mon, 4 Nov 2024 15:46:15 -0800
Subject: [PATCH] xe: reduction: limit active channels to inner dim size

---
 src/gpu/intel/ocl/reduction/combined_reduction.cl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gpu/intel/ocl/reduction/combined_reduction.cl b/src/gpu/intel/ocl/reduction/combined_reduction.cl
index 694d6b640f9..d40f0b782fa 100644
--- a/src/gpu/intel/ocl/reduction/combined_reduction.cl
+++ b/src/gpu/intel/ocl/reduction/combined_reduction.cl
@@ -210,7 +210,8 @@ combined_reduce(
     const int red_off_sg = (inner_idx_start + sglid) / INNER_DIM_SIZE;
     const int red_off_tg = red_off_sg + sgid * red_per_sg;
 
-    const int active_channels = min(SUBGROUP_SIZE, red_per_sg * INNER_DIM_SIZE);
+    const int active_channels = min(
+            SUBGROUP_SIZE, red_per_sg * (INNER_DIM_SIZE - inner_idx_start));
     ASSUME(active_channels == SUBGROUP_SIZE || !WITH_BLOCK_READ);
 
     const int loop_stride = _SRC_OFF(0, other_reductions, 0);