From a2ec0a0c5805314220db925e1323e4675e3ca379 Mon Sep 17 00:00:00 2001 From: Sergey Kazakov Date: Tue, 24 Oct 2023 13:36:14 -0700 Subject: [PATCH] gpu: lnorm: align BWD fused kernel selection with changed tag matching semantics --- src/gpu/ocl/vectorized_lnorm.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/gpu/ocl/vectorized_lnorm.cpp b/src/gpu/ocl/vectorized_lnorm.cpp index 4f5401b3bb8..e8b1f484b56 100644 --- a/src/gpu/ocl/vectorized_lnorm.cpp +++ b/src/gpu/ocl/vectorized_lnorm.cpp @@ -43,7 +43,6 @@ bool is_fused_kernel_applicable(lnorm_conf_t &conf, auto gpu_arch = compute_engine->device_info()->gpu_arch(); memory_desc_wrapper src_mdw(pd->src_md()); - memory_desc_wrapper dst_mdw(pd->src_md()); memory_desc_wrapper stat_mdw(pd->stat_md()); auto eu_count = compute_engine->device_info()->eu_count(); auto max_eus_per_wg = device_info_t::max_eus_per_wg(gpu_arch); @@ -53,9 +52,11 @@ bool is_fused_kernel_applicable(lnorm_conf_t &conf, const size_t max_slm_size = device_info_t::max_slm_size(gpu_arch); // Plain layout only - if (!(src_mdw.matches_one_of_tag(ab, abc, abcd, abcde) - && stat_mdw.matches_one_of_tag(a, ab))) - return false; + const bool is_plain = src_mdw.matches_one_of_tag(ab, abc) + && stat_mdw.matches_one_of_tag(a, ab) + // kernel does not support M x 1 x N layout + && IMPLICATION(src_mdw.ndims() == 3, src_mdw.dims()[1] != 1); + if (!is_plain) return false; const int desired_sg_size = 16; // based on PVC performance data conf.sub_group_size = mayiuse_sg(desired_sg_size, engine)