diff --git a/src/cpu/aarch64/jit_uni_reorder.cpp b/src/cpu/aarch64/jit_uni_reorder.cpp index 20379fa299b..9239ace9ae1 100644 --- a/src/cpu/aarch64/jit_uni_reorder.cpp +++ b/src/cpu/aarch64/jit_uni_reorder.cpp @@ -781,7 +781,7 @@ struct jit_uni_reorder_kernel_f32_t : public kernel_t, public jit_generator { // transposition on the fly const bool fast_return = prb_.src_scale_type != scale_type_t::MANY && prb_.dst_scale_type != scale_type_t::MANY - && prb_.beta == 0.f; + && prb_.beta == 0.f && !prb_.req_src_zp && !prb_.req_dst_zp; if (fast_return) { if (prb_.src_scale_type == scale_type_t::COMMON) for (int ur = 0; ur < reg_unroll; ur += load_step) diff --git a/tests/benchdnn/inputs/reorder/test_reorder_ci b/tests/benchdnn/inputs/reorder/test_reorder_ci index 97dd695aab2..c8b46222348 100644 --- a/tests/benchdnn/inputs/reorder/test_reorder_ci +++ b/tests/benchdnn/inputs/reorder/test_reorder_ci @@ -12,6 +12,15 @@ --oflag= 2x16x3x4 1x17x5x3 30x1 +--reset + +# 4d reorders +--sdt=s8,u8 +--ddt=f32 +--attr-zero-points=src:common:-1 +--stag=adbc +1x12x128x33 + --reset # compensation reorders without groups --sdt=f32,s8,bf16