diff --git a/src/graph/backend/dnnl/kernels/layernorm.hpp b/src/graph/backend/dnnl/kernels/layernorm.hpp
index cad4a0d19a7..38fa8142c79 100644
--- a/src/graph/backend/dnnl/kernels/layernorm.hpp
+++ b/src/graph/backend/dnnl/kernels/layernorm.hpp
@@ -89,8 +89,9 @@ struct layernorm_fwd_t : public kernel_base_t {
         BACKEND_DNNL_ADD_PASS(pipeline, lower_down);
         BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_typecast_to_predecessor);
-        BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
         BACKEND_DNNL_ADD_PASS(pipeline, remove_quant_data_with_no_effect);
+        BACKEND_DNNL_ADD_PASS(pipeline, replace_quant_data_with_binary_post_op);
+        BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
         BACKEND_DNNL_ADD_PASS(pipeline, convert_to_runtime_dst_scales);
         BACKEND_DNNL_ADD_PASS(pipeline, fuse_dst_scales);
         BACKEND_DNNL_ADD_PASS(pipeline, infer_shape);
diff --git a/src/graph/backend/dnnl/patterns/layernorm_fusion.cpp b/src/graph/backend/dnnl/patterns/layernorm_fusion.cpp
index 6bd86bdb137..598cc950d8c 100644
--- a/src/graph/backend/dnnl/patterns/layernorm_fusion.cpp
+++ b/src/graph/backend/dnnl/patterns/layernorm_fusion.cpp
@@ -79,7 +79,6 @@ DNNL_BACKEND_REGISTER_PATTERN_MATCHER_PASS(dnnl, layernorm_post_ops_fusion_cpu)
                     auto q_graph = std::make_shared<pb_graph_t>();
                     pm::pb_op_t *pquantize
                             = q_graph->append_op(graph::op_kind::Quantize);
-                    pquantize->append_decision_function(check_zps_values<0>);
                     q_graph->create_input_port(0, pquantize, 0);
                     q_graph->create_output_port(0, pquantize, 0);
                     pgraph->append_optional(
diff --git a/tests/benchdnn/inputs/graph/pattern/harness_int8_all b/tests/benchdnn/inputs/graph/pattern/harness_int8_all
index 44687a3c79b..598e664ff85 100644
--- a/tests/benchdnn/inputs/graph/pattern/harness_int8_all
+++ b/tests/benchdnn/inputs/graph/pattern/harness_int8_all
@@ -114,6 +114,8 @@
 --reset --in-shapes=0:1x128x150x150*acdb+1:1x128x150x150*acdb+2:1x128x150x150*acdb --op-attrs=3:axis:0 --case=pattern/int8/int8_concat_fusion_3.json
 #layernorm
 --reset --case=pattern/int8/int8_lnorm_gelu_quantize.json
+# layernorm with zp != 0
+--reset --op-attrs=2:zps:1 --case=pattern/int8/int8_lnorm_gelu_quantize.json
 --reset --case=pattern/int8/int8_lnorm_multiply_quantize.json
 #softmax
 --reset --case=pattern/int8/int8_softmax_add.json
diff --git a/tests/gtests/graph/unit/backend/dnnl/test_pass.cpp b/tests/gtests/graph/unit/backend/dnnl/test_pass.cpp
index 30042600848..c13606fec2d 100644
--- a/tests/gtests/graph/unit/backend/dnnl/test_pass.cpp
+++ b/tests/gtests/graph/unit/backend/dnnl/test_pass.cpp
@@ -11926,7 +11926,7 @@ TEST(test_pass_pass_system, FuseLayernormTypecastQuantize_CPU) {
     ASSERT_EQ(agraph.get_partitions()[0]->get_outputs()[0].id, 5U);
 }
 
-TEST(test_pass_pass_system, NotFuseLayernormTypecast) {
+TEST(test_pass_pass_system, NotFuseLayernormTypecast_GPU) {
     /* | (bf16)
       layernorm
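
Side note on the zero-point handling above (a sketch, not part of the patch): dropping check_zps_values<0> from the Quantize pattern and scheduling replace_quant_data_with_binary_post_op before fuse_post_ops suggests that a quantize with a nonzero zero-point is now lowered to binary post-ops instead of being rejected at match time. The helper below is a hypothetical illustration of the standard per-tensor int8 affine quantization formula that makes this rewrite possible; it is not oneDNN API.

    // Hedged sketch: standard per-tensor int8 quantization,
    // q = saturate(round(x / scale) + zp). The map stays affine even when
    // zp != 0, so it can be expressed as binary multiply/add post-ops
    // followed by saturation.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    int8_t quantize_i8(float x, float scale, int32_t zp) {
        // Round to nearest, shift by the zero-point, then clamp to int8 range.
        const int32_t q = static_cast<int32_t>(std::nearbyint(x / scale)) + zp;
        return static_cast<int8_t>(std::clamp<int32_t>(q, -128, 127));
    }

The new benchdnn line above (--op-attrs=2:zps:1) appears to exercise exactly this path by forcing op 2's zero-point to 1 in int8_lnorm_gelu_quantize.json.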